Skip to content

Commit 6c9dfcf

Browse files
committed
PGPRO-2095: use latest replayed lsn instead of STOP LSN
1 parent d29aa8b commit 6c9dfcf

File tree

3 files changed

+67
-47
lines changed

3 files changed

+67
-47
lines changed

Diff for: src/backup.c

+48-32
Original file line numberDiff line numberDiff line change
@@ -756,28 +756,25 @@ do_backup_instance(void)
756756
parray_free(prev_backup_filelist);
757757
}
758758

759-
/* Copy pg_control in case of backup from replica >= 9.6 */
759+
/* In case of backup from replica >= 9.6 we must fix minRecPoint.
760+
* First we must find pg_control in backup_files_list.
761+
*/
760762
if (current.from_replica && !exclusive_backup)
761763
{
764+
char pg_control_path[MAXPGPATH];
765+
766+
snprintf(pg_control_path, sizeof(pg_control_path), "%s/%s", pgdata, "global/pg_control");
767+
762768
for (i = 0; i < parray_num(backup_files_list); i++)
763769
{
764770
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
765771

766-
if (strcmp(tmp_file->name, "pg_control") == 0)
772+
if (strcmp(tmp_file->path, pg_control_path) == 0)
767773
{
768774
pg_control = tmp_file;
769775
break;
770776
}
771777
}
772-
773-
if (!pg_control)
774-
elog(ERROR, "Failed to locate pg_control in copied files");
775-
776-
if (is_remote_backup)
777-
remote_copy_file(NULL, pg_control);
778-
else
779-
if (!copy_file(pgdata, database_path, pg_control))
780-
elog(ERROR, "Failed to copy pg_control");
781778
}
782779

783780

@@ -1160,9 +1157,6 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
11601157
*/
11611158
pg_switch_wal(conn);
11621159

1163-
//elog(INFO, "START LSN: %X/%X",
1164-
// (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn));
1165-
11661160
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
11671161
/* In PAGE mode wait for current segment... */
11681162
wait_wal_lsn(backup->start_lsn, true, false);
@@ -1175,8 +1169,10 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
11751169
/* ...for others wait for previous segment */
11761170
wait_wal_lsn(backup->start_lsn, true, true);
11771171

1178-
/* Wait for start_lsn to be replayed by replica */
1179-
if (backup->from_replica)
1172+
/* In case of backup from replica for PostgreSQL 9.5
1173+
* wait for start_lsn to be replayed by replica
1174+
*/
1175+
if (backup->from_replica && exclusive_backup)
11801176
wait_replica_wal_lsn(backup->start_lsn, true);
11811177
}
11821178

@@ -1526,7 +1522,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15261522
GetXLogFileName(wal_segment, tli, targetSegNo, xlog_seg_size);
15271523

15281524
/*
1529-
* In pg_start_backup we wait for 'lsn' in 'pg_wal' directory iff it is
1525+
* In pg_start_backup we wait for 'lsn' in 'pg_wal' directory if it is
15301526
* stream and non-page backup. Page backup needs archived WAL files, so we
15311527
* wait for 'lsn' in archive 'wal' directory for page backups.
15321528
*
@@ -1547,7 +1543,12 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15471543
{
15481544
join_path_components(wal_segment_path, arclog_path, wal_segment);
15491545
wal_segment_dir = arclog_path;
1550-
timeout = archive_timeout;
1546+
1547+
if (archive_timeout > 0)
1548+
timeout = archive_timeout;
1549+
else
1550+
timeout = ARCHIVE_TIMEOUT_DEFAULT;
1551+
15511552
}
15521553

15531554
if (wait_prev_segment)
@@ -1780,14 +1781,29 @@ pg_stop_backup(pgBackup *backup)
17801781
* Stop the non-exclusive backup. Besides stop_lsn it returns from
17811782
* pg_stop_backup(false) copy of the backup label and tablespace map
17821783
* so they can be written to disk by the caller.
1784+
* In case of backup from replica >= 9.6 we do not trust minRecPoint
1785+
* and stop_backup LSN, so we use latest replayed LSN as STOP LSN.
17831786
*/
1784-
stop_backup_query = "SELECT"
1785-
" pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot()),"
1786-
" current_timestamp(0)::timestamptz,"
1787-
" lsn,"
1788-
" labelfile,"
1789-
" spcmapfile"
1790-
" FROM pg_catalog.pg_stop_backup(false)";
1787+
if (current.from_replica)
1788+
stop_backup_query = "SELECT"
1789+
" pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot()),"
1790+
" current_timestamp(0)::timestamptz,"
1791+
#if PG_VERSION_NUM >= 100000
1792+
" pg_catalog.pg_last_wal_replay_lsn(),"
1793+
#else
1794+
" pg_catalog.pg_last_xlog_replay_location(),"
1795+
#endif
1796+
" labelfile,"
1797+
" spcmapfile"
1798+
" FROM pg_catalog.pg_stop_backup(false)";
1799+
else
1800+
stop_backup_query = "SELECT"
1801+
" pg_catalog.txid_snapshot_xmax(pg_catalog.txid_current_snapshot()),"
1802+
" current_timestamp(0)::timestamptz,"
1803+
" lsn,"
1804+
" labelfile,"
1805+
" spcmapfile"
1806+
" FROM pg_catalog.pg_stop_backup(false)";
17911807

17921808
}
17931809
else
@@ -1873,14 +1889,14 @@ pg_stop_backup(pgBackup *backup)
18731889
/* Calculate LSN */
18741890
stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
18751891

1876-
//if (!XRecOffIsValid(stop_backup_lsn))
1877-
//{
1878-
// stop_backup_lsn = restore_lsn;
1879-
//}
1880-
18811892
if (!XRecOffIsValid(stop_backup_lsn))
1882-
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
1883-
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1893+
{
1894+
if (XRecOffIsNull(stop_backup_lsn))
1895+
stop_backup_lsn = stop_backup_lsn + SizeOfXLogLongPHD;
1896+
else
1897+
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
1898+
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1899+
}
18841900

18851901
/* Write backup_label and tablespace_map */
18861902
if (!exclusive_backup)

Diff for: src/pg_probackup.h

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@
5757
#define XID_FMT "%u"
5858
#endif
5959

60+
/* Check if an XLogRecPtr value points to offset 0 of a WAL block, i.e. is a multiple of XLOG_BLCKSZ */
61+
#define XRecOffIsNull(xlrp) \
62+
((xlrp) % XLOG_BLCKSZ == 0)
63+
6064
typedef enum CompressAlg
6165
{
6266
NOT_DEFINED_COMPRESS = 0,

Diff for: src/util.c

+15-15
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ writeControlFile(ControlFileData *ControlFile, char *path)
119119

120120
/* copy controlFileSize */
121121
buffer = pg_malloc(ControlFileSize);
122-
memcpy(buffer, &ControlFile, sizeof(ControlFileData));
122+
memcpy(buffer, ControlFile, sizeof(ControlFileData));
123123

124124
/* Write pg_control */
125125
unlink(path);
@@ -136,8 +136,8 @@ writeControlFile(ControlFileData *ControlFile, char *path)
136136
if (fsync(fd) != 0)
137137
elog(ERROR, "Failed to fsync file: %s", path);
138138

139-
pg_free(buffer);
140139
close(fd);
140+
pg_free(buffer);
141141
}
142142

143143
/*
@@ -290,9 +290,7 @@ get_data_checksum_version(bool safe)
290290
return ControlFile.data_checksum_version;
291291
}
292292

293-
/* MinRecoveryPoint 'as-is' is not to be trusted
294-
* Use STOP LSN instead
295-
*/
293+
/* MinRecoveryPoint 'as-is' is not to be trusted */
296294
void
297295
set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn)
298296
{
@@ -301,20 +299,21 @@ set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_ba
301299
size_t size;
302300
char fullpath[MAXPGPATH];
303301

304-
elog(LOG, "Setting minRecPoint to STOP LSN: %X/%X",
305-
(uint32) (stop_backup_lsn >> 32),
306-
(uint32) stop_backup_lsn);
307-
308-
/* Path to pg_control in backup */
309-
snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE);
310-
311-
/* First fetch file... */
312-
buffer = slurpFile(backup_path, XLOG_CONTROL_FILE, &size, false);
302+
/* First fetch file content */
303+
buffer = slurpFile(pgdata, XLOG_CONTROL_FILE, &size, false);
313304
if (buffer == NULL)
314305
elog(ERROR, "ERROR");
315306

316307
digestControlFile(&ControlFile, buffer, size);
317308

309+
elog(LOG, "Current minRecPoint %X/%X",
310+
(uint32) (ControlFile.minRecoveryPoint >> 32),
311+
(uint32) ControlFile.minRecoveryPoint);
312+
313+
elog(LOG, "Setting minRecPoint to %X/%X",
314+
(uint32) (stop_backup_lsn >> 32),
315+
(uint32) stop_backup_lsn);
316+
318317
ControlFile.minRecoveryPoint = stop_backup_lsn;
319318

320319
/* Update checksum in pg_control header */
@@ -327,7 +326,8 @@ set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_ba
327326
/* paranoia */
328327
checkControlFile(&ControlFile);
329328

330-
/* update pg_control */
329+
/* overwrite pg_control */
330+
snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE);
331331
writeControlFile(&ControlFile, fullpath);
332332

333333
/* Update pg_control checksum in backup_list */

0 commit comments

Comments
 (0)