Skip to content

Commit d29aa8b

Browse files
committed
PGPRO-2095: backup from replica without connection to master for PostgreSQL >= 9.6
1 parent bf1c879 commit d29aa8b

File tree

3 files changed

+150
-18
lines changed

3 files changed

+150
-18
lines changed

Diff for: src/backup.c

+63-18
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,8 @@ do_backup_instance(void)
475475
pgBackup *prev_backup = NULL;
476476
parray *prev_backup_filelist = NULL;
477477

478+
pgFile *pg_control = NULL;
479+
478480
elog(LOG, "Database backup start");
479481

480482
/* Initialize size summary */
@@ -754,9 +756,37 @@ do_backup_instance(void)
754756
parray_free(prev_backup_filelist);
755757
}
756758

759+
/* Copy pg_control in case of backup from replica >= 9.6 */
760+
if (current.from_replica && !exclusive_backup)
761+
{
762+
for (i = 0; i < parray_num(backup_files_list); i++)
763+
{
764+
pgFile *tmp_file = (pgFile *) parray_get(backup_files_list, i);
765+
766+
if (strcmp(tmp_file->name, "pg_control") == 0)
767+
{
768+
pg_control = tmp_file;
769+
break;
770+
}
771+
}
772+
773+
if (!pg_control)
774+
elog(ERROR, "Failed to locate pg_control in copied files");
775+
776+
if (is_remote_backup)
777+
remote_copy_file(NULL, pg_control);
778+
else
779+
if (!copy_file(pgdata, database_path, pg_control))
780+
elog(ERROR, "Failed to copy pg_control");
781+
}
782+
783+
757784
/* Notify end of backup */
758785
pg_stop_backup(&current);
759786

787+
if (current.from_replica && !exclusive_backup)
788+
set_min_recovery_point(pg_control, database_path, current.stop_lsn);
789+
760790
/* Add archived xlog files into the list of files of this backup */
761791
if (stream_wal)
762792
{
@@ -883,7 +913,7 @@ do_backup(time_t start_time)
883913
}
884914
}
885915

886-
if (current.from_replica)
916+
if (current.from_replica && exclusive_backup)
887917
{
888918
/* Check master connection options */
889919
if (master_host == NULL)
@@ -1089,8 +1119,11 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
10891119

10901120
params[0] = label;
10911121

1092-
/* For replica we call pg_start_backup() on master */
1093-
conn = (backup->from_replica) ? master_conn : backup_conn;
1122+
/* For 9.5 replica we call pg_start_backup() on master */
1123+
if (backup->from_replica && exclusive_backup)
1124+
conn = master_conn;
1125+
else
1126+
conn = backup_conn;
10941127

10951128
/* 2nd argument is 'fast'*/
10961129
params[1] = smooth ? "false" : "true";
@@ -1118,16 +1151,21 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup)
11181151

11191152
PQclear(res);
11201153

1121-
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
1154+
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE &&
1155+
(!(backup->from_replica && !exclusive_backup)))
11221156
/*
11231157
* Switch to a new WAL segment. It is necessary to get archived WAL
11241158
* segment, which includes start LSN of current backup.
1159+
* Don't do this for replica backups unless it's PG 9.5
11251160
*/
11261161
pg_switch_wal(conn);
11271162

1163+
//elog(INFO, "START LSN: %X/%X",
1164+
// (uint32) (backup->start_lsn >> 32), (uint32) (backup->start_lsn));
1165+
11281166
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
11291167
/* In PAGE mode wait for current segment... */
1130-
wait_wal_lsn(backup->start_lsn, true, false);
1168+
wait_wal_lsn(backup->start_lsn, true, false);
11311169
/*
11321170
* Do not wait start_lsn for stream backup.
11331171
* Because WAL streaming will start after pg_start_backup() in stream
@@ -1669,7 +1707,7 @@ pg_stop_backup(pgBackup *backup)
16691707
PGresult *tablespace_map_content = NULL;
16701708
uint32 lsn_hi;
16711709
uint32 lsn_lo;
1672-
XLogRecPtr restore_lsn = InvalidXLogRecPtr;
1710+
//XLogRecPtr restore_lsn = InvalidXLogRecPtr;
16731711
int pg_stop_backup_timeout = 0;
16741712
char path[MAXPGPATH];
16751713
char backup_label[MAXPGPATH];
@@ -1689,16 +1727,21 @@ pg_stop_backup(pgBackup *backup)
16891727
if (!backup_in_progress)
16901728
elog(ERROR, "backup is not in progress");
16911729

1692-
/* For replica we call pg_stop_backup() on master */
1693-
conn = (current.from_replica) ? master_conn : backup_conn;
1730+
/* For 9.5 replica we call pg_stop_backup() on master */
1731+
if (current.from_replica && exclusive_backup)
1732+
conn = master_conn;
1733+
else
1734+
conn = backup_conn;
16941735

16951736
/* Remove annoying NOTICE messages generated by backend */
16961737
res = pgut_execute(conn, "SET client_min_messages = warning;",
16971738
0, NULL);
16981739
PQclear(res);
16991740

1700-
/* Create restore point */
1701-
if (backup != NULL)
1741+
/* Create restore point
1742+
* only if it's a backup from master, or from an exclusive replica (which connects to master)
1743+
*/
1744+
if (backup != NULL && (!current.from_replica || (current.from_replica && exclusive_backup)))
17021745
{
17031746
const char *params[1];
17041747
char name[1024];
@@ -1716,7 +1759,7 @@ pg_stop_backup(pgBackup *backup)
17161759
/* Extract timeline and LSN from the result */
17171760
XLogDataFromLSN(PQgetvalue(res, 0, 0), &lsn_hi, &lsn_lo);
17181761
/* Calculate LSN */
1719-
restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
1762+
//restore_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
17201763
PQclear(res);
17211764
}
17221765

@@ -1830,10 +1873,10 @@ pg_stop_backup(pgBackup *backup)
18301873
/* Calculate LSN */
18311874
stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
18321875

1833-
if (!XRecOffIsValid(stop_backup_lsn))
1834-
{
1835-
stop_backup_lsn = restore_lsn;
1836-
}
1876+
//if (!XRecOffIsValid(stop_backup_lsn))
1877+
//{
1878+
// stop_backup_lsn = restore_lsn;
1879+
//}
18371880

18381881
if (!XRecOffIsValid(stop_backup_lsn))
18391882
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
@@ -1939,7 +1982,7 @@ pg_stop_backup(pgBackup *backup)
19391982
stream_xlog_path[MAXPGPATH];
19401983

19411984
/* Wait for stop_lsn to be received by replica */
1942-
if (backup->from_replica)
1985+
if (current.from_replica)
19431986
wait_replica_wal_lsn(stop_backup_lsn, false);
19441987
/*
19451988
* Wait for stop_lsn to be archived or streamed.
@@ -1962,10 +2005,12 @@ pg_stop_backup(pgBackup *backup)
19622005

19632006
elog(LOG, "Getting the Recovery Time from WAL");
19642007

2008+
/* iterate over WAL from stop_backup lsn to start_backup lsn */
19652009
if (!read_recovery_info(xlog_path, backup->tli, xlog_seg_size,
19662010
backup->start_lsn, backup->stop_lsn,
19672011
&backup->recovery_time, &backup->recovery_xid))
19682012
{
2013+
elog(LOG, "Failed to find Recovery Time in WAL. Forced to trust current_timestamp");
19692014
backup->recovery_time = recovery_time;
19702015
backup->recovery_xid = recovery_xid;
19712016
}
@@ -2074,7 +2119,7 @@ backup_files(void *arg)
20742119
elog(ERROR, "interrupted during backup");
20752120

20762121
if (progress)
2077-
elog(LOG, "Progress: (%d/%d). Process file \"%s\"",
2122+
elog(INFO, "Progress: (%d/%d). Process file \"%s\"",
20782123
i + 1, n_backup_files_list, file->path);
20792124

20802125
/* stat file to check its current state */
@@ -2168,7 +2213,7 @@ backup_files(void *arg)
21682213
file->path, file->write_size);
21692214
}
21702215
else
2171-
elog(LOG, "unexpected file type %d", buf.st_mode);
2216+
elog(WARNING, "unexpected file type %d", buf.st_mode);
21722217
}
21732218

21742219
/* Close connection */

Diff for: src/pg_probackup.h

+1
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ extern uint64 get_system_identifier(char *pgdata);
555555
extern uint64 get_remote_system_identifier(PGconn *conn);
556556
extern uint32 get_data_checksum_version(bool safe);
557557
extern uint32 get_xlog_seg_size(char *pgdata_path);
558+
extern void set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn);
558559

559560
extern void sanityChecks(void);
560561
extern void time2iso(char *buf, size_t len, time_t time);

Diff for: src/util.c

+86
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#include <time.h>
1616

17+
#include <unistd.h>
18+
1719
const char *
1820
base36enc(long unsigned int value)
1921
{
@@ -100,6 +102,44 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
100102
checkControlFile(ControlFile);
101103
}
102104

105+
/*
106+
* Write ControlFile to pg_control
107+
*/
108+
static void
109+
writeControlFile(ControlFileData *ControlFile, char *path)
110+
{
111+
int fd;
112+
char *buffer = NULL;
113+
114+
#if PG_VERSION_NUM >= 100000
115+
int ControlFileSize = PG_CONTROL_FILE_SIZE;
116+
#else
117+
int ControlFileSize = PG_CONTROL_SIZE;
118+
#endif
119+
120+
/* copy controlFileSize */
121+
buffer = pg_malloc(ControlFileSize);
122+
memcpy(buffer, &ControlFile, sizeof(ControlFileData));
123+
124+
/* Write pg_control */
125+
unlink(path);
126+
fd = open(path,
127+
O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
128+
S_IRUSR | S_IWUSR);
129+
130+
if (fd < 0)
131+
elog(ERROR, "Failed to open file: %s", path);
132+
133+
if (write(fd, buffer, ControlFileSize) != ControlFileSize)
134+
elog(ERROR, "Failed to overwrite file: %s", path);
135+
136+
if (fsync(fd) != 0)
137+
elog(ERROR, "Failed to fsync file: %s", path);
138+
139+
pg_free(buffer);
140+
close(fd);
141+
}
142+
103143
/*
104144
* Utility shared by backup and restore to fetch the current timeline
105145
* used by a node.
@@ -250,6 +290,52 @@ get_data_checksum_version(bool safe)
250290
return ControlFile.data_checksum_version;
251291
}
252292

293+
/* MinRecoveryPoint 'as-is' is not to be trusted
294+
* Use STOP LSN instead
295+
*/
296+
void
297+
set_min_recovery_point(pgFile *file, const char *backup_path, XLogRecPtr stop_backup_lsn)
298+
{
299+
ControlFileData ControlFile;
300+
char *buffer;
301+
size_t size;
302+
char fullpath[MAXPGPATH];
303+
304+
elog(LOG, "Setting minRecPoint to STOP LSN: %X/%X",
305+
(uint32) (stop_backup_lsn >> 32),
306+
(uint32) stop_backup_lsn);
307+
308+
/* Path to pg_control in backup */
309+
snprintf(fullpath, sizeof(fullpath), "%s/%s", backup_path, XLOG_CONTROL_FILE);
310+
311+
/* First fetch file... */
312+
buffer = slurpFile(backup_path, XLOG_CONTROL_FILE, &size, false);
313+
if (buffer == NULL)
314+
elog(ERROR, "ERROR");
315+
316+
digestControlFile(&ControlFile, buffer, size);
317+
318+
ControlFile.minRecoveryPoint = stop_backup_lsn;
319+
320+
/* Update checksum in pg_control header */
321+
INIT_CRC32C(ControlFile.crc);
322+
COMP_CRC32C(ControlFile.crc,
323+
(char *) &ControlFile,
324+
offsetof(ControlFileData, crc));
325+
FIN_CRC32C(ControlFile.crc);
326+
327+
/* paranoia */
328+
checkControlFile(&ControlFile);
329+
330+
/* update pg_control */
331+
writeControlFile(&ControlFile, fullpath);
332+
333+
/* Update pg_control checksum in backup_list */
334+
file->crc = pgFileGetCRC(fullpath, false);
335+
336+
pg_free(buffer);
337+
}
338+
253339

254340
/*
255341
* Convert time_t value to ISO-8601 format string. Always set timezone offset.

0 commit comments

Comments
 (0)