Skip to content

Commit dd6fdc0

Browse files
committed
Resolve issue#5: store update_lsn of each block into two independent slots.
Previously we thought that a 1 MB map could track changes page-by-page in 1 GB of data files. However, it recently became evident that our ptrack map, being a basic hash table, behaves more like a Bloom filter with a number of hash functions k = 1. See more here: https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives. Such a filter naturally has more collisions. By storing the update_lsn of each block in an additional slot, the map behaves like a Bloom filter with k = 2, which significantly reduces the collision rate.
1 parent de8d655 commit dd6fdc0

File tree

4 files changed

+85
-51
lines changed

4 files changed

+85
-51
lines changed

engine.c

+57-38
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ ptrackMapInit(void)
156156
sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH);
157157
sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH);
158158

159+
ptrack_map_reinit:
160+
159161
/* Remove old PTRACK_MMAP_PATH file, if exists */
160162
if (ptrack_file_exists(ptrack_mmap_path))
161163
durable_unlink(ptrack_mmap_path, LOG);
@@ -175,18 +177,15 @@ ptrackMapInit(void)
175177
if (stat(ptrack_path, &stat_buf) == 0)
176178
{
177179
copy_file(ptrack_path, ptrack_mmap_path);
178-
is_new_map = false; /* flag to check checksum */
180+
is_new_map = false; /* flag to check map file format and checksum */
179181
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY);
180-
if (ptrack_fd < 0)
181-
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
182182
}
183183
else
184-
{
185184
/* Create new file for PTRACK_MMAP_PATH */
186185
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY);
187-
if (ptrack_fd < 0)
188-
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
189-
}
186+
187+
if (ptrack_fd < 0)
188+
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
190189

191190
#ifdef WIN32
192191
{
@@ -227,7 +226,19 @@ ptrackMapInit(void)
227226
elog(ERROR, "ptrack init: wrong map format of file \"%s\"", ptrack_path);
228227

229228
/* Check ptrack version inside old ptrack map */
230-
/* No-op for now, but may be used for future compatibility checks */
229+
if (ptrack_map->version_num != PTRACK_VERSION_NUM)
230+
{
231+
ereport(WARNING,
232+
(errcode(ERRCODE_DATA_CORRUPTED),
233+
errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d",
234+
ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM),
235+
errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path)));
236+
237+
/* Delete and try again */
238+
durable_unlink(ptrack_path, LOG);
239+
is_new_map = true;
240+
goto ptrack_map_reinit;
241+
}
231242

232243
/* Check CRC */
233244
INIT_CRC32C(crc);
@@ -641,48 +652,56 @@ void
641652
ptrack_mark_block(RelFileNodeBackend smgr_rnode,
642653
ForkNumber forknum, BlockNumber blocknum)
643654
{
655+
PtBlockId bid;
644656
size_t hash;
657+
size_t slot1;
658+
size_t slot2;
645659
XLogRecPtr new_lsn;
646-
PtBlockId bid;
647660
/*
648661
* We use pg_atomic_uint64 here only for alignment purposes, because
649-
* pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build.
662+
* pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build.
650663
*/
651664
pg_atomic_uint64 old_lsn;
652665
pg_atomic_uint64 old_init_lsn;
653666

654-
if (ptrack_map_size != 0 && (ptrack_map != NULL) &&
655-
smgr_rnode.backend == InvalidBackendId) /* do not track temporary
656-
* relations */
657-
{
658-
bid.relnode = smgr_rnode.node;
659-
bid.forknum = forknum;
660-
bid.blocknum = blocknum;
661-
hash = BID_HASH_FUNC(bid);
662-
663-
if (RecoveryInProgress())
664-
new_lsn = GetXLogReplayRecPtr(NULL);
665-
else
666-
new_lsn = GetXLogInsertRecPtr();
667+
if (ptrack_map_size == 0
668+
|| ptrack_map == NULL
669+
|| smgr_rnode.backend != InvalidBackendId) /* do not track temporary
670+
* relations */
671+
return;
667672

668-
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]);
673+
bid.relnode = smgr_rnode.node;
674+
bid.forknum = forknum;
675+
bid.blocknum = blocknum;
669676

670-
/* Atomically assign new init LSN value */
671-
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
677+
hash = BID_HASH_FUNC(bid);
678+
slot1 = hash % PtrackContentNblocks;
679+
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;
672680

673-
if (old_init_lsn.value == InvalidXLogRecPtr)
674-
{
675-
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);
676-
677-
while (old_init_lsn.value < new_lsn &&
678-
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
679-
}
681+
if (RecoveryInProgress())
682+
new_lsn = GetXLogReplayRecPtr(NULL);
683+
else
684+
new_lsn = GetXLogInsertRecPtr();
680685

681-
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn);
686+
/* Atomically assign new init LSN value */
687+
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
688+
if (old_init_lsn.value == InvalidXLogRecPtr)
689+
{
690+
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);
682691

683-
/* Atomically assign new LSN value */
684-
while (old_lsn.value < new_lsn &&
685-
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn));
686-
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash]));
692+
while (old_init_lsn.value < new_lsn &&
693+
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
687694
}
695+
696+
/* Atomically assign new LSN value to the first slot */
697+
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]);
698+
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn);
699+
while (old_lsn.value < new_lsn &&
700+
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn));
701+
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[slot1]));
702+
703+
/* And to the second */
704+
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]);
705+
while (old_lsn.value < new_lsn &&
706+
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn));
688707
}

engine.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ typedef struct PtrackMapHdr
5050
{
5151
/*
5252
* Three magic bytes (+ \0) to be sure, that we are reading ptrack.map
53-
* with a right PtrackMapHdr strucutre.
53+
* with a right PtrackMapHdr structure.
5454
*/
5555
char magic[PTRACK_MAGIC_SIZE];
5656

@@ -72,7 +72,6 @@ typedef struct PtrackMapHdr
7272

7373
typedef PtrackMapHdr * PtrackMap;
7474

75-
/* TODO: check MAXALIGN usage below */
7675
/* Number of elements in ptrack map (LSN array) */
7776
#define PtrackContentNblocks \
7877
((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64))
@@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap;
8483
/* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */
8584
#define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c))
8685

87-
/* Map block address 'bid' to map slot */
86+
/* Hash of block address 'bid'. To get the slot position in the map, take
87+
* the hash modulo the number of slots: '% PtrackContentNblocks' */
8888
#define BID_HASH_FUNC(bid) \
89-
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks)
89+
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)))
9090

9191
/*
9292
* Per process pointer to shared ptrack_map

ptrack.c

+23-8
Original file line numberDiff line numberDiff line change
@@ -420,10 +420,9 @@ PG_FUNCTION_INFO_V1(ptrack_get_pagemapset);
420420
Datum
421421
ptrack_get_pagemapset(PG_FUNCTION_ARGS)
422422
{
423+
PtScanCtx *ctx;
423424
FuncCallContext *funcctx;
424-
PtScanCtx *ctx;
425425
MemoryContext oldcontext;
426-
XLogRecPtr update_lsn;
427426
datapagemap_t pagemap;
428427
char gather_path[MAXPGPATH];
429428

@@ -486,6 +485,12 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS)
486485

487486
while (true)
488487
{
488+
size_t hash;
489+
size_t slot1;
490+
size_t slot2;
491+
XLogRecPtr update_lsn1;
492+
XLogRecPtr update_lsn2;
493+
489494
/* Stop traversal if there are no more segments */
490495
if (ctx->bid.blocknum > ctx->relsize)
491496
{
@@ -525,15 +530,25 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS)
525530
}
526531
}
527532

528-
update_lsn = pg_atomic_read_u64(&ptrack_map->entries[BID_HASH_FUNC(ctx->bid)]);
533+
hash = BID_HASH_FUNC(ctx->bid);
534+
slot1 = hash % PtrackContentNblocks;
535+
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;
536+
537+
update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]);
538+
update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]);
539+
540+
if (update_lsn1 != InvalidXLogRecPtr)
541+
elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s",
542+
(uint32) (update_lsn1 >> 32), (uint32) update_lsn1,
543+
ctx->bid.blocknum, ctx->relpath);
529544

530-
if (update_lsn != InvalidXLogRecPtr)
531-
elog(DEBUG3, "ptrack: update_lsn %X/%X of blckno %u of file %s",
532-
(uint32) (update_lsn >> 32), (uint32) update_lsn,
545+
if (update_lsn2 != InvalidXLogRecPtr)
546+
elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s",
547+
(uint32) (update_lsn1 >> 32), (uint32) update_lsn2,
533548
ctx->bid.blocknum, ctx->relpath);
534549

535-
/* Block has been changed since specified LSN. Mark it in the bitmap */
536-
if (update_lsn >= ctx->lsn)
550+
/* Block has been changed since specified LSN. Mark it in the bitmap */
551+
if (update_lsn1 >= ctx->lsn && update_lsn2 >= ctx->lsn)
537552
datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE));
538553

539554
ctx->bid.blocknum += 1;

ptrack.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
/* Ptrack version as a string */
2525
#define PTRACK_VERSION "2.1"
2626
/* Ptrack version as a number */
27-
#define PTRACK_VERSION_NUM 210
27+
#define PTRACK_VERSION_NUM 220
2828

2929
/*
3030
* Structure identifying block on the disk.

0 commit comments

Comments
 (0)