Skip to content

Commit 3985b60

Browse files
committed
Support PrefetchBuffer() in recovery.
Provide PrefetchSharedBuffer(), a variant that takes SMgrRelation, for use in recovery. Rename LocalPrefetchBuffer() to PrefetchLocalBuffer() for consistency. Add a return value to all of these. In recovery, tolerate and report missing files, so we can handle relations unlinked before crash recovery began. Also report cache hits and misses, so that callers can do faster buffer lookups and better I/O accounting. Reviewed-by: Alvaro Herrera <[email protected]> Reviewed-by: Andres Freund <[email protected]> Discussion: https://postgr.es/m/CA%2BhUKGJ4VJN8ttxScUFM8dOKX0BrBiboo5uz1cq%3DAovOddfHpA%40mail.gmail.com
1 parent 981643d commit 3985b60

File tree

8 files changed

+134
-57
lines changed

8 files changed

+134
-57
lines changed

Diff for: src/backend/storage/buffer/bufmgr.c

+87-38
Original file line numberDiff line numberDiff line change
@@ -480,18 +480,99 @@ static int ckpt_buforder_comparator(const void *pa, const void *pb);
480480
static int ts_ckpt_progress_comparator(Datum a, Datum b, void *arg);
481481

482482

483+
/*
484+
* Implementation of PrefetchBuffer() for shared buffers.
485+
*/
486+
PrefetchBufferResult
487+
PrefetchSharedBuffer(SMgrRelation smgr_reln,
488+
ForkNumber forkNum,
489+
BlockNumber blockNum)
490+
{
491+
PrefetchBufferResult result = {InvalidBuffer, false};
492+
BufferTag newTag; /* identity of requested block */
493+
uint32 newHash; /* hash value for newTag */
494+
LWLock *newPartitionLock; /* buffer partition lock for it */
495+
int buf_id;
496+
497+
Assert(BlockNumberIsValid(blockNum));
498+
499+
/* create a tag so we can lookup the buffer */
500+
INIT_BUFFERTAG(newTag, smgr_reln->smgr_rnode.node,
501+
forkNum, blockNum);
502+
503+
/* determine its hash code and partition lock ID */
504+
newHash = BufTableHashCode(&newTag);
505+
newPartitionLock = BufMappingPartitionLock(newHash);
506+
507+
/* see if the block is in the buffer pool already */
508+
LWLockAcquire(newPartitionLock, LW_SHARED);
509+
buf_id = BufTableLookup(&newTag, newHash);
510+
LWLockRelease(newPartitionLock);
511+
512+
/* If not in buffers, initiate prefetch */
513+
if (buf_id < 0)
514+
{
515+
#ifdef USE_PREFETCH
516+
/*
517+
* Try to initiate an asynchronous read. This returns false in
518+
* recovery if the relation file doesn't exist.
519+
*/
520+
if (smgrprefetch(smgr_reln, forkNum, blockNum))
521+
result.initiated_io = true;
522+
#endif /* USE_PREFETCH */
523+
}
524+
else
525+
{
526+
/*
527+
* Report the buffer it was in at that time. The caller may be able
528+
* to avoid a buffer table lookup, but it's not pinned and it must be
529+
* rechecked!
530+
*/
531+
result.recent_buffer = buf_id + 1;
532+
}
533+
534+
/*
535+
* If the block *is* in buffers, we do nothing. This is not really ideal:
536+
* the block might be just about to be evicted, which would be stupid
537+
* since we know we are going to need it soon. But the only easy answer
538+
* is to bump the usage_count, which does not seem like a great solution:
539+
* when the caller does ultimately touch the block, usage_count would get
540+
* bumped again, resulting in too much favoritism for blocks that are
541+
* involved in a prefetch sequence. A real fix would involve some
542+
* additional per-buffer state, and it's not clear that there's enough of
543+
* a problem to justify that.
544+
*/
545+
546+
return result;
547+
}
548+
483549
/*
484550
* PrefetchBuffer -- initiate asynchronous read of a block of a relation
485551
*
486552
* This is named by analogy to ReadBuffer but doesn't actually allocate a
487553
* buffer. Instead it tries to ensure that a future ReadBuffer for the given
488554
* block will not be delayed by the I/O. Prefetching is optional.
489-
* No-op if prefetching isn't compiled in.
555+
*
556+
* There are three possible outcomes:
557+
*
558+
* 1. If the block is already cached, the result includes a valid buffer that
559+
* could be used by the caller to avoid the need for a later buffer lookup, but
560+
* it's not pinned, so the caller must recheck it.
561+
*
562+
* 2. If the kernel has been asked to initiate I/O, the initiated_io member is
563+
* true. Currently there is no way to know if the data was already cached by
564+
* the kernel and therefore didn't really initiate I/O, and no way to know when
565+
* the I/O completes other than using synchronous ReadBuffer().
566+
*
567+
* 3. Otherwise, the buffer wasn't already cached by PostgreSQL, and either
568+
* USE_PREFETCH is not defined (this build doesn't support prefetching due to
569+
* lack of a kernel facility), or the underlying relation file wasn't found and
570+
* we are in recovery. (If the relation file wasn't found and we are not in
571+
* recovery, an error is raised).
490572
*/
491-
void
573+
PrefetchBufferResult
492574
PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
493575
{
494-
#ifdef USE_PREFETCH
495576
Assert(RelationIsValid(reln));
496577
Assert(BlockNumberIsValid(blockNum));
497578

@@ -507,45 +588,13 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
507588
errmsg("cannot access temporary tables of other sessions")));
508589

509590
/* pass it off to localbuf.c */
510-
LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum);
591+
return PrefetchLocalBuffer(reln->rd_smgr, forkNum, blockNum);
511592
}
512593
else
513594
{
514-
BufferTag newTag; /* identity of requested block */
515-
uint32 newHash; /* hash value for newTag */
516-
LWLock *newPartitionLock; /* buffer partition lock for it */
517-
int buf_id;
518-
519-
/* create a tag so we can lookup the buffer */
520-
INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode.node,
521-
forkNum, blockNum);
522-
523-
/* determine its hash code and partition lock ID */
524-
newHash = BufTableHashCode(&newTag);
525-
newPartitionLock = BufMappingPartitionLock(newHash);
526-
527-
/* see if the block is in the buffer pool already */
528-
LWLockAcquire(newPartitionLock, LW_SHARED);
529-
buf_id = BufTableLookup(&newTag, newHash);
530-
LWLockRelease(newPartitionLock);
531-
532-
/* If not in buffers, initiate prefetch */
533-
if (buf_id < 0)
534-
smgrprefetch(reln->rd_smgr, forkNum, blockNum);
535-
536-
/*
537-
* If the block *is* in buffers, we do nothing. This is not really
538-
* ideal: the block might be just about to be evicted, which would be
539-
* stupid since we know we are going to need it soon. But the only
540-
* easy answer is to bump the usage_count, which does not seem like a
541-
* great solution: when the caller does ultimately touch the block,
542-
* usage_count would get bumped again, resulting in too much
543-
* favoritism for blocks that are involved in a prefetch sequence. A
544-
* real fix would involve some additional per-buffer state, and it's
545-
* not clear that there's enough of a problem to justify that.
546-
*/
595+
/* pass it to the shared buffer version */
596+
return PrefetchSharedBuffer(reln->rd_smgr, forkNum, blockNum);
547597
}
548-
#endif /* USE_PREFETCH */
549598
}
550599

551600

Diff for: src/backend/storage/buffer/localbuf.c

+14-8
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,17 @@ static Block GetLocalBufferStorage(void);
5454

5555

5656
/*
57-
* LocalPrefetchBuffer -
57+
* PrefetchLocalBuffer -
5858
* initiate asynchronous read of a block of a relation
5959
*
6060
* Do PrefetchBuffer's work for temporary relations.
6161
* No I/O is initiated if prefetching isn't compiled in; a cache hit is still reported.
6262
*/
63-
void
64-
LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
63+
PrefetchBufferResult
64+
PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
6565
BlockNumber blockNum)
6666
{
67-
#ifdef USE_PREFETCH
67+
PrefetchBufferResult result = {InvalidBuffer, false};
6868
BufferTag newTag; /* identity of requested block */
6969
LocalBufferLookupEnt *hresult;
7070

@@ -81,12 +81,18 @@ LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
8181
if (hresult)
8282
{
8383
/* Yes, so no I/O is needed; just report the local buffer it was found in */
84-
return;
84+
result.recent_buffer = -hresult->id - 1;
8585
}
86-
87-
/* Not in buffers, so initiate prefetch */
88-
smgrprefetch(smgr, forkNum, blockNum);
86+
else
87+
{
88+
#ifdef USE_PREFETCH
89+
/* Not in buffers, so initiate prefetch */
90+
smgrprefetch(smgr, forkNum, blockNum);
91+
result.initiated_io = true;
8992
#endif /* USE_PREFETCH */
93+
}
94+
95+
return result;
9096
}
9197

9298

Diff for: src/backend/storage/smgr/md.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -524,21 +524,26 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
524524
/*
525525
* mdprefetch() -- Initiate asynchronous read of the specified block of a relation
526526
*/
527-
void
527+
bool
528528
mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
529529
{
530530
#ifdef USE_PREFETCH
531531
off_t seekpos;
532532
MdfdVec *v;
533533

534-
v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
534+
v = _mdfd_getseg(reln, forknum, blocknum, false,
535+
InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
536+
if (v == NULL)
537+
return false;
535538

536539
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
537540

538541
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
539542

540543
(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ, WAIT_EVENT_DATA_FILE_PREFETCH);
541544
#endif /* USE_PREFETCH */
545+
546+
return true;
542547
}
543548

544549
/*

Diff for: src/backend/storage/smgr/smgr.c

+7-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ typedef struct f_smgr
4949
bool isRedo);
5050
void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
5151
BlockNumber blocknum, char *buffer, bool skipFsync);
52-
void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
52+
bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
5353
BlockNumber blocknum);
5454
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
5555
BlockNumber blocknum, char *buffer);
@@ -524,11 +524,15 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
524524

525525
/*
526526
* smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
527+
*
528+
* In recovery only, this can return false to indicate that a file
529+
* doesn't exist (presumably it has been dropped by a later WAL
530+
* record).
527531
*/
528-
void
532+
bool
529533
smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
530534
{
531-
smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
535+
return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum);
532536
}
533537

534538
/*

Diff for: src/include/storage/buf_internals.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,9 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
327327
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
328328

329329
/* localbuf.c */
330-
extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
331-
BlockNumber blockNum);
330+
extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
331+
ForkNumber forkNum,
332+
BlockNumber blockNum);
332333
extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
333334
BlockNumber blockNum, bool *foundPtr);
334335
extern void MarkLocalBufferDirty(Buffer buffer);

Diff for: src/include/storage/bufmgr.h

+14-2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@ typedef enum
4646
* replay; otherwise same as RBM_NORMAL */
4747
} ReadBufferMode;
4848

49+
/*
50+
* Type returned by PrefetchBuffer().
51+
*/
52+
typedef struct PrefetchBufferResult
53+
{
54+
Buffer recent_buffer; /* If valid, a hit (recheck needed!) */
55+
bool initiated_io; /* If true, a miss resulting in async I/O */
56+
} PrefetchBufferResult;
57+
4958
/* forward declared, to avoid having to expose buf_internals.h here */
5059
struct WritebackContext;
5160

@@ -162,8 +171,11 @@ extern PGDLLIMPORT int32 *LocalRefCount;
162171
/*
163172
* prototypes for functions in bufmgr.c
164173
*/
165-
extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
166-
BlockNumber blockNum);
174+
extern PrefetchBufferResult PrefetchSharedBuffer(struct SMgrRelationData *smgr_reln,
175+
ForkNumber forkNum,
176+
BlockNumber blockNum);
177+
extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
178+
BlockNumber blockNum);
167179
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
168180
extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
169181
BlockNumber blockNum, ReadBufferMode mode,

Diff for: src/include/storage/md.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
2828
extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
2929
extern void mdextend(SMgrRelation reln, ForkNumber forknum,
3030
BlockNumber blocknum, char *buffer, bool skipFsync);
31-
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
31+
extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
3232
BlockNumber blocknum);
3333
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
3434
char *buffer);

Diff for: src/include/storage/smgr.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ extern void smgrdosyncall(SMgrRelation *rels, int nrels);
9393
extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
9494
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
9595
BlockNumber blocknum, char *buffer, bool skipFsync);
96-
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
96+
extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
9797
BlockNumber blocknum);
9898
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
9999
BlockNumber blocknum, char *buffer);

0 commit comments

Comments
 (0)