Skip to content

Commit

Permalink
postgres_ffi: find_end_of_wal_segment: clarify code around xl_crc ret…
Browse files Browse the repository at this point in the history
…rieval

It would be better to not update xl_crc/rec_hdr at all when skipping contrecord,
but I would prefer to keep PR #1574 small.
Better audit of `find_end_of_wal_segment` is coming anyway in #544.
  • Loading branch information
yeputons committed May 20, 2022
1 parent 2c4a86e commit aeac127
Showing 1 changed file with 29 additions and 2 deletions.
31 changes: 29 additions & 2 deletions libs/postgres_ffi/src/xlog_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use std::path::{Path, PathBuf};
use std::time::SystemTime;
use utils::bin_ser::DeserializeError;
use utils::bin_ser::SerializeError;
use utils::const_assert;
use utils::lsn::Lsn;

pub const XLOG_FNAME_LEN: usize = 24;
Expand Down Expand Up @@ -159,6 +160,8 @@ fn find_end_of_wal_segment(
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
file.seek(SeekFrom::Start(offs as u64))?;
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];

trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
Expand Down Expand Up @@ -267,7 +270,7 @@ fn find_end_of_wal_segment(
pageleft,
contlen
);
// fill rec_hdr (header up to (but not including) xl_crc field)
// fill rec_hdr header up to (but not including) xl_crc field
trace!(
" rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
rec_offs,
Expand All @@ -287,6 +290,14 @@ fn find_end_of_wal_segment(
}
if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
// All records are aligned on 8-byte boundary, so their 8-byte frames
// cannot be split between pages. As xl_crc is the last field,
// its content is always on the same page.
const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
// We should always start reading aligned records even in incorrect WALs so if
// the condition is false it is likely a bug. However, it is localized somewhere
// in this function, hence we do not crash and just report failure instead.
ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
trace!(
" reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
Expand All @@ -301,7 +312,9 @@ fn find_end_of_wal_segment(
page_offs + n,
crc
);
} else {
} else if rec_offs > XLOG_RECORD_CRC_OFFS {
// As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
let old_crc = crc;
crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
trace!(
Expand All @@ -311,6 +324,20 @@ fn find_end_of_wal_segment(
old_crc,
crc
);
} else {
// Correct because of the way conditions are written above.
assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
// If `skipping_first_contrecord == true`, we may be reading from a middle of a record
// which started in the previous segment. Hence there is no point in validating the header.
if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
info!(
"Curiously corrupted WAL: a record stops inside the header; \
offs=0x{:x}, record continuation, pageleft={}, contlen={}",
offs, pageleft, contlen
);
break;
}
// Do nothing: we are still reading the header. It's accounted in CRC in the end of the record.
}
rec_offs += n;
offs += n;
Expand Down

0 comments on commit aeac127

Please sign in to comment.