Skip to content

Commit b04b71a

Browse files
committed
merge bitcoin#24858: incorrect blk file size calculation during reindex results in recoverable blk file corruption
1 parent 9e75b99 commit b04b71a

File tree

5 files changed

+65
-5
lines changed

5 files changed

+65
-5
lines changed

src/Makefile.test.include

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ BITCOIN_TESTS =\
8989
test/blockencodings_tests.cpp \
9090
test/blockfilter_tests.cpp \
9191
test/blockfilter_index_tests.cpp \
92+
test/blockmanager_tests.cpp \
9293
test/bloom_tests.cpp \
9394
test/bls_tests.cpp \
9495
test/bswap_tests.cpp \

src/node/blockstorage.cpp

+9-4
Original file line numberDiff line numberDiff line change
@@ -790,19 +790,24 @@ bool ReadBlockFromDisk(CBlock& block, const CBlockIndex* pindex, const Consensus
790790
return true;
791791
}
792792

793-
/** Store block on disk. If dbp is non-nullptr, the file is known to already reside on disk */
794793
FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp)
795794
{
796795
unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION);
797796
FlatFilePos blockPos;
798-
if (dbp != nullptr) {
797+
const auto position_known {dbp != nullptr};
798+
if (position_known) {
799799
blockPos = *dbp;
800+
} else {
801+
// when known, blockPos.nPos points at the offset of the block data in the blk file. that already accounts for
802+
// the serialization header present in the file (the 4 magic message start bytes + the 4 length bytes = 8 bytes = BLOCK_SERIALIZATION_HEADER_SIZE).
803+
// we add BLOCK_SERIALIZATION_HEADER_SIZE only for new blocks since they will have the serialization header added when written to disk.
804+
nBlockSize += static_cast<unsigned int>(BLOCK_SERIALIZATION_HEADER_SIZE);
800805
}
801-
if (!FindBlockPos(blockPos, nBlockSize + 8, nHeight, active_chain, block.GetBlockTime(), dbp != nullptr)) {
806+
if (!FindBlockPos(blockPos, nBlockSize, nHeight, active_chain, block.GetBlockTime(), position_known)) {
802807
error("%s: FindBlockPos failed", __func__);
803808
return FlatFilePos();
804809
}
805-
if (dbp == nullptr) {
810+
if (!position_known) {
806811
if (!WriteBlockToDisk(block, blockPos, chainparams.MessageStart())) {
807812
AbortNode("Failed to write block");
808813
return FlatFilePos();

src/node/blockstorage.h

+4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB
4747
/** The maximum size of a blk?????.dat file (since 0.8) */
4848
static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB
4949

50+
/** Size of header written by WriteBlockToDisk before a serialized CBlock */
51+
static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE = CMessageHeader::MESSAGE_START_SIZE + sizeof(unsigned int);
52+
5053
extern std::atomic_bool fImporting;
5154
extern std::atomic_bool fReindex;
5255
/** Pruning-related variables and constants */
@@ -185,6 +188,7 @@ class BlockManager
185188
bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex* pindex, const CChainParams& chainparams)
186189
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
187190

191+
/** Store block on disk. If dbp is not nullptr, then it provides the known position of the block within a block file on disk. */
188192
FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp);
189193

190194
/** Calculate the amount of disk space the block & undo files currently use */

src/test/blockmanager_tests.cpp

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Copyright (c) 2022 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <chainparams.h>
6+
#include <node/blockstorage.h>
7+
#include <node/context.h>
8+
#include <validation.h>
9+
10+
#include <boost/test/unit_test.hpp>
11+
#include <test/util/setup_common.h>
12+
13+
// use BasicTestingSetup here for the data directory configuration, setup, and cleanup
14+
BOOST_FIXTURE_TEST_SUITE(blockmanager_tests, BasicTestingSetup)
15+
16+
BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
17+
{
18+
const auto params {CreateChainParams(ArgsManager{}, CBaseChainParams::MAIN)};
19+
BlockManager blockman {};
20+
CChain chain {};
21+
// simulate adding a genesis block normally
22+
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, nullptr).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
23+
// simulate what happens during reindex
24+
// simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
25+
// the block is found at offset 8 because there is an 8 byte serialization header
26+
// consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
27+
FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE};
28+
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, &pos).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
29+
// now simulate what happens after reindex for the first new block processed
30+
// the actual block contents don't matter, just that it's a block.
31+
// verify that the write position is at offset 0x12d.
32+
// this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur
33+
// 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
34+
// add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
35+
FlatFilePos actual{blockman.SaveBlockToDisk(params->GenesisBlock(), 1, chain, *params, nullptr)};
36+
BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(params->GenesisBlock(), CLIENT_VERSION) + BLOCK_SERIALIZATION_HEADER_SIZE);
37+
}
38+
39+
BOOST_AUTO_TEST_SUITE_END()

src/validation.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -4705,7 +4705,18 @@ void CChainState::LoadExternalBlockFile(
47054705
}
47064706
}
47074707
} catch (const std::exception& e) {
4708-
LogPrintf("%s: Deserialize or I/O error - %s\n", __func__, e.what());
4708+
// historical bugs added extra data to the block files that does not deserialize cleanly.
4709+
// commonly this data is between readable blocks, but it does not really matter. such data is not fatal to the import process.
4710+
// the code that reads the block files deals with invalid data by simply ignoring it.
4711+
// it continues to search for the next {4 byte magic message start bytes + 4 byte length + block} that does deserialize cleanly
4712+
// and passes all of the other block validation checks dealing with POW and the merkle root, etc...
4713+
// we merely note with this informational log message when unexpected data is encountered.
4714+
// we could also be experiencing a storage system read error, or a read of a previous bad write. these are possible, but
4715+
// less likely scenarios. we don't have enough information to tell a difference here.
4716+
// the reindex process is not the place to attempt to clean and/or compact the block files. if so desired, a studious node operator
4717+
// may use knowledge of the fact that the block files are not entirely pristine in order to prepare a set of pristine, and
4718+
// perhaps ordered, block files for later reindexing.
4719+
LogPrint(BCLog::REINDEX, "%s: unexpected data at file offset 0x%x - %s. continuing\n", __func__, (nRewind - 1), e.what());
47094720
}
47104721
}
47114722
} catch (const std::runtime_error& e) {

0 commit comments

Comments
 (0)