Skip to content

Commit d755b69

Browse files
committed
Break into subcommands, add a lot of --help messages
1 parent b8589e9 commit d755b69

File tree

1 file changed

+167
-84
lines changed

1 file changed

+167
-84
lines changed

scripts/gethimport.py

Lines changed: 167 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -95,24 +95,13 @@ def __init__(self, ancient_path, name, uses_compression):
9595
self.ancient_path = ancient_path
9696
self.name = name
9797
self.uses_compression = uses_compression
98-
logger.debug(f'opening freezer table. name={self.name}')
9998

10099
self.index_file = open(os.path.join(ancient_path, self.index_file_name), 'rb')
101100
stat_result = os.stat(self.index_file.fileno())
102101
index_file_size = stat_result.st_size
103102
assert index_file_size % 6 == 0, index_file_size
104-
logger.debug(f'index_size={index_file_size} ({index_file_size // 6} entries)')
105103
self.entries = index_file_size // 6
106104

107-
first_index_bytes = self.index_file.read(6)
108-
first_index = GethFreezerIndexEntry.from_bytes(first_index_bytes)
109-
logger.debug(f'first_index={first_index}')
110-
111-
self.index_file.seek(-6, 2)
112-
last_index_bytes = self.index_file.read(6)
113-
last_index = GethFreezerIndexEntry.from_bytes(last_index_bytes)
114-
logger.debug(f'last_index={last_index}')
115-
116105
self._data_files = dict()
117106

118107
@property
@@ -163,6 +152,19 @@ def __del__(self) -> None:
163152
f.close()
164153
self.index_file.close()
165154

155+
@property
156+
def last_index(self):
157+
self.index_file.seek(-6, 2)
158+
last_index_bytes = self.index_file.read(6)
159+
return GethFreezerIndexEntry.from_bytes(last_index_bytes)
160+
161+
@property
162+
def first_index(self):
163+
self.index_file.seek(0)
164+
first_index_bytes = self.index_file.read(6)
165+
return GethFreezerIndexEntry.from_bytes(first_index_bytes)
166+
167+
166168

167169
class BlockBody(rlp.Serializable):
168170
"This is how geth stores block bodies"
@@ -274,7 +276,6 @@ def open_gethdb(location):
274276

275277
last_block = gethdb.last_block_hash
276278
last_block_num = gethdb.block_num_for_hash(last_block)
277-
logger.info('geth database opened')
278279
logger.info(f'found geth chain tip: header_hash={humanize_hash(last_block)} block_number={last_block_num}')
279280

280281
genesis_hash = gethdb.header_hash_for_block_number(0)
@@ -297,17 +298,18 @@ def open_trinitydb(location):
297298
logger.info(f'Trinity database did not already exist, initializing it now')
298299
chain = MainnetChain.from_genesis_header(leveldb, MAINNET_GENESIS_HEADER)
299300

300-
# from_genesis_header copied the header over to our trinity db but not the state
301+
logger.warning('The new db contains the genesis header but not the genesis state.')
302+
logger.warning('Attempts to full sync will fail.')
301303

302304
return chain
303305

304306

305-
def main(args):
306-
gethdb = open_gethdb(args.gethdb)
307-
chain = open_trinitydb(args.destdb)
307+
def import_headers(gethdb, chain):
308308
headerdb = chain.headerdb
309309

310-
# 3. Import headers + bodies
310+
logger.warning('Some features are not yet implemented:')
311+
logger.warning('- This only supports importing the mainnet chain')
312+
logger.warning('- This script will not verify that geth is using the mainnet chain')
311313

312314
canonical_head = headerdb.get_canonical_head()
313315
logger.info(f'starting import from trinity\'s canonical head: {canonical_head}')
@@ -324,13 +326,9 @@ def main(args):
324326
final_block_to_sync = min(args.syncuntil, final_block_to_sync)
325327

326328
for i in range(canonical_head.block_number, final_block_to_sync + 1):
327-
328-
if not args.nobodies:
329-
import_block_body(gethdb, chain, i)
330-
else:
331-
header_hash = gethdb.header_hash_for_block_number(i)
332-
header = gethdb.block_header(i, header_hash)
333-
headerdb.persist_header(header)
329+
header_hash = gethdb.header_hash_for_block_number(i)
330+
header = gethdb.block_header(i, header_hash)
331+
headerdb.persist_header(header)
334332

335333
if i % 1000 == 0:
336334
logger.debug(f'current canonical header: {headerdb.get_canonical_head()}')
@@ -344,54 +342,14 @@ def main(args):
344342

345343
logger.info('finished importing headers + bodies')
346344

347-
if args.justblocks:
348-
return
349-
350-
scan_state(gethdb, leveldb)
351-
return
352-
353-
state_root = canonical_head.state_root
354-
logger.info(f'starting state trie import: {humanize_hash(state_root)}')
355-
356-
# 4. Import the state trie + storage tries
357-
# Write something which iterates over the entire trie, from left to right
358-
# Pass it a database which first looks in the trinity db, and if nothing is there
359-
# copies the requested node from geth->trinity before returning it
360-
361-
imported_leaf_count = 0
362-
importdb = ImportDatabase(gethdb=gethdb.db, trinitydb=leveldb.db)
363-
for path, leaf_data in iterate_leaves(importdb, state_root):
364-
account = rlp.decode(leaf_data, sedes=Account)
365-
addr_hash = nibbles_to_bytes(path)
366345

367-
368-
if account.code_hash != EMPTY_SHA3:
369-
bytecode = importdb.get(account.code_hash)
370-
371-
if account.storage_root == BLANK_ROOT_HASH:
372-
imported_leaf_count += 1
373-
374-
if imported_leaf_count % 1000 == 0:
375-
logger.debug(f'progress sha(addr)={addr_hash.hex()}')
376-
continue
377-
378-
for path, leaf_data in iterate_leaves(importdb, account.storage_root):
379-
item_addr = nibbles_to_bytes(path)
380-
imported_leaf_count += 1
381-
382-
if imported_leaf_count % 1000 == 0:
383-
logger.debug(f'progress sha(addr)={addr_hash.hex()} sha(item)={item_addr.hex()}')
384-
385-
logger.info('successfully imported state trie and all storage tries')
386-
387-
388-
def scan_state(gethdb: GethDatabase, trinitydb: LevelDB):
346+
def sweep_state(gethdb: GethDatabase, trinitydb: LevelDB):
389347
"""
390348
Imports state, but by indiscriminately copying over everything which might be part of
391349
the state trie. This copies more data than necessary, but is likely to be much faster
392350
than iterating all state.
393351
"""
394-
logger.debug('scan_state: bulk-importing state entries')
352+
logger.debug('sweep_state: bulk-importing state entries')
395353

396354
iterator = gethdb.db.iterator(
397355
start=b'\x00'*32,
@@ -416,7 +374,43 @@ def scan_state(gethdb: GethDatabase, trinitydb: LevelDB):
416374
break
417375
bucket = (int.from_bytes(bucket, 'big') + 1).to_bytes(2, 'big')
418376

419-
logger.info(f'scan_state: successfully imported {imported_entries} state entries')
377+
logger.info(f'sweep_state: successfully imported {imported_entries} state entries')
378+
379+
380+
def import_state(gethdb: GethDatabase, chain):
381+
headerdb = chain.headerdb
382+
canonical_head = headerdb.get_canonical_head()
383+
state_root = canonical_head.state_root
384+
385+
logger.info(
386+
f'starting state trie import. canonical_head={canonical_head} '
387+
f'state_root={humanize_hash(state_root)}'
388+
)
389+
390+
imported_leaf_count = 0
391+
importdb = ImportDatabase(gethdb=gethdb.db, trinitydb=leveldb.db)
392+
for path, leaf_data in iterate_leaves(importdb, state_root):
393+
account = rlp.decode(leaf_data, sedes=Account)
394+
addr_hash = nibbles_to_bytes(path)
395+
396+
if account.code_hash != EMPTY_SHA3:
397+
bytecode = importdb.get(account.code_hash)
398+
399+
if account.storage_root == BLANK_ROOT_HASH:
400+
imported_leaf_count += 1
401+
402+
if imported_leaf_count % 1000 == 0:
403+
logger.debug(f'progress sha(addr)={addr_hash.hex()}')
404+
continue
405+
406+
for path, leaf_data in iterate_leaves(importdb, account.storage_root):
407+
item_addr = nibbles_to_bytes(path)
408+
imported_leaf_count += 1
409+
410+
if imported_leaf_count % 1000 == 0:
411+
logger.debug(f'progress sha(addr)={addr_hash.hex()} sha(item)={item_addr.hex()}')
412+
413+
logger.info('successfully imported state trie and all storage tries')
420414

421415

422416
def import_block_body(gethdb, chain, block_number: int):
@@ -429,7 +423,7 @@ def import_block_body(gethdb, chain, block_number: int):
429423
chain.chaindb.persist_block(block)
430424

431425
# persist_block saves the transactions into an index, but doesn't actually persist the
432-
# transaction trie, meaning that without this next block attempts to read out the
426+
# transaction trie, meaning that without this next section attempts to read out the
433427
# block will throw an exception
434428
tx_root_hash, tx_kv_nodes = make_trie_root_and_nodes(body.transactions)
435429
assert tx_root_hash == block.header.transaction_root
@@ -489,32 +483,112 @@ def read_receipts(gethdb, block_number):
489483
logger.info(f'- post_state_or_status={post_state} gas_used={gas_used} len(logs)={len(logs)}')
490484

491485

486+
def read_geth(gethdb):
487+
logger.info(f'database_version={gethdb.database_version}')
488+
489+
ancient_entry_count = gethdb.ancient_hashes.entries
490+
logger.info(f'entries_in_ancient_db={ancient_entry_count}')
491+
492+
493+
def read_trinity(location):
494+
if not os.path.exists(location):
495+
logger.error(f'There is no database at {location}')
496+
return
497+
498+
chain = open_trinitydb(location)
499+
headerdb = chain.headerdb
500+
501+
canonical_head = headerdb.get_canonical_head()
502+
logger.info(f'canonical_head={canonical_head}')
503+
504+
492505
if __name__ == "__main__":
493506
logging.basicConfig(
494507
level=logging.DEBUG,
495508
format='%(asctime)s.%(msecs)03d %(levelname)s: %(message)s',
496509
datefmt='%H:%M:%S'
497510
)
498511

499-
parser = argparse.ArgumentParser()
500-
parser.add_argument('-gethdb', type=str, required=True)
501-
parser.add_argument('-destdb', type=str, required=True)
502-
parser.add_argument('-justblocks', action='store_true')
503-
parser.add_argument('-nobodies', action='store_true')
504-
parser.add_argument('-syncuntil', type=int, action='store')
505-
506-
subparsers = parser.add_subparsers(dest="command")
512+
parser = argparse.ArgumentParser(
513+
description="Import chaindata from geth: builds a database py-evm understands.",
514+
epilog="For more information on using a subcommand: 'subcommand --help'"
515+
)
516+
subparsers = parser.add_subparsers(dest="command", title="subcommands")
517+
518+
import_headers_parser = subparsers.add_parser(
519+
'import_headers',
520+
help="Copies over headers from geth into trinity",
521+
description="""
522+
copies every header, starting from trinity's canonical chain tip,
523+
continuing up to geth's canonical chain tip
524+
"""
525+
)
526+
import_headers_parser.add_argument('-gethdb', type=str, required=True)
527+
import_headers_parser.add_argument('-destdb', type=str, required=True)
528+
import_headers_parser.add_argument(
529+
'-syncuntil', type=int, action='store',
530+
help="Only import headers up to this block number"
531+
)
507532

508-
import_body_range_parser = subparsers.add_parser('import_body_range')
533+
sweep_state_parser = subparsers.add_parser(
534+
'sweep_state',
535+
help="Does a (very fast) bulk copy of state entries from the gethdb",
536+
description="""
537+
Scans over every key:value pair in the geth database, and copies over
538+
everything which looks like a state node (has a 32-byte key). This is
539+
much faster than iterating over the state trie (as import_state does)
540+
but imports too much. If a geth node has been running for a while (and
541+
started and stopped a lot) then there will be a lot of unimportant
542+
state entries.
543+
"""
544+
)
545+
sweep_state_parser.add_argument('-gethdb', type=str, required=True)
546+
sweep_state_parser.add_argument('-destdb', type=str, required=True)
547+
548+
import_body_range_parser = subparsers.add_parser(
549+
'import_body_range',
550+
help="Imports block bodies (transactions and uncles, but not receipts)",
551+
description="""
552+
block bodies take a while to import so this command lets you import
553+
just the segment you need. -startblock and -endblock are inclusive.
554+
"""
555+
)
556+
import_body_range_parser.add_argument('-gethdb', type=str, required=True)
557+
import_body_range_parser.add_argument('-destdb', type=str, required=True)
509558
import_body_range_parser.add_argument('-startblock', type=int, required=True)
510559
import_body_range_parser.add_argument('-endblock', type=int, required=True)
511560

512-
process_blocks_parser = subparsers.add_parser('process_blocks')
561+
process_blocks_parser = subparsers.add_parser(
562+
'process_blocks',
563+
help="Simulates a full sync, runs each block.",
564+
description="""
565+
Starting from trinity's canonical chain tip this fetches block bodies
566+
from the gethdb and runs each of them.
567+
"""
568+
)
569+
process_blocks_parser.add_argument('-gethdb', type=str, required=True)
570+
process_blocks_parser.add_argument('-destdb', type=str, required=True)
513571
process_blocks_parser.add_argument('-endblock', type=int, required=True)
514572

515-
read_receipts_parser = subparsers.add_parser('read_receipts')
573+
read_receipts_parser = subparsers.add_parser(
574+
'read_receipts',
575+
help="Helper to inspect all the receipts for a given block"
576+
)
577+
read_receipts_parser.add_argument('-gethdb', type=str, required=True)
516578
read_receipts_parser.add_argument('-block', type=int, required=True)
517579

580+
read_trinity_parser = subparsers.add_parser(
581+
'read_trinity',
582+
help="Helper to print summary statistics for a given trinitydb"
583+
)
584+
read_trinity_parser.add_argument('-destdb', type=str, required=True)
585+
586+
read_geth_parser = subparsers.add_parser(
587+
'read_geth',
588+
help="Helper to print summary statistics for a given gethdb"
589+
)
590+
read_geth_parser.add_argument('-gethdb', type=str, required=True)
591+
518592
args = parser.parse_args()
519593

520594
if args.command == 'import_body_range':
@@ -528,9 +602,18 @@ def read_receipts(gethdb, block_number):
528602
elif args.command == 'read_receipts':
529603
gethdb = open_gethdb(args.gethdb)
530604
read_receipts(gethdb, args.block)
605+
elif args.command == 'read_geth':
606+
gethdb = open_gethdb(args.gethdb)
607+
read_geth(gethdb)
608+
elif args.command == 'read_trinity':
609+
read_trinity(args.destdb)
610+
elif args.command == 'import_headers':
611+
gethdb = open_gethdb(args.gethdb)
612+
chain = open_trinitydb(args.destdb)
613+
import_headers(gethdb, chain)
614+
elif args.command == 'sweep_state':
615+
gethdb = open_gethdb(args.gethdb)
616+
chain = open_trinitydb(args.destdb)
617+
sweep_state(gethdb, chain.headerdb.db)
531618
else:
532-
main(args)
533-
534-
logger.warning('Some features are not yet implemented:')
535-
logger.warning('- Receipts were not imported')
536-
logger.warning('- This script did not verify that the chain configs match')
619+
logger.error(f'unrecognized command. command={args.command}')

0 commit comments

Comments
 (0)