@@ -95,24 +95,13 @@ def __init__(self, ancient_path, name, uses_compression):
         self.ancient_path = ancient_path
         self.name = name
         self.uses_compression = uses_compression
-        logger.debug(f'opening freezer table. name={self.name}')

         self.index_file = open(os.path.join(ancient_path, self.index_file_name), 'rb')
         stat_result = os.stat(self.index_file.fileno())
         index_file_size = stat_result.st_size
         assert index_file_size % 6 == 0, index_file_size
-        logger.debug(f'index_size={index_file_size} ({index_file_size // 6} entries)')
         self.entries = index_file_size // 6

-        first_index_bytes = self.index_file.read(6)
-        first_index = GethFreezerIndexEntry.from_bytes(first_index_bytes)
-        logger.debug(f'first_index={first_index}')
-
-        self.index_file.seek(-6, 2)
-        last_index_bytes = self.index_file.read(6)
-        last_index = GethFreezerIndexEntry.from_bytes(last_index_bytes)
-        logger.debug(f'last_index={last_index}')
-
         self._data_files = dict()

     @property
@@ -163,6 +152,19 @@ def __del__(self) -> None:
             f.close()
         self.index_file.close()

+    @property
+    def last_index(self):
+        self.index_file.seek(-6, 2)
+        last_index_bytes = self.index_file.read(6)
+        return GethFreezerIndexEntry.from_bytes(last_index_bytes)
+
+    @property
+    def first_index(self):
+        self.index_file.seek(0)
+        first_index_bytes = self.index_file.read(6)
+        return GethFreezerIndexEntry.from_bytes(first_index_bytes)
+
+

 class BlockBody(rlp.Serializable):
     "This is how geth stores block bodies"
@@ -274,7 +276,6 @@ def open_gethdb(location):

     last_block = gethdb.last_block_hash
     last_block_num = gethdb.block_num_for_hash(last_block)
-    logger.info('geth database opened')
     logger.info(f'found geth chain tip: header_hash={humanize_hash(last_block)} block_number={last_block_num}')

     genesis_hash = gethdb.header_hash_for_block_number(0)
@@ -297,17 +298,18 @@ def open_trinitydb(location):
     logger.info(f'Trinity database did not already exist, initializing it now')
     chain = MainnetChain.from_genesis_header(leveldb, MAINNET_GENESIS_HEADER)

-    # from_genesis_header copied the header over to our trinity db but not the state
+    logger.warning('The new db contains the genesis header but not the genesis state.')
+    logger.warning('Attempts to full sync will fail.')

     return chain


-def main(args):
-    gethdb = open_gethdb(args.gethdb)
-    chain = open_trinitydb(args.destdb)
+def import_headers(gethdb, chain):
     headerdb = chain.headerdb

-    # 3. Import headers + bodies
+    logger.warning('Some features are not yet implemented:')
+    logger.warning('- This only supports importing the mainnet chain')
+    logger.warning('- This script will not verify that geth is using the mainnet chain')

     canonical_head = headerdb.get_canonical_head()
     logger.info(f'starting import from trinity\'s canonical head: {canonical_head}')
@@ -324,13 +326,9 @@ def main(args):
         final_block_to_sync = min(args.syncuntil, final_block_to_sync)

     for i in range(canonical_head.block_number, final_block_to_sync + 1):
-
-        if not args.nobodies:
-            import_block_body(gethdb, chain, i)
-        else:
-            header_hash = gethdb.header_hash_for_block_number(i)
-            header = gethdb.block_header(i, header_hash)
-            headerdb.persist_header(header)
+        header_hash = gethdb.header_hash_for_block_number(i)
+        header = gethdb.block_header(i, header_hash)
+        headerdb.persist_header(header)

         if i % 1000 == 0:
             logger.debug(f'current canonical header: {headerdb.get_canonical_head()}')
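For context on the loop above: header_hash_for_block_number presumably reads geth's canonical-hash index. A sketch of what that lookup likely looks like, assuming go-ethereum's rawdb key schema (the schema is an assumption, it is not shown in this diff):

def header_hash_for_block_number(db, block_number: int) -> bytes:
    # assumed go-ethereum schema: b'h' + number (8 bytes, big-endian) + b'n'
    # maps a block number to its canonical header hash
    key = b'h' + block_number.to_bytes(8, 'big') + b'n'
    return db.get(key)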
@@ -344,54 +342,14 @@ def main(args):

     logger.info('finished importing headers + bodies')

-    if args.justblocks:
-        return
-
-    scan_state(gethdb, leveldb)
-    return
-
-    state_root = canonical_head.state_root
-    logger.info(f'starting state trie import: {humanize_hash(state_root)}')
-
-    # 4. Import the state trie + storage tries
-    # Write something which iterates over the entire trie, from left to right
-    # Pass it a database which first looks in the trinity db, and if nothing is there
-    # copies the requested node from geth->trinity before returning it
-
-    imported_leaf_count = 0
-    importdb = ImportDatabase(gethdb=gethdb.db, trinitydb=leveldb.db)
-    for path, leaf_data in iterate_leaves(importdb, state_root):
-        account = rlp.decode(leaf_data, sedes=Account)
-        addr_hash = nibbles_to_bytes(path)
-
-        if account.code_hash != EMPTY_SHA3:
-            bytecode = importdb.get(account.code_hash)
-
-        if account.storage_root == BLANK_ROOT_HASH:
-            imported_leaf_count += 1
-
-            if imported_leaf_count % 1000 == 0:
-                logger.debug(f'progress sha(addr)={addr_hash.hex()}')
-            continue
-
-        for path, leaf_data in iterate_leaves(importdb, account.storage_root):
-            item_addr = nibbles_to_bytes(path)
-            imported_leaf_count += 1
-
-            if imported_leaf_count % 1000 == 0:
-                logger.debug(f'progress sha(addr)={addr_hash.hex()} sha(item)={item_addr.hex()}')
-
-    loger.info('successfully imported state trie and all storage tries')
-
-
-def scan_state(gethdb: GethDatabase, trinitydb: LevelDB):
+def sweep_state(gethdb: GethDatabase, trinitydb: LevelDB):
     """
     Imports state, but by indiscriminately copying over everything which might be part of
     the state trie. This copies more data than necessary, but is likely to be much faster
     than iterating all state.
     """
-    logger.debug('scan_state: bulk-importing state entries')
+    logger.debug('sweep_state: bulk-importing state entries')

     iterator = gethdb.db.iterator(
         start=b'\x00' * 32,
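The middle of sweep_state is elided between this hunk and the next, but the visible edges (the start key above, the bucket increment below) suggest it walks the keyspace one 2-byte prefix bucket at a time, copying anything with a 32-byte key. A condensed sketch of that technique, assuming a plyvel-style iterator that accepts a prefix argument and a dict-like destination db:

def sweep_state_sketch(gethdb_db, trinity_db) -> int:
    imported_entries = 0
    bucket = b'\x00\x00'
    while True:
        for key, value in gethdb_db.iterator(prefix=bucket):
            if len(key) == 32:  # state trie nodes are keyed by their 32-byte hash
                trinity_db[key] = value
                imported_entries += 1
        # a progress log between buckets is what makes the bucketing worthwhile
        if bucket == b'\xff\xff':
            break
        bucket = (int.from_bytes(bucket, 'big') + 1).to_bytes(2, 'big')
    return imported_entries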
@@ -416,7 +374,43 @@ def scan_state(gethdb: GethDatabase, trinitydb: LevelDB):
             break
         bucket = (int.from_bytes(bucket, 'big') + 1).to_bytes(2, 'big')

-    logger.info(f'scan_state: successfully imported {imported_entries} state entries')
+    logger.info(f'sweep_state: successfully imported {imported_entries} state entries')
+
+
+def import_state(gethdb: GethDatabase, chain):
+    headerdb = chain.headerdb
+    canonical_head = headerdb.get_canonical_head()
+    state_root = canonical_head.state_root
+
+    logger.info(
+        f'starting state trie import. canonical_head={canonical_head} '
+        f'state_root={humanize_hash(state_root)}'
+    )
+
+    imported_leaf_count = 0
+    importdb = ImportDatabase(gethdb=gethdb.db, trinitydb=chain.headerdb.db)
+    for path, leaf_data in iterate_leaves(importdb, state_root):
+        account = rlp.decode(leaf_data, sedes=Account)
+        addr_hash = nibbles_to_bytes(path)
+
+        if account.code_hash != EMPTY_SHA3:
+            bytecode = importdb.get(account.code_hash)
+
+        if account.storage_root == BLANK_ROOT_HASH:
+            imported_leaf_count += 1
+
+            if imported_leaf_count % 1000 == 0:
+                logger.debug(f'progress sha(addr)={addr_hash.hex()}')
+            continue
+
+        for path, leaf_data in iterate_leaves(importdb, account.storage_root):
+            item_addr = nibbles_to_bytes(path)
+            imported_leaf_count += 1
+
+            if imported_leaf_count % 1000 == 0:
+                logger.debug(f'progress sha(addr)={addr_hash.hex()} sha(item)={item_addr.hex()}')
+
+    logger.info('successfully imported state trie and all storage tries')


def import_block_body(gethdb, chain, block_number: int):
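ImportDatabase, which import_state relies on above, is presumably the copy-on-read wrapper described by the comment deleted in an earlier hunk: reads check the trinity db first, and misses are fetched from geth and persisted on the way out, so iterating the trie copies it as a side effect. A minimal sketch of that idea (the real class may differ):

class ImportDatabase:
    "Read-through copy: misses are pulled from the geth db into the trinity db."

    def __init__(self, gethdb, trinitydb):
        self.gethdb = gethdb
        self.trinitydb = trinitydb

    def get(self, key: bytes) -> bytes:
        try:
            return self.trinitydb[key]
        except KeyError:
            value = self.gethdb[key]     # raises KeyError if geth lacks it too
            self.trinitydb[key] = value  # persist so the node is now local
            return value

    __getitem__ = get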
@@ -429,7 +423,7 @@ def import_block_body(gethdb, chain, block_number: int):
     chain.chaindb.persist_block(block)

     # persist_block saves the transactions into an index, but doesn't actually persist the
-    # transaction trie, meaning that without this next block attempts to read out the
+    # transaction trie, meaning that without this next section attempts to read out the
     # block will throw an exception
     tx_root_hash, tx_kv_nodes = make_trie_root_and_nodes(body.transactions)
     assert tx_root_hash == block.header.transaction_root
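The write that the comment above promises is elided after this hunk, but it presumably pushes tx_kv_nodes into the database; py-evm's ChainDB exposes persist_trie_data_dict for bulk node writes of this kind, though whether this script uses exactly that call is an assumption:

# hedged sketch: persist the transaction trie nodes so the block can be re-read
chain.chaindb.persist_trie_data_dict(tx_kv_nodes)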
@@ -489,32 +483,112 @@ def read_receipts(gethdb, block_number):
         logger.info(f'- post_state_or_status={post_state} gas_used={gas_used} len(logs)={len(logs)}')


+def read_geth(gethdb):
+    logger.info(f'database_version={gethdb.database_version}')
+
+    ancient_entry_count = gethdb.ancient_hashes.entries
+    logger.info(f'entries_in_ancient_db={ancient_entry_count}')
+
+
+def read_trinity(location):
+    if not os.path.exists(location):
+        logger.error(f'There is no database at {location}')
+        return
+
+    chain = open_trinitydb(location)
+    headerdb = chain.headerdb
+
+    canonical_head = headerdb.get_canonical_head()
+    logger.info(f'canonical_head={canonical_head}')
+
+
 if __name__ == "__main__":
     logging.basicConfig(
         level=logging.DEBUG,
         format='%(asctime)s.%(msecs)03d %(levelname)s: %(message)s',
         datefmt='%H:%M:%S'
     )

-    parser = argparse.ArgumentParser()
-    parser.add_argument('-gethdb', type=str, required=True)
-    parser.add_argument('-destdb', type=str, required=True)
-    parser.add_argument('-justblocks', action='store_true')
-    parser.add_argument('-nobodies', action='store_true')
-    parser.add_argument('-syncuntil', type=int, action='store')
-
-    subparsers = parser.add_subparsers(dest="command")
+    parser = argparse.ArgumentParser(
+        description="Import chaindata from geth: builds a database py-evm understands.",
+        epilog="For more information on using a subcommand: 'subcommand --help'"
+    )
+    subparsers = parser.add_subparsers(dest="command", title="subcommands")
+
+    import_headers_parser = subparsers.add_parser(
+        'import_headers',
+        help="Copies over headers from geth into trinity",
+        description="""
+            copies every header, starting from trinity's canonical chain tip,
+            continuing up to geth's canonical chain tip
+        """
+    )
+    import_headers_parser.add_argument('-gethdb', type=str, required=True)
+    import_headers_parser.add_argument('-destdb', type=str, required=True)
+    import_headers_parser.add_argument(
+        '-syncuntil', type=int, action='store',
+        help="Only import headers up to this block number"
+    )

-    import_body_range_parser = subparsers.add_parser('import_body_range')
+    sweep_state_parser = subparsers.add_parser(
+        'sweep_state',
+        help="Does a (very fast) bulk copy of state entries from the gethdb",
+        description="""
+            Scans over every key:value pair in the geth database, and copies over
+            everything which looks like a state node (has a 32-byte key). This is
+            much faster than iterating over the state trie (as import_state does)
+            but imports too much. If a geth node has been running for a while (and
+            started and stopped a lot) then there will be a lot of unimportant
+            state entries.
+        """
+    )
+    sweep_state_parser.add_argument('-gethdb', type=str, required=True)
+    sweep_state_parser.add_argument('-destdb', type=str, required=True)
+
+    import_body_range_parser = subparsers.add_parser(
+        'import_body_range',
+        help="Imports block bodies (transactions and uncles, but not receipts)",
+        description="""
+            block bodies take a while to import so this command lets you import
+            just the segment you need. -startblock and -endblock are inclusive.
+        """
+    )
+    import_body_range_parser.add_argument('-gethdb', type=str, required=True)
+    import_body_range_parser.add_argument('-destdb', type=str, required=True)
     import_body_range_parser.add_argument('-startblock', type=int, required=True)
     import_body_range_parser.add_argument('-endblock', type=int, required=True)

-    process_blocks_parser = subparsers.add_parser('process_blocks')
+    process_blocks_parser = subparsers.add_parser(
+        'process_blocks',
+        help="Simulates a full sync, runs each block.",
+        description="""
+            Starting from trinity's canonical chain tip this fetches block bodies
+            from the gethdb and runs each of them.
+        """
+    )
+    process_blocks_parser.add_argument('-gethdb', type=str, required=True)
+    process_blocks_parser.add_argument('-destdb', type=str, required=True)
     process_blocks_parser.add_argument('-endblock', type=int, required=True)

-    read_receipts_parser = subparsers.add_parser('read_receipts')
+    read_receipts_parser = subparsers.add_parser(
+        'read_receipts',
+        help="Helper to inspect all the receipts for a given block"
+    )
+    read_receipts_parser.add_argument('-gethdb', type=str, required=True)
     read_receipts_parser.add_argument('-block', type=int, required=True)

+    read_trinity_parser = subparsers.add_parser(
+        'read_trinity',
+        help="Helper to print summary statistics for a given trinitydb"
+    )
+    read_trinity_parser.add_argument('-destdb', type=str, required=True)
+
+    read_geth_parser = subparsers.add_parser(
+        'read_geth',
+        help="Helper to print summary statistics for a given gethdb"
+    )
+    read_geth_parser.add_argument('-gethdb', type=str, required=True)
+
     args = parser.parse_args()

     if args.command == 'import_body_range':
@@ -528,9 +602,18 @@ def read_receipts(gethdb, block_number):
     elif args.command == 'read_receipts':
         gethdb = open_gethdb(args.gethdb)
         read_receipts(gethdb, args.block)
+    elif args.command == 'read_geth':
+        gethdb = open_gethdb(args.gethdb)
+        read_geth(gethdb)
+    elif args.command == 'read_trinity':
+        read_trinity(args.destdb)
+    elif args.command == 'import_headers':
+        gethdb = open_gethdb(args.gethdb)
+        chain = open_trinitydb(args.destdb)
+        import_headers(gethdb, chain)
+    elif args.command == 'sweep_state':
+        gethdb = open_gethdb(args.gethdb)
+        chain = open_trinitydb(args.destdb)
+        sweep_state(gethdb, chain.headerdb.db)
     else:
-        main(args)
-
-        logger.warning('Some features are not yet implemented:')
-        logger.warning('- Receipts were not imported')
-        logger.warning('- This script did not verify that the chain configs match')
+        logger.error(f'unrecognized command. command={args.command}')
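Example invocations, assuming the script is saved as import_from_geth.py (the filename and the paths below are placeholders, not taken from this diff):

python import_from_geth.py import_headers -gethdb /data/geth/chaindata -destdb /data/trinity
python import_from_geth.py import_body_range -gethdb /data/geth/chaindata -destdb /data/trinity -startblock 0 -endblock 1000
python import_from_geth.py read_geth -gethdb /data/geth/chaindata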
0 commit comments