Skip to content

Commit 571c579

Browse files
committed
feat(shred): better shred-repair encoding and update shred tile publish
1 parent 6904159 commit 571c579

File tree

2 files changed

+95
-40
lines changed

2 files changed

+95
-40
lines changed

src/disco/fd_disco_base.h

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -124,29 +124,57 @@ fd_disco_replay_old_sig( ulong slot,
124124
FD_FN_CONST static inline ulong fd_disco_replay_old_sig_flags( ulong sig ) { return (sig & 0xFFUL); }
125125
FD_FN_CONST static inline ulong fd_disco_replay_old_sig_slot( ulong sig ) { return (sig >> 8); }
126126

127+
/* fd_disco_shred_repair_sig constructs a sig for the shred_repair link.
128+
The encoded fields vary depending on the type of the sig. The
129+
diagram below describes the encoding.
130+
131+
type (1) | is_code or data_completes (1) | slot (32) | fec_set_idx (15) | shred_idx or data_cnt or parent_off (15)
132+
[63] | [62] | [30, 61] | [15, 29] | [0, 14]
133+
134+
The first (most significant) bit of the sig is the sig type. After the second bit, the next 32 bits describe
135+
the slot number and 15 bits after that the fec_set_idx, regardless of
136+
the sig type. Note that if the bits are saturated, the caller MUST ignore the
137+
value extracted from the sig (i.e. UINT_MAX for slot and 2^15 - 1 for
138+
fec_set_idx).
139+
140+
The second bit and last 15 bits vary in interpretation depending on
141+
the sig type:
142+
143+
When type is 0, the sig describes a shred header. In this case, the
144+
second bit describes whether it is a coding shred (is_code) and the
145+
last 15 bits either describe a shred_idx if it's a data shred
146+
(is_code = 0) or the data_cnt if it's a coding shred (is_code = 1).
147+
148+
When type is 1, the sig describes a completed FEC set. In this case,
149+
the second bit describes whether the FEC set completes the entry
150+
batch, which will be true if the last data shred in the FEC set is
151+
marked with a DATA_COMPLETE flag (FIXME: this is not an invariant in the
152+
protocol yet). This implies the FEC set is the last one in the entry
153+
batch. The last 15 bits describe the parent slot's offset
154+
(parent_off) from the FEC set's slot. */
155+
127156
FD_FN_CONST static inline ulong
128-
fd_disco_shred_replay_sig( ulong slot,
129-
uint shred_idx,
130-
uint fec_set_idx,
131-
int is_code,
132-
int completes ) {
133-
134-
/* | 32 LSB of slot | 15 LSB of shred_idx | 15 LSB of fec_idx | 1 bit of shred data/code type | 1 bit if shred completes the fec set |
135-
| slot[32,63] | shred_idx[17,32] | fec_idx[2,16] | is_parity[1] | is_complete[0] | */
136-
137-
ulong slot_ul = fd_ulong_min( (ulong)slot, (ulong)UINT_MAX );
138-
ulong shred_idx_ul = fd_ulong_min( (ulong)shred_idx, (ulong)FD_SHRED_MAX_PER_SLOT );
139-
ulong fec_set_idx_ul = fd_ulong_min( (ulong)fec_set_idx, (ulong)FD_SHRED_MAX_PER_SLOT );
140-
ulong is_code_ul = (ulong)is_code;
141-
ulong completes_ul = (ulong)completes;
142-
return slot_ul << 32 | shred_idx_ul << 17 | fec_set_idx_ul << 2 | is_code_ul << 1 | completes_ul;
157+
fd_disco_shred_repair_sig( int type, int is_code_or_data_completes, ulong slot, uint fec_set_idx, uint shred_idx_or_data_cnt_or_parent_off ) {
158+
ulong type_ul = (ulong)type;
159+
ulong is_code_or_data_completes_ul = (ulong)is_code_or_data_completes;
160+
ulong slot_ul = fd_ulong_min( (ulong)slot, (ulong)UINT_MAX );
161+
ulong fec_set_idx_ul = fd_ulong_min( (ulong)fec_set_idx, (ulong)FD_SHRED_MAX_PER_SLOT );
162+
ulong shred_idx_or_data_cnt_or_parent_off_ul = fd_ulong_min( (ulong)shred_idx_or_data_cnt_or_parent_off, (ulong)FD_SHRED_MAX_PER_SLOT );
163+
return type_ul << 63 | is_code_or_data_completes_ul << 62 | slot_ul << 30 | fec_set_idx_ul << 15 | shred_idx_or_data_cnt_or_parent_off_ul;
143164
}
144165

145-
FD_FN_CONST static inline ulong fd_disco_shred_replay_sig_slot ( ulong sig ) { return fd_ulong_extract ( sig, 32, 63 ); }
146-
FD_FN_CONST static inline uint fd_disco_shred_replay_sig_shred_idx ( ulong sig ) { return (uint)fd_ulong_extract ( sig, 17, 31 ); }
147-
FD_FN_CONST static inline uint fd_disco_shred_replay_sig_fec_set_idx( ulong sig ) { return (uint)fd_ulong_extract ( sig, 2, 16 ); }
148-
FD_FN_CONST static inline int fd_disco_shred_replay_sig_is_code ( ulong sig ) { return fd_ulong_extract_bit( sig, 1 ); }
149-
FD_FN_CONST static inline int fd_disco_shred_replay_sig_completes ( ulong sig ) { return fd_ulong_extract_bit( sig, 0 ); }
166+
/* fd_disco_shred_repair_sig_{...} are accessors for the fields encoded
167+
in the sig described above. */
168+
169+
FD_FN_CONST static inline int fd_disco_shred_repair_sig_type ( ulong sig ) { return fd_ulong_extract_bit( sig, 63 ); }
170+
FD_FN_CONST static inline int fd_disco_shred_repair_sig_is_code ( ulong sig ) { return fd_ulong_extract_bit( sig, 62 ); } /* type 0 */
171+
FD_FN_CONST static inline int fd_disco_shred_repair_sig_data_completes( ulong sig ) { return fd_ulong_extract_bit( sig, 62 ); } /* type 1 */
172+
FD_FN_CONST static inline ulong fd_disco_shred_repair_sig_slot ( ulong sig ) { return fd_ulong_extract ( sig, 30, 61 ); }
173+
FD_FN_CONST static inline uint fd_disco_shred_repair_sig_fec_set_idx ( ulong sig ) { return (uint)fd_ulong_extract ( sig, 15, 29 ); }
174+
FD_FN_CONST static inline uint fd_disco_shred_repair_sig_shred_idx ( ulong sig ) { return (uint)fd_ulong_extract_lsb( sig, 15 ); } /* type 0, is_code 0 */
175+
FD_FN_CONST static inline uint fd_disco_shred_repair_sig_data_cnt ( ulong sig ) { return (uint)fd_ulong_extract_lsb( sig, 15 ); } /* type 0, is_code 1 */
176+
FD_FN_CONST static inline uint fd_disco_shred_repair_sig_parent_off ( ulong sig ) { return (uint)fd_ulong_extract_lsb( sig, 15 ); } /* type 1 */
177+
150178

151179
FD_FN_PURE static inline ulong
152180
fd_disco_compact_chunk0( void * wksp ) {

src/disco/shred/fd_shred_tile.c

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@ after_frag( fd_shred_ctx_t * ctx,
554554
const ulong fanout = 200UL;
555555
fd_shred_dest_idx_t _dests[ 200*(FD_REEDSOL_DATA_SHREDS_MAX+FD_REEDSOL_PARITY_SHREDS_MAX) ];
556556

557+
fd_bmtree_node_t out_merkle_root[1];
557558
if( FD_LIKELY( ctx->in_kind[ in_idx ]==IN_KIND_NET ) ) {
558559
uchar * shred_buffer = ctx->shred_buffer;
559560
ulong shred_buffer_sz = ctx->shred_buffer_sz;
@@ -569,7 +570,6 @@ after_frag( fd_shred_ctx_t * ctx,
569570

570571
fd_fec_set_t const * out_fec_set[1];
571572
fd_shred_t const * out_shred[1];
572-
fd_bmtree_node_t out_merkle_root[1];
573573

574574
long add_shred_timing = -fd_tickcount();
575575
int rv = fd_fec_resolver_add_shred( ctx->resolver, shred, shred_buffer_sz, slot_leader->uc, out_fec_set, out_shred, out_merkle_root );
@@ -594,13 +594,25 @@ after_frag( fd_shred_ctx_t * ctx,
594594
for( ulong j=0UL; j<*max_dest_cnt; j++ ) send_shred( ctx, *out_shred, sdest, dests[ j ], ctx->tsorig );
595595
} while( 0 );
596596

597-
if( FD_LIKELY( ctx->blockstore && rv==FD_FEC_RESOLVER_SHRED_OKAY ) ) { /* optimize for the compiler - branch predictor will still be correct */
597+
if( FD_LIKELY( ctx->blockstore ) ) { /* always true or false depending on the topo, so hint to the compiler to optimize this branch */
598+
599+
/* Construct the sig. */
600+
601+
int is_code = fd_shred_is_code( fd_shred_type( shred->variant ) );
602+
uint shred_idx_or_data_cnt = shred->idx;
603+
if( FD_LIKELY( is_code ) ) shred_idx_or_data_cnt = shred->code.data_cnt; /* optimize for code_cnt >= data_cnt */
604+
ulong sig = fd_disco_shred_replay_sig( 0, is_code, shred->slot, shred->fec_set_idx, shred_idx_or_data_cnt );
605+
606+
/* Copy the shred header into the frag. */
607+
598608
uchar * buf = fd_chunk_to_laddr( ctx->replay_out_mem, ctx->replay_out_chunk );
599609
ulong sz = fd_shred_header_sz( shred->variant );
600610
fd_memcpy( buf, shred, sz );
601-
ulong tspub = fd_frag_meta_ts_comp( fd_tickcount() );
602-
ulong replay_sig = fd_disco_shred_replay_sig( shred->slot, shred->idx, shred->fec_set_idx, fd_shred_is_code( fd_shred_type( shred->variant ) ), 0 );
603-
fd_stem_publish( stem, REPLAY_OUT_IDX, replay_sig, ctx->replay_out_chunk, sz, 0UL, ctx->tsorig, tspub );
611+
612+
/* Publish the frag. */
613+
614+
ulong tspub = fd_frag_meta_ts_comp( fd_tickcount() );
615+
fd_stem_publish( stem, REPLAY_OUT_IDX, sig, ctx->replay_out_chunk, sz, 0UL, ctx->tsorig, tspub );
604616
ctx->replay_out_chunk = fd_dcache_compact_next( ctx->replay_out_chunk, sz, ctx->replay_out_chunk0, ctx->replay_out_wmark );
605617
}
606618
}
@@ -642,28 +654,43 @@ after_frag( fd_shred_ctx_t * ctx,
642654
ulong sz3 = sizeof(fd_shred34_t) - (34UL - s34[ 3 ].shred_cnt)*FD_SHRED_MAX_SZ;
643655

644656
if( FD_LIKELY( ctx->blockstore ) ) {
645-
/* If the shred has a completes flag, then in the replay tile it
646-
will do immediate polling for shreds in that FEC set, under
647-
the assumption that they live in the blockstore. When a shred
648-
completes a FEC set, we need to add the shreds to the
649-
blockstore before we notify replay of a completed FEC set.
650-
Replay does not poll the blockstore for shreds on notifies of
651-
a regular non-completing shred. */
657+
658+
/* Insert shreds into the blockstore. Note we do this regardless of
659+
whether the shreds are for one of our leader slots or not. Even
660+
though there is a separate link that directly connects pack and
661+
replay when we are leader, we still need the shreds in the
662+
blockstore to, for example, serve repair requests. */
652663

653664
for( ulong i=0UL; i<set->data_shred_cnt; i++ ) {
654665
fd_shred_t const * data_shred = (fd_shred_t const *)fd_type_pun_const( set->data_shreds[ i ] );
655666
fd_blockstore_shred_insert( ctx->blockstore, data_shred );
656667
}
668+
657669
if( FD_LIKELY( ctx->in_kind[ in_idx ]==IN_KIND_NET ) ) {
658-
/* Shred came from block we didn't produce. This is not our leader
659-
slot. */
660-
fd_shred_t const * shred = (fd_shred_t const *)fd_type_pun_const( ctx->shred_buffer );
670+
671+
/* Additionally, if the shreds are not for our leader slot (i.e.
672+
receiving the shred via net) publish a notification to replay
673+
that the FEC set is complete.
674+
675+
Note we intentionally insert shreds into the blockstore before
676+
notifying replay. This is because the replay tile immediately
677+
polls for shreds in the blockstore upon receiving a FEC set
678+
complete notification. */
679+
680+
fd_shred_t const * last = (fd_shred_t const *)fd_type_pun_const( set->data_shreds[ set->data_shred_cnt - 1 ] );
681+
int data_completes = last->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE;
682+
ulong sig = fd_disco_shred_replay_sig( 1, data_completes, last->slot, last->fec_set_idx, last->data.parent_off );
683+
684+
/* Copy the merkle root and chained merkle root of the FEC set
685+
into the frag (64 bytes). */
686+
661687
uchar * buf = fd_chunk_to_laddr( ctx->replay_out_mem, ctx->replay_out_chunk );
662-
ulong sz = fd_shred_header_sz( shred->variant );
663-
fd_memcpy( buf, shred, sz );
664-
ulong tspub = fd_frag_meta_ts_comp( fd_tickcount() );
665-
ulong replay_sig = fd_disco_shred_replay_sig( shred->slot, shred->idx, shred->fec_set_idx, fd_shred_is_code( fd_shred_type( shred->variant ) ), 1 );
666-
fd_stem_publish( stem, REPLAY_OUT_IDX, replay_sig, ctx->replay_out_chunk, sz, 0UL, ctx->tsorig, tspub );
688+
ulong sz = fd_shred_header_sz( last->variant );
689+
memcpy( buf, out_merkle_root, FD_SHRED_MERKLE_ROOT_SZ );
690+
memcpy( buf + FD_SHRED_MERKLE_ROOT_SZ, (uchar const *)last + fd_shred_chain_off( last->variant ), FD_SHRED_MERKLE_ROOT_SZ );
691+
692+
ulong tspub = fd_frag_meta_ts_comp( fd_tickcount() );
693+
fd_stem_publish( stem, REPLAY_OUT_IDX, sig, ctx->replay_out_chunk, sz, 0UL, ctx->tsorig, tspub );
667694
ctx->replay_out_chunk = fd_dcache_compact_next( ctx->replay_out_chunk, sz, ctx->replay_out_chunk0, ctx->replay_out_wmark );
668695
}
669696
}

0 commit comments

Comments
 (0)