1+ pub ( crate ) mod encode;
2+
13use std:: collections:: hash_map:: Entry ;
24use std:: collections:: { BTreeMap , VecDeque } ;
35
@@ -17,12 +19,46 @@ use crate::html::format::join_with_double_colon;
1719use crate :: html:: markdown:: short_markdown_summary;
1820use crate :: html:: render:: { self , IndexItem , IndexItemFunctionType , RenderType , RenderTypeId } ;
1921
22+ use encode:: { bitmap_to_string, write_vlqhex_to_string} ;
23+
/// The serialized search index, with the item descriptions split out into shards.
///
/// The `index` is a JSON-encoded list of names and other information.
///
/// The `desc` has newline-separated descriptions, split up by size into shards
/// of roughly 128KiB. Each entry pairs the number of descriptions stored in the
/// shard with the shard's text. For example, `(4, "foo\nbar\nbaz\nquux")`.
///
/// There is no single, optimal size for these shards, because it depends on
/// configuration values that we can't predict or control, such as the version
/// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2
/// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether
/// the search query is going to produce a large number of results or a small
/// number, the bandwidth delay product of the network...
///
/// Gzipping some standard library descriptions to guess what transport
/// compression will do, the compressed file sizes can be as small as 4.9KiB
/// or as large as 18KiB (ignoring the final 1.9KiB shard of leftovers).
/// A "reasonable" range for files is for them to be bigger than 1KiB,
/// since that's about the amount of data that can be transferred in a
/// single TCP packet (see [1]), and smaller than 64KiB, the maximum amount
/// of data that TCP can transfer in a single round trip without extensions
/// (see [2] and [3]).
///
/// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media
/// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept
/// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features
pub(crate) struct SerializedSearchIndex {
    /// JSON-encoded search index (names and other per-item information).
    pub(crate) index: String,
    /// Description shards, each stored as
    /// `(number of descriptions in the shard, newline-joined descriptions)`.
    pub(crate) desc: Vec<(usize, String)>,
}
53+
/// Length, in bytes, at which a description shard is considered full (128KiB).
///
/// A shard is only closed once its text has reached this length, so a shard
/// can end up somewhat larger than this value.
const DESC_INDEX_SHARD_LEN: usize = 128 * 1024;
55+
2056/// Builds the search index from the collected metadata
2157pub ( crate ) fn build_index < ' tcx > (
2258 krate : & clean:: Crate ,
2359 cache : & mut Cache ,
2460 tcx : TyCtxt < ' tcx > ,
25- ) -> String {
61+ ) -> SerializedSearchIndex {
2662 let mut itemid_to_pathid = FxHashMap :: default ( ) ;
2763 let mut primitives = FxHashMap :: default ( ) ;
2864 let mut associated_types = FxHashMap :: default ( ) ;
@@ -319,7 +355,6 @@ pub(crate) fn build_index<'tcx>(
319355 . collect :: < Vec < _ > > ( ) ;
320356
321357 struct CrateData < ' a > {
322- doc : String ,
323358 items : Vec < & ' a IndexItem > ,
324359 paths : Vec < ( ItemType , Vec < Symbol > ) > ,
325360 // The String is alias name and the vec is the list of the elements with this alias.
@@ -328,6 +363,11 @@ pub(crate) fn build_index<'tcx>(
328363 aliases : & ' a BTreeMap < String , Vec < usize > > ,
329364 // Used when a type has more than one impl with an associated item with the same name.
330365 associated_item_disambiguators : & ' a Vec < ( usize , String ) > ,
366+ // A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string
367+ // for information on the format.
368+ desc_index : String ,
369+ // A list of items with no description. This is eventually turned into a bitmap.
370+ empty_desc : Vec < u32 > ,
331371 }
332372
333373 struct Paths {
@@ -409,7 +449,6 @@ pub(crate) fn build_index<'tcx>(
409449 let mut names = Vec :: with_capacity ( self . items . len ( ) ) ;
410450 let mut types = String :: with_capacity ( self . items . len ( ) ) ;
411451 let mut full_paths = Vec :: with_capacity ( self . items . len ( ) ) ;
412- let mut descriptions = Vec :: with_capacity ( self . items . len ( ) ) ;
413452 let mut parents = Vec :: with_capacity ( self . items . len ( ) ) ;
414453 let mut functions = String :: with_capacity ( self . items . len ( ) ) ;
415454 let mut deprecated = Vec :: with_capacity ( self . items . len ( ) ) ;
@@ -432,7 +471,6 @@ pub(crate) fn build_index<'tcx>(
432471 parents. push ( item. parent_idx . map ( |x| x + 1 ) . unwrap_or ( 0 ) ) ;
433472
434473 names. push ( item. name . as_str ( ) ) ;
435- descriptions. push ( & item. desc ) ;
436474
437475 if !item. path . is_empty ( ) {
438476 full_paths. push ( ( index, & item. path ) ) ;
@@ -444,7 +482,8 @@ pub(crate) fn build_index<'tcx>(
444482 }
445483
446484 if item. deprecation . is_some ( ) {
447- deprecated. push ( index) ;
485+ // bitmasks always use 1-indexing for items, with 0 as the crate itself
486+ deprecated. push ( u32:: try_from ( index + 1 ) . unwrap ( ) ) ;
448487 }
449488 }
450489
@@ -455,42 +494,84 @@ pub(crate) fn build_index<'tcx>(
455494 let has_aliases = !self . aliases . is_empty ( ) ;
456495 let mut crate_data =
457496 serializer. serialize_struct ( "CrateData" , if has_aliases { 9 } else { 8 } ) ?;
458- crate_data. serialize_field ( "doc" , & self . doc ) ?;
459497 crate_data. serialize_field ( "t" , & types) ?;
460498 crate_data. serialize_field ( "n" , & names) ?;
461- // Serialize as an array of item indices and full paths
462499 crate_data. serialize_field ( "q" , & full_paths) ?;
463- crate_data. serialize_field ( "d" , & descriptions) ?;
464500 crate_data. serialize_field ( "i" , & parents) ?;
465501 crate_data. serialize_field ( "f" , & functions) ?;
466- crate_data. serialize_field ( "c " , & deprecated ) ?;
502+ crate_data. serialize_field ( "D " , & self . desc_index ) ?;
467503 crate_data. serialize_field ( "p" , & paths) ?;
468504 crate_data. serialize_field ( "b" , & self . associated_item_disambiguators ) ?;
505+ crate_data. serialize_field ( "c" , & bitmap_to_string ( & deprecated) ) ?;
506+ crate_data. serialize_field ( "e" , & bitmap_to_string ( & self . empty_desc ) ) ?;
469507 if has_aliases {
470508 crate_data. serialize_field ( "a" , & self . aliases ) ?;
471509 }
472510 crate_data. end ( )
473511 }
474512 }
475513
476- // Collect the index into a string
477- format ! (
514+ let ( empty_desc, desc) = {
515+ let mut empty_desc = Vec :: new ( ) ;
516+ let mut result = Vec :: new ( ) ;
517+ let mut set = String :: new ( ) ;
518+ let mut len: usize = 0 ;
519+ let mut item_index: u32 = 0 ;
520+ for desc in std:: iter:: once ( & crate_doc) . chain ( crate_items. iter ( ) . map ( |item| & item. desc ) ) {
521+ if desc == "" {
522+ empty_desc. push ( item_index) ;
523+ item_index += 1 ;
524+ continue ;
525+ }
526+ if set. len ( ) >= DESC_INDEX_SHARD_LEN {
527+ result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
528+ len = 0 ;
529+ } else if len != 0 {
530+ set. push ( '\n' ) ;
531+ }
532+ set. push_str ( & desc) ;
533+ len += 1 ;
534+ item_index += 1 ;
535+ }
536+ result. push ( ( len, std:: mem:: replace ( & mut set, String :: new ( ) ) ) ) ;
537+ ( empty_desc, result)
538+ } ;
539+
540+ let desc_index = {
541+ let mut desc_index = String :: with_capacity ( desc. len ( ) * 4 ) ;
542+ for & ( len, _) in desc. iter ( ) {
543+ write_vlqhex_to_string ( len. try_into ( ) . unwrap ( ) , & mut desc_index) ;
544+ }
545+ desc_index
546+ } ;
547+
548+ assert_eq ! (
549+ crate_items. len( ) + 1 ,
550+ desc. iter( ) . map( |( len, _) | * len) . sum:: <usize >( ) + empty_desc. len( )
551+ ) ;
552+
553+ // The index, which is actually used to search, is JSON
554+ // It uses `JSON.parse(..)` to actually load, since JSON
555+ // parses faster than the full JavaScript syntax.
556+ let index = format ! (
478557 r#"["{}",{}]"# ,
479558 krate. name( tcx) ,
480559 serde_json:: to_string( & CrateData {
481- doc: crate_doc,
482560 items: crate_items,
483561 paths: crate_paths,
484562 aliases: & aliases,
485563 associated_item_disambiguators: & associated_item_disambiguators,
564+ desc_index,
565+ empty_desc,
486566 } )
487567 . expect( "failed serde conversion" )
488568 // All these `replace` calls are because we have to go through JS string for JSON content.
489569 . replace( '\\' , r"\\" )
490570 . replace( '\'' , r"\'" )
491571 // We need to escape double quotes for the JSON.
492572 . replace( "\\ \" " , "\\ \\ \" " )
493- )
573+ ) ;
574+ SerializedSearchIndex { index, desc }
494575}
495576
496577pub ( crate ) fn get_function_type_for_search < ' tcx > (
0 commit comments