1111
1212use godot_ffi as sys;
1313
14+ use crate :: builtin:: collections:: extend_buffer:: ExtendBuffer ;
1415use crate :: builtin:: * ;
1516use crate :: meta:: { AsArg , ToGodot } ;
17+ use std:: mem:: size_of;
1618use std:: { fmt, ops, ptr} ;
1719use sys:: types:: * ;
1820use sys:: { ffi_methods, interface_fn, GodotFfi } ;
@@ -380,17 +382,18 @@ macro_rules! impl_packed_array {
380382 array
381383 }
382384
383- /// Drops all elements in `self` and replaces them with data from an array of values.
385+ /// Drops all elements in `self` starting from `dst` and replaces them with data from an array of values.
386+ /// `dst` must be a valid index, even if `len` is zero.
384387 ///
385388 /// # Safety
386389 ///
387- /// * Pointer must be valid slice of data with `len` size.
388- /// * Pointer must not point to `self` data.
389- /// * Length must be equal to `self.len()`.
390+ /// * `src` must be a valid slice of data with `len` elements.
391+ /// * `src` must not point into `self`'s data.
392+ /// * `len` must be equal to `self.len() - dst`.
390393 /// * Source data must not be dropped later.
391- unsafe fn move_from_slice( & mut self , src: * const $Element, len: usize ) {
392- let ptr = self . ptr_mut( 0 ) ;
393- debug_assert_eq!( len, self . len( ) , "length precondition violated" ) ;
394+ unsafe fn move_from_slice( & mut self , src: * const $Element, dst : usize , len: usize ) {
395+ let ptr = self . ptr_mut( dst ) ;
396+ debug_assert_eq!( len, self . len( ) - dst , "length precondition violated" ) ;
394397 // Drops all elements in place. Drop impl must not panic.
395398 ptr:: drop_in_place( ptr:: slice_from_raw_parts_mut( ptr, len) ) ;
396399 // Copy is okay since all elements are dropped.
@@ -457,7 +460,7 @@ macro_rules! impl_packed_array {
457460
458461 // SAFETY: The packed array contains exactly N elements and the source array will be forgotten.
459462 unsafe {
460- packed_array. move_from_slice( arr. as_ptr( ) , N ) ;
463+ packed_array. move_from_slice( arr. as_ptr( ) , 0 , N ) ;
461464 }
462465 packed_array
463466 }
@@ -476,13 +479,21 @@ macro_rules! impl_packed_array {
476479 // The vector is forcibly set to empty, so its contents are forgotten.
477480 unsafe {
478481 vec. set_len( 0 ) ;
479- array. move_from_slice( vec. as_ptr( ) , len) ;
482+ array. move_from_slice( vec. as_ptr( ) , 0 , len) ;
480483 }
481484 array
482485 }
483486 }
484487
485488 #[ doc = concat!( "Creates a `" , stringify!( $PackedArray) , "` from an iterator." ) ]
489+ ///
490+ /// # Performance note
491+ /// This uses the lower bound from `Iterator::size_hint()` to allocate memory up front. If the iterator returns
492+ /// more than that number of elements, it falls back to reading elements into a fixed-size buffer before adding
493+ /// them all efficiently as a batch.
494+ ///
495+ /// # Panics
496+ /// - If the iterator's `size_hint()` returns an incorrect lower bound (which is a breach of the `Iterator` protocol).
486497 impl FromIterator <$Element> for $PackedArray {
487498 fn from_iter<I : IntoIterator <Item = $Element>>( iter: I ) -> Self {
488499 let mut array = $PackedArray:: default ( ) ;
@@ -491,16 +502,103 @@ macro_rules! impl_packed_array {
491502 }
492503 }
493504
494- #[ doc = concat!( "Extends a`" , stringify!( $PackedArray) , "` with the contents of an iterator" ) ]
505+ #[ doc = concat!( "Extends a`" , stringify!( $PackedArray) , "` with the contents of an iterator." ) ]
506+ ///
507+ /// # Performance note
508+ /// This uses the lower bound from `Iterator::size_hint()` to allocate memory up front. If the iterator returns
509+ /// more than that number of elements, it falls back to reading elements into a fixed-size buffer before adding
510+ /// them all efficiently as a batch.
511+ ///
512+ /// # Panics
513+ /// - If the iterator's `size_hint()` returns an incorrect lower bound (which is a breach of the `Iterator` protocol).
495514 impl Extend <$Element> for $PackedArray {
496515 fn extend<I : IntoIterator <Item = $Element>>( & mut self , iter: I ) {
497- // Unfortunately the GDExtension API does not offer the equivalent of `Vec::reserve`.
498- // Otherwise we could use it to pre-allocate based on `iter.size_hint()`.
516+ // This function is complicated, but with good reason. The problem is that we don't know the length of
517+ // the `Iterator` ahead of time; all we get is its `size_hint()`.
518+ //
519+ // There are at least two categories of iterators that are common in the wild, for which we'd want good performance:
520+ //
521+ // 1. The length is known: `size_hint()` returns the exact size, e.g. just iterating over a `Vec` or `BTreeSet`.
522+ // 2. The length is unknown: `size_hint()` returns 0, e.g. `Filter`, `FlatMap`, `FromFn`.
523+ //
524+ // A number of implementations are possible, which were benchmarked for 1000 elements of type `i32`:
525+ //
526+ // - Simply call `push()` in a loop:
527+ // 6.1 µs whether or not the length is known.
528+ // - First `collect()` the `Iterator` into a `Vec`, call `self.resize()` to make room, then move out of the `Vec`:
529+ // 0.78 µs if the length is known, 1.62 µs if the length is unknown.
530+ // It also requires additional temporary memory to hold all elements.
531+ // - The strategy implemented below:
532+ // 0.097 µs if the length is known, 0.49 µs if the length is unknown.
499533 //
500- // A faster implementation using `resize()` and direct pointer writes might still be
501- // possible.
502- for item in iter. into_iter( ) {
503- self . push( meta:: ParamType :: owned_to_arg( item) ) ;
534+ // The implementation of `Vec` in the standard library deals with this by repeatedly `reserve()`ing
535+ // whatever `size_hint()` returned, but we don't want to do that because the Godot API call to
536+ // `self.resize()` is relatively slow.
537+
538+ let mut iter = iter. into_iter( ) ;
539+ // Cache the length to avoid repeated Godot API calls.
540+ let mut len = self . len( ) ;
541+
542+ // Fast part.
543+ //
544+ // Use `Iterator::size_hint()` to pre-allocate the minimum number of elements in the iterator, then
545+ // write directly to the resulting slice. We can do this because `size_hint()` is required by the
546+ // `Iterator` contract to return correct bounds. Note that any bugs in it must not result in UB.
547+ let ( size_hint_min, _size_hint_max) = iter. size_hint( ) ;
548+ if size_hint_min > 0 {
549+ let capacity = len + size_hint_min;
550+ self . resize( capacity) ;
551+ for out_ref in & mut self . as_mut_slice( ) [ len..] {
552+ * out_ref = iter. next( ) . expect( "iterator returned fewer than size_hint().0 elements" ) ;
553+ }
554+ len = capacity;
555+ }
556+
557+ // Slower part.
558+ //
559+ // While the iterator is still not finished, gather elements into a fixed-size buffer, then add them all
560+ // at once.
561+ //
562+ // Why not call `self.resize()` with fixed-size increments, like 32 elements at a time? Well, we might
563+ // end up over-allocating, and then need to trim the array length back at the end. Because Godot
564+ // allocates memory in steps of powers of two, this might end up with an array backing storage that is
565+ // twice as large as it needs to be. By first gathering elements into a buffer, we can tell Godot to
566+ // allocate exactly as much as we need, and no more.
567+ //
568+ // Note that we can't get by with simple memcpys, because `PackedStringArray` contains `GString`, which
569+ // does not implement `Copy`.
570+ //
571+ // Buffer size: 2 kB is enough for the performance win, without needlessly blowing up the stack size.
572+ // (A cursory check shows that most/all platforms use a stack size of at least 1 MB.)
573+ const BUFFER_SIZE_BYTES : usize = 2048 ;
574+ const BUFFER_CAPACITY : usize = const_max(
575+ 1 ,
576+ BUFFER_SIZE_BYTES / size_of:: <$Element>( ) ,
577+ ) ;
578+ let mut buf = ExtendBuffer :: <_, BUFFER_CAPACITY >:: default ( ) ;
579+ while let Some ( item) = iter. next( ) {
580+ buf. push( item) ;
581+ while !buf. is_full( ) {
582+ if let Some ( item) = iter. next( ) {
583+ buf. push( item) ;
584+ } else {
585+ break ;
586+ }
587+ }
588+
589+ let buf_slice = buf. drain_as_mut_slice( ) ;
590+ let capacity = len + buf_slice. len( ) ;
591+
592+ // Assumption: resize does not panic. Otherwise we would leak memory here.
593+ self . resize( capacity) ;
594+
595+ // SAFETY: Dropping the first `buf_slice.len()` items is safe, because those are exactly the ones we initialized.
596+ // Writing output is safe because we just allocated `buf_slice.len()` new elements after index `len`.
597+ unsafe {
598+ self . move_from_slice( buf_slice. as_ptr( ) , len, buf_slice. len( ) ) ;
599+ }
600+
601+ len = capacity;
504602 }
505603 }
506604 }
@@ -1071,3 +1169,12 @@ fn populated_or_err(array: PackedByteArray) -> Result<PackedByteArray, ()> {
10711169 Ok ( array)
10721170 }
10731171}
1172+
/// Returns the larger of `a` and `b`; when both are equal, that shared value is returned.
///
/// Helper because `usize::max()` is not const, while this function can be evaluated in `const`
/// contexts such as the initializer of a `const` item.
const fn const_max(a: usize, b: usize) -> usize {
    if a > b {
        a
    } else {
        b
    }
}
0 commit comments