11
11
12
12
use godot_ffi as sys;
13
13
14
+ use crate :: builtin:: collections:: extend_buffer:: ExtendBuffer ;
14
15
use crate :: builtin:: * ;
15
16
use crate :: meta:: { AsArg , ToGodot } ;
17
+ use std:: mem:: size_of;
16
18
use std:: { fmt, ops, ptr} ;
17
19
use sys:: types:: * ;
18
20
use sys:: { ffi_methods, interface_fn, GodotFfi } ;
@@ -380,17 +382,18 @@ macro_rules! impl_packed_array {
380
382
array
381
383
}
382
384
383
- /// Drops all elements in `self` and replaces them with data from an array of values.
385
+ /// Drops all elements in `self` starting from `dst` and replaces them with data from an array of values.
386
+ /// `dst` must be a valid index, even if `len` is zero.
384
387
///
385
388
/// # Safety
386
389
///
387
- /// * Pointer must be valid slice of data with `len` size.
388
- /// * Pointer must not point to `self` data.
389
- /// * Length must be equal to `self.len()`.
390
+ /// * `src` must be valid slice of data with `len` size.
391
+ /// * `src` must not point to `self` data.
392
+ /// * `len` must be equal to `self.len() - dst `.
390
393
/// * Source data must not be dropped later.
391
- unsafe fn move_from_slice( & mut self , src: * const $Element, len: usize ) {
392
- let ptr = self . ptr_mut( 0 ) ;
393
- debug_assert_eq!( len, self . len( ) , "length precondition violated" ) ;
394
+ unsafe fn move_from_slice( & mut self , src: * const $Element, dst : usize , len: usize ) {
395
+ let ptr = self . ptr_mut( dst ) ;
396
+ debug_assert_eq!( len, self . len( ) - dst , "length precondition violated" ) ;
394
397
// Drops all elements in place. Drop impl must not panic.
395
398
ptr:: drop_in_place( ptr:: slice_from_raw_parts_mut( ptr, len) ) ;
396
399
// Copy is okay since all elements are dropped.
@@ -457,7 +460,7 @@ macro_rules! impl_packed_array {
457
460
458
461
// SAFETY: The packed array contains exactly N elements and the source array will be forgotten.
459
462
unsafe {
460
- packed_array. move_from_slice( arr. as_ptr( ) , N ) ;
463
+ packed_array. move_from_slice( arr. as_ptr( ) , 0 , N ) ;
461
464
}
462
465
packed_array
463
466
}
@@ -476,13 +479,21 @@ macro_rules! impl_packed_array {
476
479
// The vector is forcibly set to empty, so its contents are forgotten.
477
480
unsafe {
478
481
vec. set_len( 0 ) ;
479
- array. move_from_slice( vec. as_ptr( ) , len) ;
482
+ array. move_from_slice( vec. as_ptr( ) , 0 , len) ;
480
483
}
481
484
array
482
485
}
483
486
}
484
487
485
488
#[ doc = concat!( "Creates a `" , stringify!( $PackedArray) , "` from an iterator." ) ]
489
+ ///
490
+ /// # Performance note
491
+ /// This uses the lower bound from `Iterator::size_hint()` to allocate memory up front. If the iterator returns
492
+ /// more than that number of elements, it falls back to reading elements into a fixed-size buffer before adding
493
+ /// them all efficiently as a batch.
494
+ ///
495
+ /// # Panics
496
+ /// - If the iterator's `size_hint()` returns an incorrect lower bound (which is a breach of the `Iterator` protocol).
486
497
impl FromIterator <$Element> for $PackedArray {
487
498
fn from_iter<I : IntoIterator <Item = $Element>>( iter: I ) -> Self {
488
499
let mut array = $PackedArray:: default ( ) ;
@@ -491,16 +502,103 @@ macro_rules! impl_packed_array {
491
502
}
492
503
}
493
504
494
- #[ doc = concat!( "Extends a`" , stringify!( $PackedArray) , "` with the contents of an iterator" ) ]
505
+ #[ doc = concat!( "Extends a`" , stringify!( $PackedArray) , "` with the contents of an iterator." ) ]
506
+ ///
507
+ /// # Performance note
508
+ /// This uses the lower bound from `Iterator::size_hint()` to allocate memory up front. If the iterator returns
509
+ /// more than that number of elements, it falls back to reading elements into a fixed-size buffer before adding
510
+ /// them all efficiently as a batch.
511
+ ///
512
+ /// # Panics
513
+ /// - If the iterator's `size_hint()` returns an incorrect lower bound (which is a breach of the `Iterator` protocol).
495
514
impl Extend <$Element> for $PackedArray {
496
515
fn extend<I : IntoIterator <Item = $Element>>( & mut self , iter: I ) {
497
- // Unfortunately the GDExtension API does not offer the equivalent of `Vec::reserve`.
498
- // Otherwise we could use it to pre-allocate based on `iter.size_hint()`.
516
+ // This function is complicated, but with good reason. The problem is that we don't know the length of
517
+ // the `Iterator` ahead of time; all we get is its `size_hint()`.
518
+ //
519
+ // There are at least two categories of iterators that are common in the wild, for which we'd want good performance:
520
+ //
521
+ // 1. The length is known: `size_hint()` returns the exact size, e.g. just iterating over a `Vec` or `BTreeSet`.
522
+ // 2. The length is unknown: `size_hint()` returns 0, e.g. `Filter`, `FlatMap`, `FromFn`.
523
+ //
524
+ // A number of implementations are possible, which were benchmarked for 1000 elements of type `i32`:
525
+ //
526
+ // - Simply call `push()` in a loop:
527
+ // 6.1 µs whether or not the length is known.
528
+ // - First `collect()` the `Iterator` into a `Vec`, call `self.resize()` to make room, then move out of the `Vec`:
529
+ // 0.78 µs if the length is known, 1.62 µs if the length is unknown.
530
+ // It also requires additional temporary memory to hold all elements.
531
+ // - The strategy implemented below:
532
+ // 0.097 µs if the length is known, 0.49 µs if the length is unknown.
499
533
//
500
- // A faster implementation using `resize()` and direct pointer writes might still be
501
- // possible.
502
- for item in iter. into_iter( ) {
503
- self . push( meta:: ParamType :: owned_to_arg( item) ) ;
534
+ // The implementation of `Vec` in the standard library deals with this by repeatedly `reserve()`ing
535
+ // whatever `size_hint()` returned, but we don't want to do that because the Godot API call to
536
+ // `self.resize()` is relatively slow.
537
+
538
+ let mut iter = iter. into_iter( ) ;
539
+ // Cache the length to avoid repeated Godot API calls.
540
+ let mut len = self . len( ) ;
541
+
542
+ // Fast part.
543
+ //
544
+ // Use `Iterator::size_hint()` to pre-allocate the minimum number of elements in the iterator, then
545
+ // write directly to the resulting slice. We can do this because `size_hint()` is required by the
546
+ // `Iterator` contract to return correct bounds. Note that any bugs in it must not result in UB.
547
+ let ( size_hint_min, _size_hint_max) = iter. size_hint( ) ;
548
+ if size_hint_min > 0 {
549
+ let capacity = len + size_hint_min;
550
+ self . resize( capacity) ;
551
+ for out_ref in & mut self . as_mut_slice( ) [ len..] {
552
+ * out_ref = iter. next( ) . expect( "iterator returned fewer than size_hint().0 elements" ) ;
553
+ }
554
+ len = capacity;
555
+ }
556
+
557
+ // Slower part.
558
+ //
559
+ // While the iterator is still not finished, gather elements into a fixed-size buffer, then add them all
560
+ // at once.
561
+ //
562
+ // Why not call `self.resize()` with fixed-size increments, like 32 elements at a time? Well, we might
563
+ // end up over-allocating, and then need to trim the array length back at the end. Because Godot
564
+ // allocates memory in steps of powers of two, this might end up with an array backing storage that is
565
+ // twice as large as it needs to be. By first gathering elements into a buffer, we can tell Godot to
566
+ // allocate exactly as much as we need, and no more.
567
+ //
568
+ // Note that we can't get by with simple memcpys, because `PackedStringArray` contains `GString`, which
569
+ // does not implement `Copy`.
570
+ //
571
+ // Buffer size: 2 kB is enough for the performance win, without needlessly blowing up the stack size.
572
+ // (A cursory check shows that most/all platforms use a stack size of at least 1 MB.)
573
+ const BUFFER_SIZE_BYTES : usize = 2048 ;
574
+ const BUFFER_CAPACITY : usize = const_max(
575
+ 1 ,
576
+ BUFFER_SIZE_BYTES / size_of:: <$Element>( ) ,
577
+ ) ;
578
+ let mut buf = ExtendBuffer :: <_, BUFFER_CAPACITY >:: default ( ) ;
579
+ while let Some ( item) = iter. next( ) {
580
+ buf. push( item) ;
581
+ while !buf. is_full( ) {
582
+ if let Some ( item) = iter. next( ) {
583
+ buf. push( item) ;
584
+ } else {
585
+ break ;
586
+ }
587
+ }
588
+
589
+ let buf_slice = buf. drain_as_mut_slice( ) ;
590
+ let capacity = len + buf_slice. len( ) ;
591
+
592
+ // Assumption: resize does not panic. Otherwise we would leak memory here.
593
+ self . resize( capacity) ;
594
+
595
+ // SAFETY: Dropping the first `buf_slice.len()` items is safe, because those are exactly the ones we initialized.
596
+ // Writing output is safe because we just allocated `buf_slice.len()` new elements after index `len`.
597
+ unsafe {
598
+ self . move_from_slice( buf_slice. as_ptr( ) , len, buf_slice. len( ) ) ;
599
+ }
600
+
601
+ len = capacity;
504
602
}
505
603
}
506
604
}
@@ -1071,3 +1169,12 @@ fn populated_or_err(array: PackedByteArray) -> Result<PackedByteArray, ()> {
1071
1169
Ok ( array)
1072
1170
}
1073
1171
}
1172
+
1173
/// Returns the larger of `a` and `b`; if they are equal, that common value is returned.
///
/// Helper because `usize::max()` is not const, so it cannot be used to compute `const` items.
const fn const_max(a: usize, b: usize) -> usize {
    if a > b {
        a
    } else {
        b
    }
}
0 commit comments