@@ -332,6 +332,8 @@ class CompressedExternalIdTableBase {
332
332
this ->currentBlock_ .reserve (blocksize_);
333
333
AD_CONTRACT_CHECK (NumStaticCols == 0 || NumStaticCols == numCols);
334
334
}
335
+ // TODO<joka921> Shouldn't be public.
336
+ std::atomic<bool > isFirstMerge = true ;
335
337
// Add a single row to the input. The type of `row` needs to be something that
336
338
// can be `push_back`ed to an `IdTable`.
337
339
void push (const auto & row) requires requires { currentBlock_.push_back (row); }
@@ -364,6 +366,7 @@ class CompressedExternalIdTableBase {
364
366
}
365
367
writer_.clear ();
366
368
numBlocksPushed_ = 0 ;
369
+ isFirstMerge = true ;
367
370
}
368
371
369
372
protected:
@@ -401,6 +404,9 @@ class CompressedExternalIdTableBase {
401
404
// until the pushing is actually finished, and return `true`. Using this
402
405
// function allows for efficient usage of this class for very small inputs.
403
406
bool transformAndPushLastBlock () {
407
+ if (!isFirstMerge) {
408
+ return numBlocksPushed_ != 0 ;
409
+ }
404
410
// If we have pushed at least one (complete) block, then the last future
405
411
// from pushing a block is still in flight. If we have never pushed a block,
406
412
// then also the future cannot be valid.
@@ -411,7 +417,7 @@ class CompressedExternalIdTableBase {
411
417
if (numBlocksPushed_ == 0 ) {
412
418
AD_CORRECTNESS_CHECK (this ->numElementsPushed_ ==
413
419
this ->currentBlock_ .size ());
414
- blockTransformation_ (this ->currentBlock_ );
420
+ blockTransformation_ (this ->currentBlock_ );
415
421
return false ;
416
422
}
417
423
pushBlock (std::move (this ->currentBlock_ ));
@@ -511,7 +517,7 @@ class CompressedExternalIdTableSorterTypeErased {
511
517
// false positives in the memory limit mechanism, so setting the following
512
518
// variable to `true` allows disabling the memory limit.
513
519
inline std::atomic<bool >
514
- EXTERNAL_ID_TABLE_SORTER_IGNORE_MEMORY_LIMIT_FOR_TESTING = true ;
520
+ EXTERNAL_ID_TABLE_SORTER_IGNORE_MEMORY_LIMIT_FOR_TESTING = false ;
515
521
516
522
// The implementation of sorting a single block
517
523
template <typename Comparator>
@@ -604,6 +610,7 @@ class CompressedExternalIdTableSorter
604
610
std::max (1 , numBufferedOutputBlocks_ - 2 ))) {
605
611
co_yield block;
606
612
}
613
+ this ->isFirstMerge = false ;
607
614
mergeIsActive_.store (false );
608
615
}
609
616
@@ -623,36 +630,25 @@ class CompressedExternalIdTableSorter
623
630
}
624
631
625
632
private:
626
- // TODO<joka921> Implement `CallFixedSize` optimization also for the merging.
627
633
// Transition from the input phase, where `push()` may be called, to the
628
634
// output phase and return a generator that yields the sorted elements. This
629
635
// function may be called exactly once.
630
636
template <size_t N = NumStaticCols>
631
637
requires (N == NumStaticCols || N == 0 )
632
638
cppcoro::generator<IdTableStatic<N>> sortedBlocks (
633
639
std::optional<size_t > blocksize = std::nullopt) {
634
- auto impl = [blocksize, this ]<size_t I>() {
635
- if constexpr (NumStaticCols == 0 || NumStaticCols == I) {
636
- return sortedBlocksImpl<I>(blocksize);
637
- } else {
638
- AD_FAIL ();
639
- return sortedBlocksImpl<0 >(blocksize);
640
- }
641
- };
642
- auto generator =
643
- ad_utility::callFixedSize (this ->writer_ .numColumns (), impl);
644
- for (auto & block : generator) {
645
- co_yield std::move (block).template toStatic <N>();
646
- }
647
- /*
648
640
if (!this ->transformAndPushLastBlock ()) {
649
641
// There was only one block, return it. If a blocksize was explicitly
650
642
// requested for the output, and the single block is larger than this
651
643
// blocksize, we manually have to split it into chunks.
652
- auto& block = this->currentBlock_;
644
+ // TODO<joka921> doesn't need to be const...
645
+ const auto & block = this ->currentBlock_ ;
653
646
const auto blocksizeOutput = blocksize.value_or (block.numRows ());
654
647
if (block.numRows () <= blocksizeOutput) {
655
- co_yield std::move(this->currentBlock_).template toStatic<N>();
648
+ // TODO<joka921> We don't need the copy if we only want to iterate once; make this configurable.
649
+ auto blockAsStatic = IdTableStatic<N>(this ->currentBlock_ .clone ().template toStatic <N>());
650
+ co_yield blockAsStatic;
651
+ // co_yield std::move(this->currentBlock_).template toStatic<N>();
656
652
} else {
657
653
for (size_t i = 0 ; i < block.numRows (); i += blocksizeOutput) {
658
654
size_t upper = std::min (i + blocksizeOutput, block.numRows ());
@@ -713,84 +709,15 @@ class CompressedExternalIdTableSorter
713
709
numPopped += result.numRows ();
714
710
co_yield std::move (result).template toStatic <N>();
715
711
AD_CORRECTNESS_CHECK (numPopped == this ->numElementsPushed_ );
716
- */
717
- }
718
-
719
- // TODO<joka921> Implement `CallFixedSize` optimization also for the merging.
720
- // Transition from the input phase, where `push()` may be called, to the
721
- // output phase and return a generator that yields the sorted elements. This
722
- // function may be called exactly once.
723
- template <size_t N>
724
- cppcoro::generator<IdTableStatic<NumStaticCols>> sortedBlocksImpl (
725
- std::optional<size_t > blocksize = std::nullopt) {
726
- if (!this ->transformAndPushLastBlock ()) {
727
- // There was only one block, return it.
728
- co_yield std::move (this ->currentBlock_ )
729
- .template toStatic <NumStaticCols>();
730
- co_return ;
731
- }
732
- auto rowGenerators = this ->writer_ .template getAllRowGenerators <N>();
733
-
734
- const size_t blockSizeOutput =
735
- blocksize.value_or (computeBlockSizeForMergePhase (rowGenerators.size ()));
736
-
737
- using P = std::pair<decltype (rowGenerators[0 ].begin ()),
738
- decltype (rowGenerators[0 ].end ())>;
739
- auto projection = [](const auto & el) -> decltype (auto ) {
740
- return *el.first ;
741
- };
742
- // NOTE: We have to switch the arguments, because the heap operations by
743
- // default order descending...
744
- auto comp = [&, this ](const auto & a, const auto & b) {
745
- return comparator_ (projection (b), projection (a));
746
- };
747
- std::vector<P> pq;
748
-
749
- for (auto & gen : rowGenerators) {
750
- pq.emplace_back (gen.begin (), gen.end ());
751
- }
752
- std::ranges::make_heap (pq, comp);
753
- IdTableStatic<N> result (this ->writer_ .numColumns (),
754
- this ->writer_ .allocator ());
755
- result.reserve (blockSizeOutput);
756
- size_t numPopped = 0 ;
757
- while (!pq.empty ()) {
758
- std::ranges::pop_heap (pq, comp);
759
- auto & min = pq.back ();
760
- result.push_back (*min.first );
761
- ++(min.first );
762
- if (min.first == min.second ) {
763
- pq.pop_back ();
764
- } else {
765
- std::ranges::push_heap (pq, comp);
766
- }
767
- if (result.size () >= blockSizeOutput) {
768
- numPopped += result.numRows ();
769
- co_yield std::move (result).template toStatic <NumStaticCols>();
770
- // The `result` will be moved away, so we have to reset it again.
771
- result = IdTableStatic<N>(this ->writer_ .numColumns (),
772
- this ->writer_ .allocator ());
773
- result.reserve (blockSizeOutput);
774
- }
775
- }
776
- numPopped += result.numRows ();
777
- co_yield std::move (result).template toStatic <NumStaticCols>();
778
- AD_CORRECTNESS_CHECK (numPopped == this ->numElementsPushed_ );
779
712
}
780
713
781
714
// _____________________________________________________________
782
715
void sortBlockInPlace (IdTableStatic<NumStaticCols>& block) const {
783
- auto doSort = [&]<size_t I>() {
784
- auto staticBlock = std::move (block).template toStatic <I>();
785
716
#ifdef _PARALLEL_SORT
786
- ad_utility::parallel_sort (staticBlock.begin (), staticBlock.end (),
787
- comparator_);
717
+ ad_utility::parallel_sort (block.begin (), block.end (), comparator_);
788
718
#else
789
- std::ranges::sort (staticBlock , comparator_);
719
+ std::ranges::sort (block , comparator_);
790
720
#endif
791
- block = std::move (staticBlock).template toStatic <NumStaticCols>();
792
- };
793
- ad_utility::callFixedSize (block.numColumns (), doSort);
794
721
}
795
722
796
723
// A function with this name is needed by the mixin base class.
@@ -837,4 +764,4 @@ class CompressedExternalIdTableSorter
837
764
};
838
765
} // namespace ad_utility
839
766
840
- #endif // QLEVER_COMPRESSEDEXTERNALIDTABLE_H
767
+ #endif // QLEVER_COMPRESSEDEXTERNALIDTABLE_H
0 commit comments