diff --git a/lib/backend/src/BackEnd.cpp b/lib/backend/src/BackEnd.cpp
index d9a60300..a6672b65 100644
--- a/lib/backend/src/BackEnd.cpp
+++ b/lib/backend/src/BackEnd.cpp
@@ -124,9 +124,48 @@ class AddWindowsDLLExportPass: public PassInfoMixin<AddWindowsDLLExportPass>
 	};
 };
+struct mlir::rlc::TargetInfoImpl
+{
+	public:
+	TargetInfoImpl(std::string triple, bool shared, bool optimize)
+			: triple(triple),
+				optimize(
+						optimize ? CodeGenOptLevel::Aggressive : CodeGenOptLevel::Default),
+				reloc(shared ? llvm::Reloc::PIC_ : llvm::Reloc::Static)
+	{
+		std::string Error;
+		target = llvm::TargetRegistry::lookupTarget("", this->triple, Error);
+		assert(target);
+		options = llvm::codegen::InitTargetOptionsFromCodeGenFlags(this->triple);
+
+		auto *Ptr = target->createTargetMachine(
+				this->triple.getTriple(),
+				"",
+				"",
+				options,
+				reloc,
+				llvm::CodeModel::Large,
+				this->optimize);
+		targetMachine = std::unique_ptr<llvm::TargetMachine>(Ptr);
+
+		datalayout =
+				std::make_unique<llvm::DataLayout>(targetMachine->createDataLayout());
+	}
+
+	llvm::Triple triple;
+	llvm::CodeModel::Model model;
+	llvm::CodeGenOptLevel optimize;
+	llvm::Reloc::Model reloc;
+	const llvm::Target *target;
+	llvm::TargetOptions options;
+	std::unique_ptr<llvm::TargetMachine> targetMachine;
+	std::unique_ptr<llvm::DataLayout> datalayout;
+};
+
 static const bool printTimings = false;
 
 static void runOptimizer(
+		const mlir::rlc::TargetInfoImpl &pimpl,
 		llvm::Module &M,
 		bool optimize,
 		bool emitSanitizerInstrumentation,
@@ -144,17 +183,22 @@ static void runOptimizer(
 	if (printTimings)
 		TimePasses->registerCallbacks(PIC);
 
+	// Create the analysis managers.
 	LoopAnalysisManager LAM;
 	FunctionAnalysisManager FAM;
 	CGSCCAnalysisManager CGAM;
 	ModuleAnalysisManager MAM;
 
+	PipelineTuningOptions PTO;
+	// If !optimize, this is ignored by the O0 pipeline built below.
+	PTO.SLPVectorization = true;
+
 	// Create the new pass manager builder.
 	// Take a look at the PassBuilder constructor parameters for more
 	// customization, e.g. specifying a TargetMachine or various debugging
 	// options.
-	PassBuilder PB(nullptr, llvm::PipelineTuningOptions(), std::nullopt, &PIC);
+	PassBuilder PB(pimpl.targetMachine.get(), PTO, std::nullopt, &PIC);
 
 	// Register all the basic analyses with the managers.
 	PB.registerModuleAnalyses(MAM);
@@ -165,73 +209,35 @@ static void runOptimizer(
 	// Create the pass manager.
 	// This one corresponds to a typical -O2 optimization pipeline.
+	ModulePassManager MPM;
+
 	if (optimize)
 	{
-		ModulePassManager passManager;
 		FunctionPassManager functionPassManager;
+		MPM = PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
 		if (targetIsWindows)
 			functionPassManager.addPass(AddWindowsDLLExportPass());
 		functionPassManager.addPass(llvm::PromotePass());
-		passManager.addPass(
+		MPM.addPass(
 				createModuleToFunctionPassAdaptor(std::move(functionPassManager)));
-		if (emitSanitizerInstrumentation and not targetIsWindows)
-			addFuzzerInstrumentationPass(passManager);
-		passManager.run(M, MAM);
-
-		PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2).run(M, MAM);
 	}
 	else
 	{
-		ModulePassManager MPM = PB.buildO0DefaultPipeline(
+		MPM = PB.buildO0DefaultPipeline(
 				OptimizationLevel::O0, ThinOrFullLTOPhase::None);
 		if (targetIsWindows)
 			MPM.addPass(createModuleToFunctionPassAdaptor(AddWindowsDLLExportPass()));
-		if (emitSanitizerInstrumentation and not targetIsWindows)
-			addFuzzerInstrumentationPass(MPM);
-		MPM.run(M, MAM);
 	}
 
+	if (emitSanitizerInstrumentation and not targetIsWindows)
+		addFuzzerInstrumentationPass(MPM);
+
+	MPM.run(M, MAM);
+
 	if (printTimings)
 		TimePasses->print();
 }
 
-struct mlir::rlc::TargetInfoImpl
-{
-	public:
-	TargetInfoImpl(std::string triple, bool shared, bool optimize)
-			: triple(triple),
-				optimize(
-						optimize ? CodeGenOptLevel::Aggressive : CodeGenOptLevel::Default),
-				reloc(shared ? llvm::Reloc::PIC_ : llvm::Reloc::Static)
-	{
-		std::string Error;
-		target = llvm::TargetRegistry::lookupTarget("", this->triple, Error);
-		assert(target);
-		options = llvm::codegen::InitTargetOptionsFromCodeGenFlags(this->triple);
-
-		auto *Ptr = target->createTargetMachine(
-				this->triple.getTriple(),
-				"",
-				"",
-				options,
-				reloc,
-				llvm::CodeModel::Large,
-				this->optimize);
-		targetMachine = std::unique_ptr<llvm::TargetMachine>(Ptr);
-
-		datalayout =
-				std::make_unique<llvm::DataLayout>(targetMachine->createDataLayout());
-	}
-
-	llvm::Triple triple;
-	llvm::CodeModel::Model model;
-	llvm::CodeGenOptLevel optimize;
-	llvm::Reloc::Model reloc;
-	const llvm::Target *target;
-	llvm::TargetOptions options;
-	std::unique_ptr<llvm::TargetMachine> targetMachine;
-	std::unique_ptr<llvm::DataLayout> datalayout;
-};
 
 mlir::rlc::TargetInfo::TargetInfo(
 		std::string triple, bool shared, bool optimize)
@@ -556,7 +562,7 @@ namespace mlir::rlc
 		assert(Module);
 		Module->setTargetTriple(targetInfo->triple());
 
-		runOptimizer(
+		runOptimizer(*targetInfo->pimpl,
 				*Module,
 				targetInfo->optimize(),
 				emitSanitizer,
diff --git a/lib/utils/benchmark/src/DictIntBenchBenchmark.cpp b/lib/utils/benchmark/src/DictIntBenchBenchmark.cpp
index 4d01ce6c..9a24cf3e 100644
--- a/lib/utils/benchmark/src/DictIntBenchBenchmark.cpp
+++ b/lib/utils/benchmark/src/DictIntBenchBenchmark.cpp
@@ -30,7 +30,7 @@ static std::random_device rd;
 static std::mt19937 gen(rd());
 
 constexpr std::size_t minRange = 1 << 2;
-constexpr std::size_t maxRange = 1 << 10;
+constexpr std::size_t maxRange = 1 << 20;
 
 // Helper function to generate random unique integers
 static std::vector<int> GenerateRandomIntegers(int n)
diff --git a/stdlib/collections/SoA_dictionary.rl b/stdlib/collections/SoA_dictionary.rl
new file mode 100644
index 00000000..5c37696a
--- /dev/null
+++ b/stdlib/collections/SoA_dictionary.rl
@@ -0,0 +1,426 @@
+## Copyright 2024 Samuele Pasini
+##
+## Licensed under the Apache License, Version 2.0 (the "License");
+## you may not use this file except in compliance with the License.
+## You may obtain a copy of the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+
+import serialization.to_hash
+import serialization.print
+import serialization.key_equal
+import collections.vector
+
+const _max_load_factor = 0.75
+const _init_capacity = 4
+const _stride = 17
+
+cls<KeyType, ValueType> Dict_SoA:
+	OwningPtr<KeyType> _keys
+	OwningPtr<ValueType> _values
+	OwningPtr<Int> _hashes
+	Int _size
+	Int _capacity
+
+	fun init():
+		self._capacity = _init_capacity
+		self._size = 0
+		self._keys = __builtin_malloc_do_not_use(self._capacity)
+		self._values = __builtin_malloc_do_not_use(self._capacity)
+		self._hashes = __builtin_malloc_do_not_use(self._capacity)
+		let counter = 0
+		while counter < _init_capacity:
+			self._hashes[counter] = -1
+			counter = counter + 1
+
+	fun insert(KeyType key, ValueType value) -> Bool:
+		# if called after drop()
+		if self._capacity == 0:
+			self.init()
+		else:
+			let load_factor : Float
+			load_factor = float(self._size + 1) / float(self._capacity)
+			if load_factor > _max_load_factor:
+				self._grow()
+
+		self._insert(key, value)
+		return true
+
+	fun _insert(KeyType key, ValueType value):
+		let hash = compute_hash_of(key)
+		let index = hash % self._capacity
+		let distance = 0
+		let probe_count = 0 # Add safety counter
+
+		# Create local copies of key and value to avoid modifying the input parameters
+		let current_key = key
+		let current_value = value
+
+		while probe_count < self._capacity:
+			if self._hashes[index] == -1:
+				__builtin_construct_do_not_use(self._keys[index])
+				__builtin_construct_do_not_use(self._values[index])
+				self._hashes[index] = hash
+				self._keys[index] = current_key
+				self._values[index] = current_value
+				self._size = self._size + 1
+				return
+			else if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+				self._values[index] = current_value # Update the actual entry in place
+				return
+			let existing_entry_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+			if existing_entry_distance < distance:
+				let tmp_hash = hash
+				let tmp_key = current_key
+				let tmp_val = current_value
+
+				current_value = self._values[index]
+				current_key = self._keys[index]
+				hash = self._hashes[index]
+
+				self._values[index] = tmp_val
+				self._keys[index] = tmp_key
+				self._hashes[index] = tmp_hash
+
+				distance = existing_entry_distance
+			index = index + 1
+			if index == self._capacity:
+				index = 0
+			distance = distance + 1
+			probe_count = probe_count + 1
+		assert(false, "Maximum probe count exceeded - likely an implementation bug")
+		return
+
+	fun get(KeyType key) -> ValueType:
+		# Quick return for empty dictionary
+		if self._size == 0:
+			assert(false, "key not found in empty dictionary")
+
+		let hash = compute_hash_of(key)
+		let index = hash % self._capacity
+		let probe_count = 0 # Add safety counter
+
+		# STANDARD IMPL
+		# while probe_count < self._capacity:
+		#	if self._hashes[index] == -1:
+		#		assert(false, "key not found")
+		#	if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+		#		return self._values[index]
+		#	let existing_entry_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+		#	if existing_entry_distance < probe_count:
+		#		assert(false, "key not found")
+		#	index = index + 1
+		#	probe_count = probe_count + 1
+
+		#	if index >= self._capacity:
+		#		index = 0
+
+		if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+			return self._values[index]
+
+		while probe_count < self._capacity:
+			if self._hashes[index] == -1:
+				assert(false, "key not found")
+
+			let diff = self._capacity - index - _stride
+			# TODO: put a size threshold here
+			# take the strided fast path only when the whole window stays in bounds
+			if diff > 0:
+				let counter = 0
+				let sum = false
+				while counter < _stride :
+					sum = sum + (self._hashes[index + counter] == hash) * (compute_equal_of(self._keys[index + counter], key))
+					counter = counter + 1
+				if sum :
+					counter = 0
+					while counter < _stride :
+						if self._hashes[index + counter] == hash and compute_equal_of(self._keys[index + counter], key):
+							return self._values[index + counter]
+						counter = counter + 1
+
+				let last_distance = (index + _stride + self._capacity - (self._hashes[index + _stride] % self._capacity)) % self._capacity
+				probe_count = probe_count + _stride
+				if last_distance < probe_count - 1:
+					assert(false, "key not found")
+				index = index + _stride
+			else:
+				while index < self._capacity:
+					if self._hashes[index] == -1:
+						assert(false, "key not found")
+					if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+						return self._values[index]
+					let existing_entry_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+					if existing_entry_distance < probe_count:
+						assert(false, "key not found")
+					index = index + 1
+					probe_count = probe_count + 1
+				index = 0
+
+		assert(false, "GET: Maximum probe count exceeded - likely an implementation bug")
+		return self._values[0]
+
+	fun contains(KeyType key) -> Bool:
+		# Quick return for empty dictionary
+		if self._size == 0:
+			return false
+
+		let hash = compute_hash_of(key)
+		let index = hash % self._capacity
+		let start_index = index
+		let probe_count = 0 # Add safety counter
+
+		while probe_count < self._capacity:
+			if self._hashes[index] == -1:
+				return false
+			let diff = self._capacity - index - _stride
+			if diff > 0:
+				if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+					return true
+				let counter = 0
+				let sum = false
+				while counter < _stride :
+					sum = sum + (self._hashes[index + counter] == hash) * (compute_equal_of(self._keys[index + counter], key))
+					counter = counter + 1
+				if sum :
+					return true
+				index = index + _stride
+				probe_count = probe_count + _stride
+				let last_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+				if last_distance < probe_count - 1:
+					return false
+			else:
+				while index < self._capacity:
+					if self._hashes[index] == -1:
+						return false
+					if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+						return true
+					let existing_entry_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+					if existing_entry_distance < probe_count:
+						return false
+					index = index + 1
+					probe_count = probe_count + 1
+				index = 0
+
+		assert(false, "CONTAINS: Maximum probe count exceeded - likely an implementation bug")
+		return false
+
+	fun _shift_back(Int index):
+		self._size = self._size - 1
+
+		# Perform backward-shift operation
+		let next_index = (index + 1) % self._capacity
+		let current_index = index
+
+		# Shift elements until we find an empty slot or an element with probe distance 0
+		while true:
+			let next_hash = self._hashes[next_index]
+			let next_key = self._keys[next_index]
+			let next_val = self._values[next_index]
+
+			if next_hash == -1:
+				self._hashes[current_index] = -1
+				return
+
+			# Calculate probe distance of the next element
+			let next_probe_distance = (next_index + self._capacity - (next_hash % self._capacity)) % self._capacity
+
+			# If probe distance is 0, it's already at its ideal position
+			if next_probe_distance == 0:
+				self._hashes[current_index] = -1
+				return
+
+			# Move the element back
+			self._keys[current_index] = next_key
+			self._values[current_index] = next_val
+			self._hashes[current_index] = next_hash
+
+			# Move to next positions
+			current_index = next_index
+			next_index = (next_index + 1) % self._capacity
+		return
+
+	fun remove(KeyType key) -> Bool:
+		if self._size == 0:
+			return false
+
+		let hash = compute_hash_of(key)
+		let index = hash % self._capacity
+		let probe_count = 0 # Add safety counter
+
+		# STANDARD IMPL
+		# while probe_count < self._capacity:
+		#	if self._hashes[index] == -1:
+		#		return false
+		#	if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+		#		self._shift_back(index)
+		#		return true
+
+		#	let existing_entry_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+		#	if existing_entry_distance < probe_count:
+		#		return false
+		#	index = index + 1
+		#	probe_count = probe_count + 1
+
+		#	if index >= self._capacity:
+		#		index = 0
+
+		if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+			self._shift_back(index)
+			return true
+
+		while probe_count < self._capacity:
+			if self._hashes[index] == -1:
+				return false
+
+			let diff = self._capacity - index - _stride
+			# TODO: put a size threshold here
+			if diff > 0:
+				let counter = 0
+				let sum = false
+				while counter < _stride :
+					sum = sum + (self._hashes[index + counter] == hash) * (compute_equal_of(self._keys[index + counter], key))
+					counter = counter + 1
+				if sum :
+					counter = 0
+					while counter < _stride :
+						if self._hashes[index + counter] == hash and compute_equal_of(self._keys[index + counter], key):
+							self._shift_back(index + counter)
+							return true
+						counter = counter + 1
+					assert(false, "Error: Reached the end of getting index loop of a found element")
+
+				index = index + _stride
+				probe_count = probe_count + _stride
+				let last_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+				if last_distance < probe_count - 1:
+					return false
+			else:
+				while index < self._capacity:
+					if self._hashes[index] == -1:
+						return false
+					if self._hashes[index] == hash and compute_equal_of(self._keys[index], key):
+						self._shift_back(index)
+						return true
+
+					let existing_entry_distance = (index + self._capacity - (self._hashes[index] % self._capacity)) % self._capacity
+					if existing_entry_distance < probe_count:
+						return false
+					index = index + 1
+					probe_count = probe_count + 1
+				index = 0
+
+		assert(false, "REMOVE: Maximum probe count exceeded - likely an implementation bug")
+		return false
+
+
+	fun keys() -> Vector<KeyType>:
+		let to_return : Vector<KeyType>
+		to_return.resize(self._size)
+		let counter = 0
+		let index = 0
+		while counter < self._size:
+			if self._hashes[index] != -1:
+				to_return.set(counter, self._keys[index])
+				counter = counter + 1
+			index = index + 1
+		return to_return
+
+	fun values() -> Vector<ValueType>:
+		let to_return : Vector<ValueType>
+		to_return.resize(self._size)
+		let counter = 0
+		let index = 0
+		while counter < self._size:
+			if self._hashes[index] != -1:
+				to_return.set(counter, self._values[index])
+				counter = counter + 1
+			index = index + 1
+		return to_return
+
+	# returns true if the
+	# size of the dictionary is equal
+	# to zero
+	fun empty() -> Bool:
+		return self._size == 0
+
+	fun size() -> Int:
+		return self._size
+
+	fun _clear():
+		let counter = 0
+		while counter < self._capacity:
+			__builtin_destroy_do_not_use(self._keys[counter])
+			__builtin_destroy_do_not_use(self._values[counter])
+			counter = counter + 1
+		if self._capacity != 0:
+			__builtin_free_do_not_use(self._values)
+			__builtin_free_do_not_use(self._keys)
+			__builtin_free_do_not_use(self._hashes)
+
+	# erases all the elements
+	# of the dictionary
+	fun clear():
+		self._clear()
+		self.init()
+
+	fun _grow():
+		let old_capacity = self._capacity
+		let old_hashes = self._hashes
+		let old_values = self._values
+		let old_keys = self._keys
+
+		# Create new, larger array
+		self._capacity = (self._capacity << 1) #+ 1
+		self._keys = __builtin_malloc_do_not_use(self._capacity)
+		self._values = __builtin_malloc_do_not_use(self._capacity)
+		self._hashes = __builtin_malloc_do_not_use(self._capacity)
+		self._size = 0
+
+		# Initialize new entries
+		let counter = 0
+		while counter < self._capacity:
+			self._hashes[counter] = -1
+			counter = counter + 1
+
+		# Copy old entries to new array, but only scan up to old_capacity
+		counter = 0
+		while counter < old_capacity:
+			if old_hashes[counter] != -1:
+				# Insert directly without triggering another growth
+				self._insert(old_keys[counter], old_values[counter])
+			counter = counter + 1
+
+		# Clean up old entries
+		counter = 0
+		while counter < old_capacity:
+			__builtin_destroy_do_not_use(old_keys[counter])
+			__builtin_destroy_do_not_use(old_values[counter])
+			counter = counter + 1
+
+		__builtin_free_do_not_use(old_keys)
+		__builtin_free_do_not_use(old_values)
+		__builtin_free_do_not_use(old_hashes)
+		return
+
+	fun drop():
+		self._clear()
+		self._size = 0
+		self._capacity = 0
+
+	fun print_dict():
+		let counter = 0
+		print("Dictionary")
+		print("Size = "s + to_string(self._size) + ", Capacity = "s + to_string(self._capacity))
+		print("Index | Hash | Value | Key")
+		while counter < self._capacity:
+			if self._hashes[counter] != -1:
+				print(to_string(counter) + " | "s +
+							to_string(self._hashes[counter]) + " | "s +
+							to_string(self._values[counter]) + " | "s +
+							to_string(self._keys[counter]))
+			counter = counter + 1
\ No newline at end of file
diff --git a/stdlib/collections/dictionary.rl b/stdlib/collections/dictionary.rl
index e1c834df..73e91e24 100644
--- a/stdlib/collections/dictionary.rl
+++ b/stdlib/collections/dictionary.rl
@@ -243,9 +243,7 @@ cls Dict:
 	fun empty() -> Bool:
 		return self._size == 0
 
-	# returns true if the
-	# size of the dictionary is equal
-	# to zero
+	# returns the number of occupied entries
 	fun size() -> Int:
 		return self._size
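
Usage sketch for the new Dict_SoA container: a minimal, illustrative example only, not part of the patch itself. The import path and the Int-returning main entry point are assumptions; only the insert/get/contains/remove/size calls come from the API defined above.

	import collections.SoA_dictionary

	fun main() -> Int:
		# declaring the dictionary runs init(), giving an empty table of capacity _init_capacity
		let d : Dict_SoA<Int, Int>
		d.insert(1, 10)
		d.insert(2, 20)
		# re-inserting an existing key overwrites its value without changing the size
		d.insert(2, 25)
		if d.size() != 2:
			return 1
		if d.get(2) != 25:
			return 2
		if d.contains(3):
			return 3
		if d.remove(1) and d.size() == 1:
			return 0
		return 4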