diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index ef9fb48..0c2f0c4 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -237,9 +237,36 @@ NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types))
 ```
 
 This would generate a total of 36 configurations and instantiate the benchmark 6
-times. Keep the rapid growth of these combinations in mind when choosing the
-number of values in an axis. See the section about combinatorial explosion for
-more examples and information.
+times.
+
+Keep the rapid growth of combinations across multiple parameter axes in mind
+when choosing the number of values in an axis. See the section on combinatorial
+explosion for more examples and information.
+
+## Zipped Iteration of Value Axes
+
+Sometimes multiple value axes need to be iterated in lockstep, as if their
+values were zipped together into a single axis of tuples. To enable this
+behavior, request that the axes be 'zipped' together:
+
+```cpp
+// InputTypes: {char, int, unsigned int}
+// OutputTypes: {float, double}
+// NumInputs: {1000, 10000, 100000, 200000, 200000, 200000}
+// Quality: {0.05, 0.1, 0.25, 0.5, 0.75, 1.}
+
+using input_types = nvbench::type_list<char, int, unsigned int>;
+using output_types = nvbench::type_list<float, double>;
+NVBENCH_BENCH_TYPES(benchmark, NVBENCH_TYPE_AXES(input_types, output_types))
+  .set_type_axes_names({"InputType", "OutputType"})
+  .add_zip_axes(nvbench::int64_axis{"NumInputs", {1000, 10000, 100000, 200000, 200000, 200000}},
+                nvbench::float64_axis{"Quality", {0.05, 0.1, 0.25, 0.5, 0.75, 1.}});
+```
+
+Zipping `NumInputs` and `Quality` pairs their values by position, reducing the
+total number of configurations from 216 to 36.
+
+Note: Only value axes may be zipped together.
 
 # Throughput Measurements
 
@@ -426,9 +453,9 @@ NVBENCH_BENCH_TYPES(my_benchmark,
 ```
 
 For large configuration spaces like this, pruning some of the less useful
-combinations (e.g. `sizeof(init_type) < sizeof(output)`) using the techniques
-described in the "Skip Uninteresting / Invalid Benchmarks" section can help
-immensely with keeping compile / run times manageable.
+combinations using the techniques described in the [Zipped Iteration of Value Axes](#zipped-iteration-of-value-axes)
+or [Skip Uninteresting / Invalid Benchmarks](#skip-uninteresting--invalid-benchmarks) sections can help immensely with
+keeping compile / run times manageable.
 
 Splitting a single large configuration space into multiple, more focused
 benchmarks with reduced dimensionality will likely be worth the effort as well.
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 219fc89..67e9132 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -7,6 +7,7 @@ set(example_srcs
   stream.cu
   throughput.cu
   auto_throughput.cu
+  custom_iteration_spaces.cu
 )
 
 # Metatarget for all examples:
diff --git a/examples/custom_iteration_spaces.cu b/examples/custom_iteration_spaces.cu
new file mode 100644
index 0000000..1b0ae9e
--- /dev/null
+++ b/examples/custom_iteration_spaces.cu
@@ -0,0 +1,233 @@
+/*
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 with the LLVM exception
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
+ *
+ * You may obtain a copy of the License at
+ *
+ *     http://llvm.org/foundation/relicensing/LICENSE.txt
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <nvbench/nvbench.cuh>
+
+// Grab some testing kernels from NVBench:
+#include <nvbench/test_kernels.cuh>
+
+// Thrust vectors simplify memory management:
+#include <thrust/device_vector.h>
+
+#include <random>
+
+//==============================================================================
+// Multiple parameters:
+// Varies block_size and num_blocks while invoking a naive copy of 256 MiB worth
+// of int32_t.
+void copy_sweep_grid_shape(nvbench::state &state)
+{
+  // Get current parameters:
+  const int block_size = static_cast<int>(state.get_int64("BlockSize"));
+  const int num_blocks = static_cast<int>(state.get_int64("NumBlocks"));
+
+  // Number of int32s in 256 MiB:
+  const std::size_t num_values = 256 * 1024 * 1024 / sizeof(nvbench::int32_t);
+
+  // Report throughput stats:
+  state.add_element_count(num_values);
+  state.add_global_memory_reads<nvbench::int32_t>(num_values);
+  state.add_global_memory_writes<nvbench::int32_t>(num_values);
+
+  // Allocate device memory:
+  thrust::device_vector<nvbench::int32_t> in(num_values, 0);
+  thrust::device_vector<nvbench::int32_t> out(num_values, 0);
+
+  state.exec(
+    [block_size,
+     num_blocks,
+     num_values,
+     in_ptr  = thrust::raw_pointer_cast(in.data()),
+     out_ptr = thrust::raw_pointer_cast(out.data())](nvbench::launch &launch) {
+      nvbench::copy_kernel<<<num_blocks, block_size, 0, launch.get_stream()>>>(
+        in_ptr,
+        out_ptr,
+        num_values);
+    });
+}
+
+//==============================================================================
+// Naive iteration of both the BlockSize and NumBlocks axes.
+// Will generate the full cartesian product of the two axes for a total of
+// 16 invocations of copy_sweep_grid_shape.
+NVBENCH_BENCH(copy_sweep_grid_shape)
+  .set_name("naive_copy_sweep_grid_shape")
+  .add_int64_axis("BlockSize", {32, 64, 128, 256})
+  .add_int64_axis("NumBlocks", {1024, 512, 256, 128});
+
+//==============================================================================
+// Zipped iteration of BlockSize and NumBlocks axes.
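+// Zipped axes advance in lockstep, pairing values by position: configuration i
+// uses (BlockSize[i], NumBlocks[i]), i.e. (32,1024), (64,512), (128,256), (256,128).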
+// Will generate only 4 invocations of copy_sweep_grid_shape +NVBENCH_BENCH(copy_sweep_grid_shape) + .set_name("zipped_copy_sweep_grid_shape") + .add_zip_axes(nvbench::int64_axis{"BlockSize", {32, 64, 128, 256}}, + nvbench::int64_axis{"NumBlocks", {1024, 512, 256, 128}}); + +//============================================================================== +// under_diag: +// Custom iterator that only searches the `X` locations of two axes: +// [- - - - X] +// [- - - X X] +// [- - X X X] +// [- X X X X] +// [X X X X X] +// +struct under_diag final : nvbench::user_axis_space +{ + under_diag(std::vector input_indices) + : nvbench::user_axis_space(std::move(input_indices)) + {} + + mutable std::size_t x_pos = 0; + mutable std::size_t y_pos = 0; + mutable std::size_t x_start = 0; + + nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + { + // generate our increment function + auto adv_func = [&, info](std::size_t &inc_index, + std::size_t /*len*/) -> bool { + inc_index++; + x_pos++; + if (x_pos == info[0].size) + { + x_pos = ++x_start; + y_pos = x_start; + return true; + } + return false; + }; + + // our update function + auto diag_under = + [&, info](std::size_t, + std::vector::iterator start, + std::vector::iterator end) { + start->index = x_pos; + end->index = y_pos; + }; + + const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); + return nvbench::detail::axis_space_iterator(info, + iteration_length, + adv_func, + diag_under); + } + + std::size_t do_get_size(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::size_t do_get_active_count(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::unique_ptr do_clone() const + { + return std::make_unique(*this); + } +}; + +NVBENCH_BENCH(copy_sweep_grid_shape) + .set_name("user_copy_sweep_grid_shape") + .add_user_iteration_axes( + [](auto... 
args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::int64_axis("BlockSize", {64, 128, 256, 512, 1024}), + nvbench::int64_axis("NumBlocks", {1024, 521, 256, 128, 64})); + +//============================================================================== +// gauss: +// Custom iteration space that uses a gauss distribution to +// sample the points near the middle of the index space +// +struct gauss final : nvbench::user_axis_space +{ + + gauss(std::vector input_indices) + : nvbench::user_axis_space(std::move(input_indices)) + {} + + nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + { + const double mid_point = static_cast((info[0].size / 2)); + + std::random_device rd{}; + std::mt19937 gen{rd()}; + std::normal_distribution<> d{mid_point, 2}; + + const size_t iteration_length = info[0].size; + std::vector gauss_indices(iteration_length); + for (auto &g : gauss_indices) + { + auto v = std::min(static_cast(info[0].size), d(gen)); + v = std::max(0.0, v); + g = static_cast(v); + } + + // our update function + auto gauss_func = + [=](std::size_t index, + std::vector::iterator start, + std::vector::iterator) { + start->index = gauss_indices[index]; + }; + + return nvbench::detail::axis_space_iterator(info, + iteration_length, + gauss_func); + } + + std::size_t do_get_size(const axes_info &info) const { return info[0].size; } + + std::size_t do_get_active_count(const axes_info &info) const + { + return info[0].size; + } + + std::unique_ptr do_clone() const + { + return std::make_unique(*this); + } +}; +//============================================================================== +// Dual parameter sweep: +void dual_float64_axis(nvbench::state &state) +{ + const auto duration_A = state.get_float64("Duration_A"); + const auto duration_B = state.get_float64("Duration_B"); + + state.exec([duration_A, duration_B](nvbench::launch &launch) { + nvbench::sleep_kernel<<<1, 1, 0, launch.get_stream()>>>(duration_A + + duration_B); + }); +} +NVBENCH_BENCH(dual_float64_axis) + .add_user_iteration_axes( + [](auto... args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::float64_axis("Duration_A", nvbench::range(0., 1e-4, 1e-5))) + .add_user_iteration_axes( + [](auto... 
args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::float64_axis("Duration_B", nvbench::range(0., 1e-4, 1e-5))); diff --git a/nvbench/CMakeLists.txt b/nvbench/CMakeLists.txt index f86bd41..3fa320c 100644 --- a/nvbench/CMakeLists.txt +++ b/nvbench/CMakeLists.txt @@ -1,6 +1,7 @@ set(srcs axes_metadata.cxx axis_base.cxx + iteration_space_base.cxx benchmark_base.cxx benchmark_manager.cxx blocking_kernel.cu @@ -10,6 +11,7 @@ set(srcs device_manager.cu float64_axis.cxx int64_axis.cxx + linear_axis_space.cxx markdown_printer.cu named_values.cxx option_parser.cu @@ -20,6 +22,8 @@ set(srcs string_axis.cxx type_axis.cxx type_strings.cxx + user_axis_space.cxx + zip_axis_space.cxx detail/measure_cold.cu detail/measure_hot.cu diff --git a/nvbench/axes_metadata.cuh b/nvbench/axes_metadata.cuh index 353855a..83ae4cd 100644 --- a/nvbench/axes_metadata.cuh +++ b/nvbench/axes_metadata.cuh @@ -20,10 +20,15 @@ #include #include +#include +#include #include #include #include +#include +#include +#include #include #include #include @@ -37,6 +42,8 @@ namespace nvbench struct axes_metadata { using axes_type = std::vector>; + using iteration_space_type = + std::vector>; template explicit axes_metadata(nvbench::type_list); @@ -58,6 +65,38 @@ struct axes_metadata void add_string_axis(std::string name, std::vector data); + void add_axis(const axis_base &axis); + + template + void add_zip_axes(Args &&...args) + { + const std::size_t start = this->m_axes.size(); + const std::size_t count = sizeof...(Args); + // (this->add_axis(std::forward(args)), ...); + (m_axes.push_back(args.clone()), ...); + this->add_zip_space(start, count); + } + + template + void add_user_iteration_axes( + std::function make, + Args &&...args) + { + const std::size_t start = this->m_axes.size(); + const std::size_t count = sizeof...(Args); + (m_axes.push_back(args.clone()), ...); + this->add_user_iteration_space(std::move(make), start, count); + } + + [[nodiscard]] const iteration_space_type &get_type_iteration_space() const + { + return m_type_space; + } + [[nodiscard]] const iteration_space_type &get_value_iteration_space() const + { + return m_value_space; + } + [[nodiscard]] const nvbench::int64_axis & get_int64_axis(std::string_view name) const; [[nodiscard]] nvbench::int64_axis &get_int64_axis(std::string_view name); @@ -93,6 +132,15 @@ struct axes_metadata private: axes_type m_axes; + std::size_t m_type_axe_count = 0; + iteration_space_type m_type_space; + iteration_space_type m_value_space; + + void add_zip_space(std::size_t first_index, std::size_t count); + void add_user_iteration_space( + std::function make, + std::size_t first_index, + std::size_t count); }; template @@ -105,11 +153,15 @@ axes_metadata::axes_metadata(nvbench::type_list) auto names_iter = names.begin(); // contents will be moved from nvbench::tl::foreach( - [&axes = m_axes, &names_iter]([[maybe_unused]] auto wrapped_type) { + [&axes = m_axes, &spaces = m_type_space, &names_iter]( + [[maybe_unused]] auto wrapped_type) { // This is always called before other axes are added, so the length of the // axes vector will be the type axis index: const std::size_t type_axis_index = axes.size(); + spaces.push_back( + std::make_unique(type_axis_index)); + // Note: // The word "type" appears 6 times in the next line. // Every. Single. Token. 
@@ -119,6 +171,7 @@ axes_metadata::axes_metadata(nvbench::type_list) axis->template set_inputs(); axes.push_back(std::move(axis)); }); + m_type_axe_count = m_axes.size(); } } // namespace nvbench diff --git a/nvbench/axes_metadata.cxx b/nvbench/axes_metadata.cxx index 044bc91..a6dea06 100644 --- a/nvbench/axes_metadata.cxx +++ b/nvbench/axes_metadata.cxx @@ -24,7 +24,11 @@ #include #include +#include #include +#include + +#include namespace nvbench { @@ -36,6 +40,19 @@ axes_metadata::axes_metadata(const axes_metadata &other) { m_axes.push_back(axis->clone()); } + + m_type_axe_count = other.m_type_axe_count; + m_type_space.reserve(other.m_type_space.size()); + for (const auto &iter : other.m_type_space) + { + m_type_space.push_back(iter->clone()); + } + + m_value_space.reserve(other.m_value_space.size()); + for (const auto &iter : other.m_value_space) + { + m_value_space.push_back(iter->clone()); + } } axes_metadata &axes_metadata::operator=(const axes_metadata &other) @@ -46,6 +63,23 @@ axes_metadata &axes_metadata::operator=(const axes_metadata &other) { m_axes.push_back(axis->clone()); } + + m_type_axe_count = other.m_type_axe_count; + + m_type_space.clear(); + m_type_space.reserve(other.m_type_space.size()); + for (const auto &iter : other.m_type_space) + { + m_type_space.push_back(iter->clone()); + } + + m_value_space.clear(); + m_value_space.reserve(other.m_value_space.size()); + for (const auto &iter : other.m_value_space) + { + m_value_space.push_back(iter->clone()); + } + return *this; } @@ -84,26 +118,81 @@ catch (std::exception &e) void axes_metadata::add_float64_axis(std::string name, std::vector data) { - auto axis = std::make_unique(std::move(name)); - axis->set_inputs(std::move(data)); - m_axes.push_back(std::move(axis)); + this->add_axis(nvbench::float64_axis{name, data}); } void axes_metadata::add_int64_axis(std::string name, std::vector data, nvbench::int64_axis_flags flags) { - auto axis = std::make_unique(std::move(name)); - axis->set_inputs(std::move(data), flags); - m_axes.push_back(std::move(axis)); + this->add_axis(nvbench::int64_axis{name, data, flags}); } void axes_metadata::add_string_axis(std::string name, std::vector data) { - auto axis = std::make_unique(std::move(name)); - axis->set_inputs(std::move(data)); - m_axes.push_back(std::move(axis)); + this->add_axis(nvbench::string_axis{name, data}); +} + +void axes_metadata::add_axis(const axis_base &axis) +{ + m_value_space.push_back( + std::make_unique(m_axes.size())); + m_axes.push_back(axis.clone()); +} + +void axes_metadata::add_zip_space(std::size_t first_index, std::size_t count) +{ + NVBENCH_THROW_IF((count < 2), + std::runtime_error, + "At least two axi ( {} provided ) need to be provided " + "when using zip_axes.", + count); + + // compute the numeric indice for each name we have + std::vector input_indices(count); + std::iota(input_indices.begin(), input_indices.end(), first_index); + + const auto expected_size = m_axes[input_indices[0]]->get_size(); + for (auto i : input_indices) + { + NVBENCH_THROW_IF((m_axes[i]->get_type() == nvbench::axis_type::type), + std::runtime_error, + "Currently no support for tieing type axis ( {} ).", + m_axes[i]->get_name()); + + NVBENCH_THROW_IF((m_axes[i]->get_size() < expected_size), + std::runtime_error, + "All axes that are tied together must be atleast as long " + "the first axi provided ( {} ).", + expected_size); + } + + // add the new tied iteration space + auto tied = std::make_unique(std::move(input_indices)); + m_value_space.push_back(std::move(tied)); +} + 
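+// Wrap the value axes at [first_index, first_index + count) in an iteration
+// space constructed by the caller-provided `make` factory.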
+void axes_metadata::add_user_iteration_space( + std::function make, + std::size_t first_index, + std::size_t count) +{ + // compute the numeric indice for each name we have + std::vector input_indices(count); + std::iota(input_indices.begin(), input_indices.end(), first_index); + + for (auto i : input_indices) + { + NVBENCH_THROW_IF((m_axes[i]->get_type() == nvbench::axis_type::type), + std::runtime_error, + "Currently no support for using type axis with " + "user_iteration_axes ( {} ).", + m_axes[i]->get_name()); + } + + auto user_func = make(std::move(input_indices)); + m_value_space.push_back(std::move(user_func)); } const int64_axis &axes_metadata::get_int64_axis(std::string_view name) const diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh index 588445d..d35e1fa 100644 --- a/nvbench/benchmark_base.cuh +++ b/nvbench/benchmark_base.cuh @@ -111,6 +111,43 @@ struct benchmark_base return *this; } + /// Construct a zip iteration space from the provided value axes. + /// + /// When axes are zipped together they are iterated like a tuple + /// of values instead of separate parameters. For example two + /// value axes of 5 entries will generate 25 combinations, but + /// when zipped will generate 5 combinations. + /// + /// @param[axes] a set of axis_base to be added to the benchmark + /// and zipped together + /// + template + benchmark_base &add_zip_axes(Axes&&... axes) + { + m_axes.add_zip_axes(std::forward(axes)...); + return *this; + } + /// @} + + /// Construct a user iteration space from the provided value axes. + /// + /// Instead of using the standard iteration over each axes, they + /// are iterated using the custom user iterator that was provided. + /// This allows for fancy iteration such as using every other + /// value, random sampling, etc. + /// + /// @param[args] First argument is a `std::function` + /// which constructs the user iteration space, and the reseet are axis_base to be + /// added to the benchmark and iterated using the user iteration space + /// + template + benchmark_base &add_user_iteration_axes(ConstructorAndAxes&&... 
args) + { + m_axes.add_user_iteration_axes(std::forward(args)...); + return *this; + } + /// @} + benchmark_base &set_devices(std::vector device_ids); benchmark_base &set_devices(std::vector devices) diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx index 2d08fdb..0a736a9 100644 --- a/nvbench/benchmark_base.cxx +++ b/nvbench/benchmark_base.cxx @@ -20,6 +20,8 @@ #include +#include + namespace nvbench { @@ -62,22 +64,22 @@ benchmark_base &benchmark_base::add_device(int device_id) std::size_t benchmark_base::get_config_count() const { - const std::size_t per_device_count = nvbench::detail::transform_reduce( - m_axes.get_axes().cbegin(), - m_axes.get_axes().cend(), + const auto& axes = m_axes.get_axes(); + const std::size_t value_count = nvbench::detail::transform_reduce( + m_axes.get_value_iteration_space().cbegin(), + m_axes.get_value_iteration_space().cend(), std::size_t{1}, std::multiplies<>{}, - [](const auto &axis_ptr) { - if (const auto *type_axis_ptr = - dynamic_cast(axis_ptr.get()); - type_axis_ptr != nullptr) - { - return type_axis_ptr->get_active_count(); - } - return axis_ptr->get_size(); - }); - - return per_device_count * m_devices.size(); + [&axes](const auto &space) { return space->get_size(axes); }); + + const std::size_t type_count = nvbench::detail::transform_reduce( + m_axes.get_type_iteration_space().cbegin(), + m_axes.get_type_iteration_space().cend(), + std::size_t{1}, + std::multiplies<>{}, + [&axes](const auto &space) { return space->get_active_count(axes); }); + + return (value_count * type_count) * std::max(1UL, m_devices.size()); } } // namespace nvbench diff --git a/nvbench/detail/axes_iterator.cuh b/nvbench/detail/axes_iterator.cuh new file mode 100644 index 0000000..2275daa --- /dev/null +++ b/nvbench/detail/axes_iterator.cuh @@ -0,0 +1,112 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include +#include + +namespace nvbench +{ +namespace detail +{ + +struct axis_index +{ + axis_index() = default; + + explicit axis_index(const axis_base *axi) + : index(0) + , name(axi->get_name()) + , type(axi->get_type()) + , size(axi->get_size()) + , active_size(axi->get_size()) + { + if (type == nvbench::axis_type::type) + { + active_size = + static_cast(axi)->get_active_count(); + } + } + std::size_t index; + std::string name; + nvbench::axis_type type; + std::size_t size; + std::size_t active_size; +}; + +struct axis_space_iterator +{ + using axes_info = std::vector; + using AdvanceSignature = bool(std::size_t ¤t_index, std::size_t length); + using UpdateSignature = void(std::size_t index, + axes_info::iterator start, + axes_info::iterator end); + + axis_space_iterator( + std::vector info, + std::size_t iter_count, + std::function &&advance, + std::function &&update) + : m_info(info) + , m_iteration_size(iter_count) + , m_advance(std::move(advance)) + , m_update(std::move(update)) + {} + + axis_space_iterator( + std::vector info, + std::size_t iter_count, + std::function &&update) + : m_info(info) + , m_iteration_size(iter_count) + , m_update(std::move(update)) + {} + + [[nodiscard]] bool next() + { + return this->m_advance(m_current_index, m_iteration_size); + } + + void update_indices(std::vector &indices) const + { + indices.insert(indices.end(), m_info.begin(), m_info.end()); + axes_info::iterator end = indices.end(); + axes_info::iterator start = end - m_info.size(); + this->m_update(m_current_index, start, end); + } + + axes_info m_info; + std::size_t m_iteration_size = 1; + std::function m_advance = [](std::size_t ¤t_index, + std::size_t length) { + (current_index + 1 == length) ? 
current_index = 0 : current_index++; + return (current_index == 0); // we rolled over + }; + std::function m_update = nullptr; + +private: + std::size_t m_current_index = 0; +}; + +} // namespace detail +} // namespace nvbench diff --git a/nvbench/detail/state_generator.cuh b/nvbench/detail/state_generator.cuh index 035f61f..9499676 100644 --- a/nvbench/detail/state_generator.cuh +++ b/nvbench/detail/state_generator.cuh @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -59,7 +60,7 @@ private: // Usage: // ``` // state_iterator sg; -// sg.add_axis(...); +// sg.add_iteration_space(...); // for (sg.init(); sg.iter_valid(); sg.next()) // { // for (const auto& index : sg.get_current_indices()) @@ -73,25 +74,19 @@ private: // ``` struct state_iterator { - struct axis_index - { - std::string axis; - nvbench::axis_type type; - std::size_t index; - std::size_t size; - }; + void add_iteration_space(const nvbench::detail::axis_space_iterator &iter); - void add_axis(const nvbench::axis_base &axis); - void add_axis(std::string axis, nvbench::axis_type type, std::size_t size); [[nodiscard]] std::size_t get_number_of_states() const; void init(); - [[nodiscard]] const std::vector &get_current_indices() const; + [[nodiscard]] std::vector get_current_indices() const; [[nodiscard]] bool iter_valid() const; void next(); - std::vector m_indices; - std::size_t m_current{}; - std::size_t m_total{}; + std::vector m_space; + std::size_t m_axes_count = 0; + std::size_t m_current_space = 0; + std::size_t m_current_iteration = 0; + std::size_t m_max_iteration = 1; }; } // namespace detail diff --git a/nvbench/detail/state_generator.cxx b/nvbench/detail/state_generator.cxx index 8c153bf..d1d4bda 100644 --- a/nvbench/detail/state_generator.cxx +++ b/nvbench/detail/state_generator.cxx @@ -32,66 +32,58 @@ namespace nvbench::detail { - // state_iterator ============================================================== -void state_iterator::add_axis(const nvbench::axis_base &axis) +void state_iterator::add_iteration_space( + const nvbench::detail::axis_space_iterator &iter) { - this->add_axis(axis.get_name(), axis.get_type(), axis.get_size()); -} + m_axes_count += iter.m_info.size(); + m_max_iteration *= iter.m_iteration_size; -void state_iterator::add_axis(std::string axis, - nvbench::axis_type type, - std::size_t size) -{ - m_indices.push_back({std::move(axis), type, std::size_t{0}, size}); + m_space.push_back(std::move(iter)); } [[nodiscard]] std::size_t state_iterator::get_number_of_states() const { - return nvbench::detail::transform_reduce(m_indices.cbegin(), - m_indices.cend(), - std::size_t{1}, - std::multiplies<>{}, - [](const axis_index &size_info) { - return size_info.size; - }); + return this->m_max_iteration; } void state_iterator::init() { - m_current = 0; - m_total = this->get_number_of_states(); - for (axis_index &entry : m_indices) - { - entry.index = 0; - } + m_current_space = 0; + m_current_iteration = 0; } -[[nodiscard]] const std::vector & -state_iterator::get_current_indices() const +[[nodiscard]] std::vector state_iterator::get_current_indices() const { - return m_indices; + std::vector indices; + indices.reserve(m_axes_count); + for (auto &m : m_space) + { + m.update_indices(indices); + } + // verify length + return indices; } [[nodiscard]] bool state_iterator::iter_valid() const { - return m_current < m_total; + return m_current_iteration < m_max_iteration; } void state_iterator::next() { - for (axis_index &axis_info : m_indices) + m_current_iteration++; + + for (auto &&space : 
this->m_space) { - axis_info.index += 1; - if (axis_info.index >= axis_info.size) + auto rolled_over = space.next(); + if (rolled_over) { - axis_info.index = 0; - continue; // carry the addition to the next entry in m_indices + continue; } - break; // done + break; } - m_current += 1; } // state_generator ============================================================= @@ -103,122 +95,92 @@ state_generator::state_generator(const benchmark_base &bench) void state_generator::build_axis_configs() { const axes_metadata &axes = m_benchmark.get_axes(); - const std::vector> &axes_vec = axes.get_axes(); + const auto &type_space = axes.get_type_iteration_space(); + const auto &value_space = axes.get_value_iteration_space(); - // Construct two state_generators: - // - Only type_axis objects. - // - Only non-type axes. - state_iterator type_si; - state_iterator non_type_si; + state_iterator ti; + state_iterator vi; - // state_iterator initialization: + // Reverse add type axes by index. This way the state_generator's cartesian + // product of the type axes values will be enumerated in the same order as + // nvbench::tl::cartesian_product. This is necessary to ensure + // that the correct states are passed to the corresponding benchmark + // instantiations. { - // stage the type axes in a vector to allow sorting: - std::vector> type_axes; - type_axes.reserve(axes_vec.size()); - - // Filter all axes by into type and non-type: - std::for_each(axes_vec.cbegin(), - axes_vec.cend(), - [&non_type_si, &type_axes](const auto &axis) { - if (axis->get_type() == nvbench::axis_type::type) - { - type_axes.push_back( - std::cref(static_cast(*axis))); - } - else - { - non_type_si.add_axis(*axis); - } + const auto &axes_vec = axes.get_axes(); + std::for_each(type_space.crbegin(), + type_space.crend(), + [&ti, &axes_vec](const auto &space) { + ti.add_iteration_space(space->get_iterator(axes_vec)); + }); + std::for_each(value_space.begin(), + value_space.end(), + [&vi, &axes_vec](const auto &space) { + vi.add_iteration_space(space->get_iterator(axes_vec)); }); - - // Reverse sort type axes by index. This way the state_generator's cartesian - // product of the type axes values will be enumerated in the same order as - // nvbench::tl::cartesian_product. This is necessary to ensure - // that the correct states are passed to the corresponding benchmark - // instantiations. 
- std::sort(type_axes.begin(), - type_axes.end(), - [](const auto &axis_1, const auto &axis_2) { - return axis_1.get().get_axis_index() > - axis_2.get().get_axis_index(); - }); - - std::for_each(type_axes.cbegin(), - type_axes.cend(), - [&type_si](const auto &axis) { type_si.add_axis(axis); }); } - // type_axis_configs generation: + m_type_axis_configs.clear(); + m_type_axis_configs.reserve(ti.get_number_of_states()); + + m_non_type_axis_configs.clear(); + m_non_type_axis_configs.reserve(vi.get_number_of_states()); + + for (ti.init(); ti.iter_valid(); ti.next()) { - m_type_axis_configs.clear(); - m_type_axis_configs.reserve(type_si.get_number_of_states()); + auto &[config, active_mask] = m_type_axis_configs.emplace_back( + std::make_pair(nvbench::named_values{}, true)); - // Build type_axis_configs - for (type_si.init(); type_si.iter_valid(); type_si.next()) + for (const auto &axis_info : ti.get_current_indices()) { - auto &[config, active_mask] = m_type_axis_configs.emplace_back( - std::make_pair(nvbench::named_values{}, true)); + const auto &axis = axes.get_type_axis(axis_info.name); - // Reverse the indices so they're once again in the same order as - // specified: - auto indices = type_si.get_current_indices(); - std::reverse(indices.begin(), indices.end()); + active_mask &= axis.get_is_active(axis_info.index); - for (const auto &axis_info : indices) - { - const auto &axis = axes.get_type_axis(axis_info.axis); - if (!axis.get_is_active(axis_info.index)) - { - active_mask = false; - } - - config.set_string(axis_info.axis, - axis.get_input_string(axis_info.index)); - } - } // type_si - } // type_axis_config generation - - // non_type_axis_config generation + config.set_string(axis.get_name(), + axis.get_input_string(axis_info.index)); + } + } + + for (vi.init(); vi.iter_valid(); vi.next()) { - m_non_type_axis_configs.clear(); - m_non_type_axis_configs.reserve(type_si.get_number_of_states()); + auto &config = m_non_type_axis_configs.emplace_back(); - for (non_type_si.init(); non_type_si.iter_valid(); non_type_si.next()) + // Add non-type parameters to state: + for (const auto &axis_info : vi.get_current_indices()) { - auto &config = m_non_type_axis_configs.emplace_back(); - - // Add non-type parameters to state: - for (const auto &axis_info : non_type_si.get_current_indices()) + switch (axis_info.type) { - switch (axis_info.type) - { - default: - case axis_type::type: - assert("unreachable." && false); - break; - - case axis_type::int64: - config.set_int64( - axis_info.axis, - axes.get_int64_axis(axis_info.axis).get_value(axis_info.index)); - break; - - case axis_type::float64: - config.set_float64( - axis_info.axis, - axes.get_float64_axis(axis_info.axis).get_value(axis_info.index)); - break; - - case axis_type::string: - config.set_string( - axis_info.axis, - axes.get_string_axis(axis_info.axis).get_value(axis_info.index)); - break; - } // switch (type) - } // for (axis_info : current_indices) - } // for non_type_sg configs - } // non_type_axis_config generation + default: + case axis_type::type: + assert("unreachable." 
&& false); + break; + case axis_type::int64: + config.set_int64( + axis_info.name, + axes.get_int64_axis(axis_info.name).get_value(axis_info.index)); + break; + + case axis_type::float64: + config.set_float64( + axis_info.name, + axes.get_float64_axis(axis_info.name).get_value(axis_info.index)); + break; + + case axis_type::string: + config.set_string( + axis_info.name, + axes.get_string_axis(axis_info.name).get_value(axis_info.index)); + break; + } // switch (type) + } // for (axis_info : current_indices) + } + + if (m_type_axis_configs.empty()) + { + m_type_axis_configs.emplace_back( + std::make_pair(nvbench::named_values{}, true)); + } } void state_generator::build_states() @@ -248,7 +210,6 @@ void state_generator::add_states_for_device( { const auto &[type_config, axis_mask] = m_type_axis_configs[type_config_index]; - if (!axis_mask) { // Don't generate inner vector if the type config is masked out. continue; diff --git a/nvbench/float64_axis.cuh b/nvbench/float64_axis.cuh index 0d60651..b9bcdc8 100644 --- a/nvbench/float64_axis.cuh +++ b/nvbench/float64_axis.cuh @@ -34,6 +34,11 @@ struct float64_axis final : public axis_base , m_values{} {} + explicit float64_axis(std::string name, std::vector inputs) + : axis_base{std::move(name), axis_type::float64} + , m_values{std::move(inputs)} + {} + ~float64_axis() final; void set_inputs(std::vector inputs) diff --git a/nvbench/int64_axis.cuh b/nvbench/int64_axis.cuh index a6cec2e..08d6686 100644 --- a/nvbench/int64_axis.cuh +++ b/nvbench/int64_axis.cuh @@ -51,6 +51,10 @@ struct int64_axis final : public axis_base , m_flags{int64_axis_flags::none} {} + explicit int64_axis(std::string name, + std::vector inputs, + int64_axis_flags flags = int64_axis_flags::none); + ~int64_axis() final; [[nodiscard]] bool is_power_of_two() const diff --git a/nvbench/int64_axis.cxx b/nvbench/int64_axis.cxx index 24ff913..271f93c 100644 --- a/nvbench/int64_axis.cxx +++ b/nvbench/int64_axis.cxx @@ -26,23 +26,24 @@ #include #include -namespace nvbench +namespace { -int64_axis::~int64_axis() = default; - -void int64_axis::set_inputs(std::vector inputs, int64_axis_flags flags) +std::vector +construct_values(nvbench::int64_axis_flags flags, + const std::vector &inputs) { - m_inputs = std::move(inputs); - m_flags = flags; - if (!this->is_power_of_two()) + std::vector values; + const bool is_power_of_two = + static_cast(flags & nvbench::int64_axis_flags::power_of_two); + if (!is_power_of_two) { - m_values = m_inputs; + values = inputs; } else { - m_values.resize(m_inputs.size()); + values.resize(inputs.size()); auto conv = [](int64_t in) -> int64_t { if (in < 0 || in >= 64) @@ -52,11 +53,35 @@ void int64_axis::set_inputs(std::vector inputs, int64_axis_flags flags) "Input={} ValidRange=[0, 63]", in); } - return int64_axis::compute_pow2(in); + return nvbench::int64_axis::compute_pow2(in); }; - std::transform(m_inputs.cbegin(), m_inputs.cend(), m_values.begin(), conv); + std::transform(inputs.cbegin(), inputs.cend(), values.begin(), conv); } + + return values; +} +} // namespace + +namespace nvbench +{ + +int64_axis::int64_axis(std::string name, + std::vector inputs, + int64_axis_flags flags) + : axis_base{std::move(name), axis_type::int64} + , m_inputs{std::move(inputs)} + , m_values{construct_values(flags, m_inputs)} + , m_flags{flags} +{} + +int64_axis::~int64_axis() = default; + +void int64_axis::set_inputs(std::vector inputs, int64_axis_flags flags) +{ + m_inputs = std::move(inputs); + m_flags = flags; + m_values = construct_values(flags, m_inputs); } std::string 
int64_axis::do_get_input_string(std::size_t i) const diff --git a/nvbench/iteration_space_base.cuh b/nvbench/iteration_space_base.cuh new file mode 100644 index 0000000..130ae30 --- /dev/null +++ b/nvbench/iteration_space_base.cuh @@ -0,0 +1,111 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace nvbench +{ + +/*! + * Base class for all axi and axes iteration spaces. + * + * If we consider an axi to be a container of values, iteration_spaces + * would be how we can create iterators over that container. + * + * With that in mind we get the following mapping: + * * linear_axis_space is equivalant to a forward iterator. + * + * * zip_axis_space is equivalant to a zip iterator. + * + * * user_axis_space is equivalant to a transform iterator. + * + * The `nvbench::axes_metadata` stores all axes in a std::vector. To represent + * which axes each space is 'over' we store those indices. We don't store + * the pointers or names for the following reasons: + * + * * The names of an axis can change after being added. The `nvbench::axes_metadata` + * is not aware of the name change, and can't inform this class of it. + * + * * The `nvbench::axes_metadata` can be deep copied, which would invalidate + * any pointers held by this class. By holding onto the index we remove the need + * to do any form of fixup on deep copies of `nvbench::axes_metadata`. + * + * + */ +struct iteration_space_base +{ + using axes_type = std::vector>; + using axes_info = std::vector; + + using AdvanceSignature = + nvbench::detail::axis_space_iterator::AdvanceSignature; + using UpdateSignature = nvbench::detail::axis_space_iterator::UpdateSignature; + + /*! + * Construct a new derived iteration_space + * + * The input_indices and output_indices combine together to allow the iteration space to know + * what axi they should query from axes_metadata and where each of those map to in the output + * iteration space. + * @param[input_indices] recorded indices of each axi from the axes metadata value space + * @param[output_indices] requested indices of each axi for output when iterating the type+value space + */ + iteration_space_base(std::vector input_indices); + virtual ~iteration_space_base(); + + [[nodiscard]] std::unique_ptr clone() const; + + /*! + * Returns the iterator over the @a axis provided + * + * @param[axes] + * + */ + [[nodiscard]] detail::axis_space_iterator + get_iterator(const axes_type &axes) const; + + /*! + * Returns the number of active and inactive elements the iterator will have + * when executed over @a axes + * + * Note: + * Type Axis support inactive elements + */ + [[nodiscard]] std::size_t get_size(const axes_type &axes) const; + + /*! 
+ * Returns the number of active elements the iterator will over when + * executed over @a axes + * + * Note: + * Type Axis support inactive elements + */ + [[nodiscard]] std::size_t get_active_count(const axes_type &axes) const; + +protected: + std::vector m_input_indices; + + virtual std::unique_ptr do_clone() const = 0; + virtual detail::axis_space_iterator do_get_iterator(axes_info info) const = 0; + virtual std::size_t do_get_size(const axes_info &info) const = 0; + virtual std::size_t do_get_active_count(const axes_info &info) const = 0; +}; + +} // namespace nvbench diff --git a/nvbench/iteration_space_base.cxx b/nvbench/iteration_space_base.cxx new file mode 100644 index 0000000..262cdc6 --- /dev/null +++ b/nvbench/iteration_space_base.cxx @@ -0,0 +1,69 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "iteration_space_base.cuh" + +#include + +namespace nvbench +{ + +iteration_space_base::iteration_space_base(std::vector input_indices) + : m_input_indices(std::move(input_indices)) +{} + +iteration_space_base::~iteration_space_base() = default; + +std::unique_ptr iteration_space_base::clone() const +{ + auto clone = this->do_clone(); + return clone; +} + +namespace +{ +nvbench::iteration_space_base::axes_info +get_axes_info(const nvbench::iteration_space_base::axes_type &axes, + const std::vector &indices) +{ + nvbench::iteration_space_base::axes_info info; + info.reserve(indices.size()); + for (auto &n : indices) + { + info.emplace_back(axes[n].get()); + } + return info; +} +} // namespace + +detail::axis_space_iterator iteration_space_base::get_iterator(const axes_type &axes) const +{ + + return this->do_get_iterator(get_axes_info(axes, m_input_indices)); +} + +std::size_t iteration_space_base::get_size(const axes_type &axes) const +{ + return this->do_get_size(get_axes_info(axes, m_input_indices)); +} +std::size_t iteration_space_base::get_active_count(const axes_type &axes) const +{ + return this->do_get_active_count(get_axes_info(axes, m_input_indices)); +} + +} // namespace nvbench diff --git a/nvbench/linear_axis_space.cuh b/nvbench/linear_axis_space.cuh new file mode 100644 index 0000000..7875593 --- /dev/null +++ b/nvbench/linear_axis_space.cuh @@ -0,0 +1,43 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace nvbench +{ + +/*! 
+ * Provides linear forward iteration over a single axi + * + * The default for all axi added to a benchmark + * + */ +struct linear_axis_space final : iteration_space_base +{ + linear_axis_space(std::size_t in); + ~linear_axis_space(); + + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_get_iterator(axes_info info) const override; + std::size_t do_get_size(const axes_info &info) const override; + std::size_t do_get_active_count(const axes_info &info) const override; +}; + +} // namespace nvbench diff --git a/nvbench/linear_axis_space.cxx b/nvbench/linear_axis_space.cxx new file mode 100644 index 0000000..23ccc86 --- /dev/null +++ b/nvbench/linear_axis_space.cxx @@ -0,0 +1,58 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linear_axis_space.cuh" + +#include + +namespace nvbench +{ + +linear_axis_space::linear_axis_space(std::size_t in_index) + : iteration_space_base({in_index}) +{} + +linear_axis_space::~linear_axis_space() = default; + +detail::axis_space_iterator linear_axis_space::do_get_iterator(axes_info info) const +{ + auto update_func = [=](std::size_t inc_index, + axes_info::iterator start, + axes_info::iterator) { + start->index = inc_index; + }; + + return detail::axis_space_iterator(info, info[0].size, update_func); +} + +std::size_t linear_axis_space::do_get_size(const axes_info &info) const +{ + return info[0].size; +} + +std::size_t linear_axis_space::do_get_active_count(const axes_info &info) const +{ + return info[0].active_size; +} + +std::unique_ptr linear_axis_space::do_clone() const +{ + return std::make_unique(*this); +} + +} // namespace nvbench diff --git a/nvbench/string_axis.cuh b/nvbench/string_axis.cuh index 2f526e7..d2a3bde 100644 --- a/nvbench/string_axis.cuh +++ b/nvbench/string_axis.cuh @@ -34,6 +34,11 @@ struct string_axis final : public axis_base , m_values{} {} + explicit string_axis(std::string name, std::vector inputs) + : axis_base{std::move(name), axis_type::string} + , m_values{std::move(inputs)} + {} + ~string_axis() final; void set_inputs(std::vector inputs) diff --git a/nvbench/user_axis_space.cuh b/nvbench/user_axis_space.cuh new file mode 100644 index 0000000..2737cef --- /dev/null +++ b/nvbench/user_axis_space.cuh @@ -0,0 +1,76 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +namespace nvbench +{ + +/*! + * Provides user defined iteration over multiple axes + * + * Consider two axi with the following values: + * { 0, 1, 2, 3, 4, 5 } + * { 0, 1, 2, 3, 4, 5 } + * + * If we wanted to provide an axis space that skipped every third value + * We would implement it like this: + * + * struct every_third final : nvbench::user_axis_space + * { + * every_third(std::vector input_indices) + * : nvbench::user_axis_space(std::move(input_indices)) + * {} + * + * nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + * { + * // our increment function + * auto adv_func = [&, info](std::size_t &inc_index, + * std::size_t len) -> bool { + * inc_index += 3; return inc_index >= len; + * }; + * + * // our update function + * auto update_func = [=](std::size_t inc_index, + * axes_info::iterator start, + * axes_info::iterator end) { + * for (; start != end; ++start) { + * start->index = inc_index; + * } + * }; + * return detail::axis_space_iterator(info, (info[0].size/3), + * adv_func, update_func); + * } + * + * std::size_t do_get_size(const axes_info &info) const { return + * (info[0].size/3); } + * ... + * + */ +struct user_axis_space : iteration_space_base +{ + user_axis_space(std::vector input_indices); + ~user_axis_space(); +}; + +using make_user_space_signature = + std::unique_ptr(std::vector input_indices); + +} // namespace nvbench diff --git a/nvbench/user_axis_space.cxx b/nvbench/user_axis_space.cxx new file mode 100644 index 0000000..c191ac4 --- /dev/null +++ b/nvbench/user_axis_space.cxx @@ -0,0 +1,31 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "user_axis_space.cuh" + +#include + +namespace nvbench +{ + +user_axis_space::user_axis_space(std::vector input_indices) + : iteration_space_base(std::move(input_indices)) +{} +user_axis_space::~user_axis_space() = default; + +} // namespace nvbench diff --git a/nvbench/zip_axis_space.cuh b/nvbench/zip_axis_space.cuh new file mode 100644 index 0000000..21f5681 --- /dev/null +++ b/nvbench/zip_axis_space.cuh @@ -0,0 +1,49 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace nvbench +{ + +/*! 
+ * Provides linear forward iteration over multiple axes in lockstep + * + * Consider two axi with the following values: + * { 0, 1, 2, 3, 4, 5 } + * { 0, 1, 2, 3, 4, 5 } + * + * Using a zip_axis_space over these two axi will generate 6 values + * ( {0,0}, {1,1}, {2,2}, ... ) instead of the default 36 values + * ( {0,0}, {0,1}, {0,2}, ...). + * + */ +struct zip_axis_space final : iteration_space_base +{ + zip_axis_space(std::vector input_indices); + ~zip_axis_space(); + + std::unique_ptr do_clone() const override; + detail::axis_space_iterator do_get_iterator(axes_info info) const override; + std::size_t do_get_size(const axes_info &info) const override; + std::size_t do_get_active_count(const axes_info &info) const override; +}; + +} // namespace nvbench diff --git a/nvbench/zip_axis_space.cxx b/nvbench/zip_axis_space.cxx new file mode 100644 index 0000000..3e687f7 --- /dev/null +++ b/nvbench/zip_axis_space.cxx @@ -0,0 +1,61 @@ +/* + * Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "zip_axis_space.cuh" + +#include + +namespace nvbench +{ + +zip_axis_space::zip_axis_space(std::vector input_indices) + : iteration_space_base(std::move(input_indices)) +{} + +zip_axis_space::~zip_axis_space() = default; + +detail::axis_space_iterator zip_axis_space::do_get_iterator(axes_info info) const +{ + auto update_func = [=](std::size_t inc_index, + axes_info::iterator start, + axes_info::iterator end) { + for (; start != end; ++start) + { + start->index = inc_index; + } + }; + + return detail::axis_space_iterator(info, info[0].size, update_func); +} + +std::size_t zip_axis_space::do_get_size(const axes_info &info) const +{ + return info[0].size; +} + +std::size_t zip_axis_space::do_get_active_count(const axes_info &info) const +{ + return info[0].active_size; +} + +std::unique_ptr zip_axis_space::do_clone() const +{ + return std::make_unique(*this); +} + +} // namespace nvbench diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 4928ebc..034fcef 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -1,5 +1,6 @@ set(test_srcs axes_metadata.cu + axes_iteration_space.cu benchmark.cu create.cu cuda_timer.cu diff --git a/testing/axes_iteration_space.cu b/testing/axes_iteration_space.cu new file mode 100644 index 0000000..76c9946 --- /dev/null +++ b/testing/axes_iteration_space.cu @@ -0,0 +1,263 @@ +/* + * Copyright 2021 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 with the LLVM exception + * (the "License"); you may not use this file except in compliance with + * the License. + * + * You may obtain a copy of the License at + * + * http://llvm.org/foundation/relicensing/LICENSE.txt + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "test_asserts.cuh" + +#include + +#include +#include +#include +#include + +template +std::vector sort(std::vector &&vec) +{ + std::sort(vec.begin(), vec.end()); + return std::move(vec); +} + +void no_op_generator(nvbench::state &state) +{ + fmt::memory_buffer params; + fmt::format_to(params, "Params:"); + const auto &axis_values = state.get_axis_values(); + for (const auto &name : sort(axis_values.get_names())) + { + std::visit( + [¶ms, &name](const auto &value) { + fmt::format_to(params, " {}: {}", name, value); + }, + axis_values.get_value(name)); + } + + // Marking as skipped to signal that this state is run: + state.skip(fmt::to_string(std::move(params))); +} +NVBENCH_DEFINE_CALLABLE(no_op_generator, no_op_callable); + +template > +struct rezippable_benchmark final : public nvbench::benchmark_base +{ + using kernel_generator = KernelGenerator; + using type_axes = TypeAxes; + using type_configs = nvbench::tl::cartesian_product; + + static constexpr std::size_t num_type_configs = + nvbench::tl::size{}; + + rezippable_benchmark() + : benchmark_base(type_axes{}) + {} + +private: + std::unique_ptr do_clone() const final + { + return std::make_unique(); + } + + void do_set_type_axes_names(std::vector names) final + { + m_axes.set_type_axes_names(std::move(names)); + } + + void do_run() final + { + nvbench::runner runner{*this}; + runner.generate_states(); + runner.run(); + } +}; + +template +void template_no_op_generator(nvbench::state &state, + nvbench::type_list) +{ + ASSERT(nvbench::type_strings::input_string() == + state.get_string("Integer")); + ASSERT(nvbench::type_strings::input_string() == + state.get_string("Float")); + ASSERT(nvbench::type_strings::input_string() == + state.get_string("Other")); + + // Enum params using non-templated version: + no_op_generator(state); +} +NVBENCH_DEFINE_CALLABLE_TEMPLATE(template_no_op_generator, + template_no_op_callable); + +void test_zip_axes() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.add_zip_axes(nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2, 4, 5})); + + ASSERT_MSG(bench.get_config_count() == 5 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); +} + +void test_zip_unequal_length() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + + ASSERT_THROWS_ANY( + bench.add_zip_axes(nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2}))); +} + +void test_zip_clone() +{ + using benchmark_type = nvbench::benchmark; + benchmark_type bench; + bench.set_devices(std::vector{}); + bench.add_int64_power_of_two_axis("I64 POT Axis", {10, 20}); + bench.add_int64_axis("I64 Axis", {10, 20}); + bench.add_zip_axes(nvbench::string_axis("Strings", + {"string a", "string b", "string c"}), + nvbench::float64_axis("F64 Axis", {0., .1, .25})); + + const auto expected_count = bench.get_config_count(); + + std::unique_ptr clone_base = bench.clone(); + ASSERT(clone_base.get() != nullptr); + + ASSERT_MSG(expected_count == clone_base->get_config_count(), + "Got {}", + clone_base->get_config_count()); + + auto *clone = dynamic_cast(clone_base.get()); + ASSERT(clone != nullptr); + + ASSERT(bench.get_name() == clone->get_name()); + + const auto &ref_axes = bench.get_axes().get_axes(); + const auto 
&clone_axes = clone->get_axes().get_axes(); + ASSERT(ref_axes.size() == clone_axes.size()); + for (std::size_t i = 0; i < ref_axes.size(); ++i) + { + const nvbench::axis_base *ref_axis = ref_axes[i].get(); + const nvbench::axis_base *clone_axis = clone_axes[i].get(); + ASSERT(ref_axis != nullptr); + ASSERT(clone_axis != nullptr); + ASSERT(ref_axis->get_name() == clone_axis->get_name()); + ASSERT(ref_axis->get_type() == clone_axis->get_type()); + ASSERT(ref_axis->get_size() == clone_axis->get_size()); + for (std::size_t j = 0; j < ref_axis->get_size(); ++j) + { + ASSERT(ref_axis->get_input_string(j) == clone_axis->get_input_string(j)); + ASSERT(ref_axis->get_description(j) == clone_axis->get_description(j)); + } + } + + ASSERT(clone->get_states().empty()); +} + +struct under_diag final : nvbench::user_axis_space +{ + under_diag(std::vector input_indices) + : nvbench::user_axis_space(std::move(input_indices)) + {} + + mutable std::size_t x_pos = 0; + mutable std::size_t y_pos = 0; + mutable std::size_t x_start = 0; + + nvbench::detail::axis_space_iterator do_get_iterator(axes_info info) const + { + // generate our increment function + auto adv_func = [&, info](std::size_t &inc_index, + std::size_t /*len*/) -> bool { + inc_index++; + x_pos++; + if (x_pos == info[0].size) + { + x_pos = ++x_start; + y_pos = x_start; + return true; + } + return false; + }; + + // our update function + auto diag_under = + [&, info](std::size_t, + std::vector::iterator start, + std::vector::iterator end) { + start->index = x_pos; + end->index = y_pos; + }; + + const size_t iteration_length = ((info[0].size * (info[1].size + 1)) / 2); + return nvbench::detail::axis_space_iterator(info, + iteration_length, + adv_func, + diag_under); + } + + std::size_t do_get_size(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::size_t do_get_active_count(const axes_info &info) const + { + return ((info[0].size * (info[1].size + 1)) / 2); + } + + std::unique_ptr do_clone() const + { + return std::make_unique(*this); + } +}; + +void test_user_axes() +{ + using benchmark_type = rezippable_benchmark; + benchmark_type bench; + bench.add_user_iteration_axes( + [](auto... 
args) -> std::unique_ptr { + return std::make_unique(args...); + }, + nvbench::float64_axis("F64 Axis", {0., .1, .25, .5, 1.}), + nvbench::int64_axis("I64 Axis", {1, 3, 2, 4, 5})); + + ASSERT_MSG(bench.get_config_count() == 15 * bench.get_devices().size(), + "Got {}", + bench.get_config_count()); +} + +int main() +{ + test_zip_axes(); + test_zip_unequal_length(); + test_zip_clone(); + + test_user_axes(); +} diff --git a/testing/benchmark.cu b/testing/benchmark.cu index 71ffe03..7bc759b 100644 --- a/testing/benchmark.cu +++ b/testing/benchmark.cu @@ -296,10 +296,9 @@ void test_get_config_count() bench.add_float64_axis("foo", {0.4, 2.3, 4.3}); // 3, 12 bench.add_int64_axis("bar", {4, 6, 15}); // 3, 36 bench.add_string_axis("baz", {"str", "ing"}); // 2, 72 - bench.add_string_axis("baz", {"single"}); // 1, 72 + bench.add_string_axis("fez", {"single"}); // 1, 72 auto const num_devices = bench.get_devices().size(); - ASSERT_MSG(bench.get_config_count() == 72 * num_devices, "Got {}", bench.get_config_count()); diff --git a/testing/option_parser.cu b/testing/option_parser.cu index 9d7e6a9..26dcc37 100644 --- a/testing/option_parser.cu +++ b/testing/option_parser.cu @@ -25,6 +25,8 @@ #include +#include + //============================================================================== // Declare a couple benchmarks for testing: void DummyBench(nvbench::state &state) { state.skip("Skipping for testing."); } @@ -96,6 +98,7 @@ states_to_string(const std::vector &states) ASSERT(bench != nullptr); bench->run(); + std::cout << bench->get_config_count() << std::endl; return bench->get_states(); } diff --git a/testing/state_generator.cu b/testing/state_generator.cu index cb584be..8ba6c5a 100644 --- a/testing/state_generator.cu +++ b/testing/state_generator.cu @@ -56,12 +56,18 @@ void test_single_state() { // one single-value axis = one state nvbench::detail::state_iterator sg; - sg.add_axis("OnlyAxis", nvbench::axis_type::string, 1); + nvbench::string_axis si("OnlyAxis"); + si.set_inputs({""}); + + std::vector> axes; + axes.push_back(std::make_unique(si)); + + sg.add_iteration_space(nvbench::linear_axis_space{0}.get_iterator(axes)); ASSERT(sg.get_number_of_states() == 1); sg.init(); ASSERT(sg.iter_valid()); ASSERT(sg.get_current_indices().size() == 1); - ASSERT(sg.get_current_indices()[0].axis == "OnlyAxis"); + ASSERT(sg.get_current_indices()[0].name == "OnlyAxis"); ASSERT(sg.get_current_indices()[0].index == 0); ASSERT(sg.get_current_indices()[0].size == 1); ASSERT(sg.get_current_indices()[0].type == nvbench::axis_type::string); @@ -73,10 +79,27 @@ void test_single_state() void test_basic() { nvbench::detail::state_iterator sg; - sg.add_axis("Axis1", nvbench::axis_type::string, 2); - sg.add_axis("Axis2", nvbench::axis_type::string, 3); - sg.add_axis("Axis3", nvbench::axis_type::string, 3); - sg.add_axis("Axis4", nvbench::axis_type::string, 2); + + nvbench::string_axis si1("Axis1"); + nvbench::string_axis si2("Axis2"); + nvbench::string_axis si3("Axis3"); + nvbench::string_axis si4("Axis4"); + + si1.set_inputs({"", ""}); + si2.set_inputs({"", "", ""}); + si3.set_inputs({"", "", ""}); + si4.set_inputs({"", ""}); + + std::vector> axes; + axes.emplace_back(std::make_unique(si1)); + axes.emplace_back(std::make_unique(si2)); + axes.emplace_back(std::make_unique(si3)); + axes.emplace_back(std::make_unique(si4)); + + sg.add_iteration_space(nvbench::linear_axis_space{0}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{1}.get_iterator(axes)); + 
sg.add_iteration_space(nvbench::linear_axis_space{2}.get_iterator(axes)); + sg.add_iteration_space(nvbench::linear_axis_space{3}.get_iterator(axes)); ASSERT_MSG(sg.get_number_of_states() == (2 * 3 * 3 * 2), "Actual: {} Expected: {}", @@ -95,7 +118,7 @@ void test_basic() ASSERT(axis_index.type == nvbench::axis_type::string); fmt::format_to(line, " | {}: {}/{}", - axis_index.axis, + axis_index.name, axis_index.index, axis_index.size); }