Skip to content

Commit fbed0b2

Browse files
authored
Skip grain_size computation in parallel_for_each_reduce_* wrappers without threadpool (#9144)
If we don't have the threadpool then this division is wasted.
1 parent 2d761f0 commit fbed0b2

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

kernels/portable/cpu/util/reduce_util.h

+8
Original file line numberDiff line numberDiff line change
@@ -823,11 +823,15 @@ template <typename Func>
823823
executorch::aten::optional<int64_t> dim,
824824
const Tensor& out,
825825
const Func& func) {
826+
#ifdef ET_USE_THREADPOOL
826827
const ssize_t reduction_size = get_reduced_dim_product(in, dim);
827828
const auto grain_size = std::max(
828829
static_cast<ssize_t>(1),
829830
static_cast<ssize_t>(executorch::extension::internal::GRAIN_SIZE) /
830831
reduction_size);
832+
#else // ET_USE_THREADPOOL
833+
const auto grain_size = 1;
834+
#endif // ET_USE_THREADPOOL
831835
return executorch::extension::parallel_for(0, out.numel(), grain_size, func);
832836
}
833837

@@ -842,11 +846,15 @@ template <typename Func>
842846
optional<ArrayRef<int64_t>> dim_list,
843847
const Tensor& out,
844848
const Func& func) {
849+
#ifdef ET_USE_THREADPOOL
845850
const ssize_t reduction_size = get_reduced_dim_product(in, dim_list);
846851
const auto grain_size = std::max(
847852
static_cast<ssize_t>(1),
848853
static_cast<ssize_t>(executorch::extension::internal::GRAIN_SIZE) /
849854
reduction_size);
855+
#else // ET_USE_THREADPOOL
856+
const auto grain_size = 1;
857+
#endif // ET_USE_THREADPOOL
850858
return executorch::extension::parallel_for(0, out.numel(), grain_size, func);
851859
}
852860

0 commit comments

Comments
 (0)