Skip to content

Commit 012f5e5

Browse files
authored
rework alignment for empty ranges (#303)
1 parent 834d27d commit 012f5e5

File tree

10 files changed

+77
-36
lines changed

10 files changed

+77
-36
lines changed

Diff for: include/dr/mhp/algorithms/copy.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ namespace dr::mhp {
1010

1111
/// Copy
1212
void copy(dr::distributed_range auto &&in, dr::distributed_iterator auto out) {
13+
if (rng::empty(in)) {
14+
return;
15+
}
16+
1317
if (aligned(in, out)) {
1418
dr::drlog.debug("copy: parallel execution\n");
1519
for (const auto &&[in_seg, out_seg] :

Diff for: include/dr/mhp/algorithms/for_each.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ namespace dr::mhp {
2020
/// Collective for_each on distributed range
2121
void for_each(dr::distributed_range auto &&dr, auto op) {
2222
dr::drlog.debug("for_each: parallel execution\n");
23+
if (rng::empty(dr)) {
24+
return;
25+
}
2326
assert(aligned(dr));
2427

2528
for (const auto &s : local_segments(dr)) {

Diff for: include/dr/mhp/algorithms/reduce.hpp

+24-1
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,16 @@ auto reduce(std::size_t root, bool root_provided, DR &&dr, auto &&binary_op) {
3636
using value_type = rng::range_value_t<DR>;
3737
auto comm = default_comm();
3838

39+
if (rng::empty(dr)) {
40+
return rng::range_value_t<DR>{};
41+
}
42+
3943
if (aligned(dr)) {
4044
dr::drlog.debug("Parallel reduce\n");
4145

4246
// Reduce the local segments
4347
auto reduce = [=](auto &&r) {
48+
assert(rng::size(r) > 0);
4449
if (mhp::use_sycl()) {
4550
dr::drlog.debug(" with DPL\n");
4651
return dpl_reduce(r, binary_op);
@@ -84,6 +89,13 @@ T reduce(std::size_t root, bool root_provided, DR &&dr, T init,
8489
return binary_op(init, reduce(root, root_provided, dr, binary_op));
8590
}
8691

92+
inline void
93+
#if defined(__GNUC__) && !defined(__clang__)
94+
__attribute__((optimize(0)))
95+
#endif
96+
no_optimize(auto x) {
97+
}
98+
8799
}; // namespace dr::mhp::__detail
88100

89101
namespace dr::mhp {
@@ -124,9 +136,20 @@ template <typename T, dr::distributed_range DR> auto reduce(DR &&dr, T init) {
124136
template <dr::distributed_range DR> auto reduce(std::size_t root, DR &&dr) {
125137
return __detail::reduce(root, true, std::forward<DR>(dr), std::plus<>{});
126138
}
139+
127140
/// Collective reduction on a distributed range
128141
template <dr::distributed_range DR> auto reduce(DR &&dr) {
129-
return __detail::reduce(0, false, std::forward<DR>(dr), std::plus<>{});
142+
auto x = __detail::reduce(0, false, std::forward<DR>(dr), std::plus<>{});
143+
144+
// The code below avoids an issue where DotProduct_ZipReduce_DR
145+
// fails with gcc11. From debugging, I can see that the call to
146+
// __detail::reduce above computes the correct value, but this
147+
// function returns a bad value. My theory is that the problem is
148+
// related to tail call optimization and the function below disables
149+
// the optimization.
150+
__detail::no_optimize(x);
151+
152+
return x;
130153
}
131154

132155
//

Diff for: include/dr/mhp/algorithms/transform.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ namespace dr::mhp {
1919

2020
void transform(dr::distributed_range auto &&in,
2121
dr::distributed_iterator auto out, auto op) {
22+
if (rng::empty(in)) {
23+
return;
24+
}
2225
assert(aligned(in, out));
2326

2427
auto zip = mhp::views::zip(in, rng::subrange(out, out + rng::size(in)));

Diff for: include/dr/mhp/alignment.hpp

+12-14
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,19 @@ concept has_segments = requires(T &t) { dr::ranges::segments(t); };
1616
template <typename T>
1717
concept no_segments = !has_segments<T>;
1818

19-
auto sub_aligned(bool non_empty, has_segments auto &&r) {
20-
if (non_empty && dr::ranges::segments(r).empty()) {
19+
auto sub_aligned(has_segments auto &&r) {
20+
if (rng::empty(dr::ranges::segments(r))) {
2121
dr::drlog.debug("unaligned: empty segments\n");
2222
return false;
2323
} else {
2424
return true;
2525
}
2626
}
2727

28-
auto sub_aligned(bool non_empty, auto &&r) { return true; }
28+
auto sub_aligned(auto &&r) { return true; }
2929

3030
// iter1 is aligned with iter2, and iter2 is aligned with the rest
31-
bool sub_aligned(bool non_empty, has_segments auto &&r1, has_segments auto &&r2,
31+
bool sub_aligned(has_segments auto &&r1, has_segments auto &&r2,
3232
auto &&...rest) {
3333
auto z = rng::views::zip(dr::ranges::segments(r1), dr::ranges::segments(r2));
3434
auto i = rng::distance(z) - 1;
@@ -48,25 +48,23 @@ bool sub_aligned(bool non_empty, has_segments auto &&r1, has_segments auto &&r2,
4848
i--;
4949
}
5050

51-
if (!rng::empty(dr::ranges::segments(r1))) {
52-
non_empty = true;
53-
}
54-
return sub_aligned(non_empty, r2, rest...);
51+
return sub_aligned(r2, rest...);
5552
}
5653

5754
// Skip local iterators
58-
bool sub_aligned(bool non_empty, no_segments auto &&r1, has_segments auto &&r2,
59-
auto... rest) {
60-
return sub_aligned(non_empty, r2, rest...);
55+
bool sub_aligned(no_segments auto &&r1, has_segments auto &&r2, auto... rest) {
56+
return sub_aligned(r2, rest...);
6157
}
6258

63-
bool sub_aligned(bool non_empty, has_segments auto &&r1, no_segments auto &&r2,
59+
bool sub_aligned(has_segments auto &&r1, no_segments auto &&r2,
6460
auto &&...rest) {
65-
return sub_aligned(non_empty, r1, rest...);
61+
return sub_aligned(r1, rest...);
6662
}
6763

64+
// This was added to allow passing state down the call tree, but it is
65+
// no longer needed. I did not delete it in case we need it again.
6866
template <typename... Args> bool aligned(Args &&...args) {
69-
return sub_aligned(false, std::forward<Args>(args)...);
67+
return sub_aligned(std::forward<Args>(args)...);
7068
}
7169

7270
} // namespace dr::mhp

Diff for: include/dr/mhp/views/zip.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ template <__detail::zipable... Rs> class zip_view : public rng::view_base {
230230
return std::apply(make_end, base_);
231231
}
232232
auto size() const { return rng::size(rng_zip_); }
233+
auto empty() const { return begin() == end(); }
234+
233235
auto operator[](difference_type n) const { return rng_zip_[n]; }
234236

235237
auto base() const { return base_; }

Diff for: scripts/dr-style.py

+4
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@
113113
r'\.size\(\)',
114114
'use rng::size()',
115115
),
116+
(
117+
r'\.empty\(\)',
118+
'use rng::empty()',
119+
),
116120
(
117121
r'std::begin\(',
118122
'use rng::begin()',

Diff for: test/gtest/common/zip_local.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ TEST_F(ZipLocal, Begin) {
5656
EXPECT_EQ(*r.begin(), *z.begin());
5757
}
5858

59+
TEST_F(ZipLocal, Empty) {
60+
EXPECT_FALSE(test_zip(ops[0]).empty());
61+
EXPECT_TRUE(test_zip(rng::views::take(ops[0], 0)).empty());
62+
}
63+
5964
TEST_F(ZipLocal, End) {
6065
auto z = test_zip(ops[0], ops[1]);
6166
auto r = rng::views::zip(ops[0], ops[1]);

Diff for: test/gtest/mhp/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ add_executable(
4444
add_executable(
4545
mhp-quick-test
4646
mhp-tests.cpp
47-
../common/counted.cpp
47+
alignment.cpp
48+
../common/zip_local.cpp
4849
)
4950

5051
target_compile_definitions(mhp-quick-test PRIVATE QUICK_TEST)

Diff for: test/gtest/mhp/alignment.cpp

+18-20
Original file line numberDiff line numberDiff line change
@@ -33,26 +33,6 @@ TEST(Alignment, OffsetBy1) {
3333
}
3434
}
3535

36-
TEST(Alignment, EmptyRangeRange) {
37-
Ops2<DV> ops(10);
38-
EXPECT_TRUE(dr::mhp::aligned(
39-
rng::subrange(ops.dist_vec0.begin() + 10, ops.dist_vec0.begin() + 10),
40-
rng::subrange(ops.dist_vec1.begin() + 10, ops.dist_vec1.begin() + 10)));
41-
}
42-
43-
TEST(Alignment, EmptyRangeIter) {
44-
Ops2<DV> ops(10);
45-
EXPECT_TRUE(dr::mhp::aligned(
46-
rng::subrange(ops.dist_vec0.begin() + 10, ops.dist_vec0.begin() + 10),
47-
ops.dist_vec1.begin() + 10));
48-
}
49-
50-
TEST(Alignment, EmptyIterIter) {
51-
Ops2<DV> ops(10);
52-
EXPECT_TRUE(
53-
dr::mhp::aligned(ops.dist_vec1.begin() + 10, ops.dist_vec1.begin() + 10));
54-
}
55-
5636
TEST(Alignment, Subrange) {
5737
Ops2<DV> ops(10);
5838
auto is_aligned = dr::mhp::aligned(
@@ -72,3 +52,21 @@ TEST(Alignment, Iota2) {
7252
Ops1<DV> ops(10);
7353
EXPECT_TRUE(dr::mhp::aligned(ops.dist_vec, rng::views::iota(100)));
7454
}
55+
56+
TEST(Alignment, ZipAligned) {
57+
Ops2<DV> ops(10);
58+
EXPECT_TRUE(
59+
dr::mhp::aligned(dr::mhp::views::zip(ops.dist_vec0, ops.dist_vec1)));
60+
}
61+
62+
TEST(Alignment, ZipMisaligned) {
63+
Ops2<DV> ops(10);
64+
auto is_aligned = dr::mhp::aligned(dr::mhp::views::zip(
65+
dr::mhp::views::drop(ops.dist_vec0, 1), ops.dist_vec1));
66+
if (comm_size == 1) {
67+
// If there is a single segment, then it is aligned
68+
EXPECT_TRUE(is_aligned);
69+
} else {
70+
EXPECT_FALSE(is_aligned);
71+
}
72+
}

0 commit comments

Comments
 (0)