Skip to content

Commit 8163d02

Browse files
ZhangHuiGuipitrou
andauthored
apacheGH-40431: [C++] Move key_hash/key_map/light_array related files to internal for prevent using by users (apache#40484)
### Rationale for this change These files expose implementation details and APIs that are not meant for third-party use. This PR explicitly marks them internal, which also avoids having them installed. ### Are these changes tested? By existing builds and tests. ### Are there any user-facing changes? No, except hiding some header files that were not supposed to be included externally. * GitHub Issue: apache#40431 Lead-authored-by: ZhangHuiGui <[email protected]> Co-authored-by: Antoine Pitrou <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
1 parent 5ddef63 commit 8163d02

24 files changed

+44
-39
lines changed

cpp/src/arrow/CMakeLists.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -689,9 +689,9 @@ set(ARROW_COMPUTE_SRCS
689689
compute/function.cc
690690
compute/function_internal.cc
691691
compute/kernel.cc
692-
compute/key_hash.cc
693-
compute/key_map.cc
694-
compute/light_array.cc
692+
compute/key_hash_internal.cc
693+
compute/key_map_internal.cc
694+
compute/light_array_internal.cc
695695
compute/ordering.cc
696696
compute/registry.cc
697697
compute/kernels/codegen_internal.cc
@@ -717,8 +717,8 @@ set(ARROW_COMPUTE_SRCS
717717
compute/row/row_internal.cc
718718
compute/util.cc)
719719

720-
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_avx2.cc)
721-
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_avx2.cc)
720+
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
721+
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
722722
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
723723
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
724724
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)

cpp/src/arrow/acero/asof_join_node.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@
4545
#include "arrow/compute/function_internal.h"
4646
#endif
4747
#include "arrow/acero/time_series_util.h"
48-
#include "arrow/compute/key_hash.h"
49-
#include "arrow/compute/light_array.h"
48+
#include "arrow/compute/key_hash_internal.h"
49+
#include "arrow/compute/light_array_internal.h"
5050
#include "arrow/record_batch.h"
5151
#include "arrow/result.h"
5252
#include "arrow/status.h"

cpp/src/arrow/acero/bloom_filter_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "arrow/acero/task_util.h"
2828
#include "arrow/acero/test_util_internal.h"
2929
#include "arrow/acero/util.h"
30-
#include "arrow/compute/key_hash.h"
30+
#include "arrow/compute/key_hash_internal.h"
3131
#include "arrow/util/bitmap_ops.h"
3232
#include "arrow/util/config.h"
3333
#include "arrow/util/cpu_info.h"

cpp/src/arrow/acero/hash_join_node.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "arrow/acero/options.h"
2828
#include "arrow/acero/schema_util.h"
2929
#include "arrow/acero/util.h"
30-
#include "arrow/compute/key_hash.h"
30+
#include "arrow/compute/key_hash_internal.h"
3131
#include "arrow/util/checked_cast.h"
3232
#include "arrow/util/future.h"
3333
#include "arrow/util/thread_pool.h"

cpp/src/arrow/acero/hash_join_node.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#pragma once
1919

20+
#include <cassert>
2021
#include <vector>
2122

2223
#include "arrow/acero/options.h"
@@ -88,7 +89,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema {
8889
const Expression& filter);
8990

9091
bool PayloadIsEmpty(int side) {
91-
ARROW_DCHECK(side == 0 || side == 1);
92+
assert(side == 0 || side == 1);
9293
return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0;
9394
}
9495

cpp/src/arrow/acero/schema_util.h

+7-7
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717

1818
#pragma once
1919

20+
#include <cassert>
2021
#include <cstdint>
2122
#include <memory>
2223
#include <string>
2324
#include <vector>
2425

25-
#include "arrow/compute/light_array.h" // for KeyColumnMetadata
26-
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
26+
#include "arrow/type.h" // for DataType, FieldRef, Field and Schema
2727

2828
namespace arrow {
2929

@@ -47,8 +47,8 @@ struct SchemaProjectionMap {
4747
const int* source_to_base;
4848
const int* base_to_target;
4949
inline int get(int i) const {
50-
ARROW_DCHECK(i >= 0 && i < num_cols);
51-
ARROW_DCHECK(source_to_base[i] != kMissingField);
50+
assert(i >= 0 && i < num_cols);
51+
assert(source_to_base[i] != kMissingField);
5252
return base_to_target[source_to_base[i]];
5353
}
5454
};
@@ -66,7 +66,7 @@ class SchemaProjectionMaps {
6666
Status Init(ProjectionIdEnum full_schema_handle, const Schema& schema,
6767
const std::vector<ProjectionIdEnum>& projection_handles,
6868
const std::vector<const std::vector<FieldRef>*>& projections) {
69-
ARROW_DCHECK(projection_handles.size() == projections.size());
69+
assert(projection_handles.size() == projections.size());
7070
ARROW_RETURN_NOT_OK(RegisterSchema(full_schema_handle, schema));
7171
for (size_t i = 0; i < projections.size(); ++i) {
7272
ARROW_RETURN_NOT_OK(
@@ -174,7 +174,7 @@ class SchemaProjectionMaps {
174174
}
175175
}
176176
// We should never get here
177-
ARROW_DCHECK(false);
177+
assert(false);
178178
return -1;
179179
}
180180

@@ -207,7 +207,7 @@ class SchemaProjectionMaps {
207207
break;
208208
}
209209
}
210-
ARROW_DCHECK(field_id != SchemaProjectionMap::kMissingField);
210+
assert(field_id != SchemaProjectionMap::kMissingField);
211211
mapping[i] = field_id;
212212
inverse_mapping[field_id] = i;
213213
}

cpp/src/arrow/acero/swiss_join.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include "arrow/acero/util.h"
2626
#include "arrow/array/util.h" // MakeArrayFromScalar
2727
#include "arrow/compute/kernels/row_encoder_internal.h"
28-
#include "arrow/compute/key_hash.h"
28+
#include "arrow/compute/key_hash_internal.h"
2929
#include "arrow/compute/row/compare_internal.h"
3030
#include "arrow/compute/row/encode_internal.h"
3131
#include "arrow/util/bit_util.h"

cpp/src/arrow/acero/swiss_join_internal.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
#include "arrow/acero/schema_util.h"
2424
#include "arrow/acero/task_util.h"
2525
#include "arrow/compute/kernels/row_encoder_internal.h"
26-
#include "arrow/compute/key_map.h"
27-
#include "arrow/compute/light_array.h"
26+
#include "arrow/compute/key_map_internal.h"
27+
#include "arrow/compute/light_array_internal.h"
2828
#include "arrow/compute/row/encode_internal.h"
2929

3030
namespace arrow {

cpp/src/arrow/compute/key_hash.cc cpp/src/arrow/compute/key_hash_internal.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include "arrow/compute/key_hash.h"
18+
#include "arrow/compute/key_hash_internal.h"
1919

2020
#include <memory.h>
2121

2222
#include <algorithm>
2323
#include <cstdint>
2424

25-
#include "arrow/compute/light_array.h"
25+
#include "arrow/compute/light_array_internal.h"
2626
#include "arrow/util/bit_util.h"
2727
#include "arrow/util/ubsan.h"
2828

cpp/src/arrow/compute/key_hash.h cpp/src/arrow/compute/key_hash_internal.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
#include <cstdint>
2525

26-
#include "arrow/compute/light_array.h"
26+
#include "arrow/compute/light_array_internal.h"
2727
#include "arrow/compute/util.h"
2828

2929
namespace arrow {

cpp/src/arrow/compute/key_hash_avx2.cc cpp/src/arrow/compute/key_hash_internal_avx2.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
#include <immintrin.h>
1919

20-
#include "arrow/compute/key_hash.h"
20+
#include "arrow/compute/key_hash_internal.h"
2121
#include "arrow/util/bit_util.h"
2222

2323
namespace arrow {

cpp/src/arrow/compute/key_hash_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#include <unordered_set>
2424

2525
#include "arrow/array/builder_binary.h"
26-
#include "arrow/compute/key_hash.h"
26+
#include "arrow/compute/key_hash_internal.h"
2727
#include "arrow/testing/gtest_util.h"
2828
#include "arrow/testing/util.h"
2929
#include "arrow/util/cpu_info.h"

cpp/src/arrow/compute/key_map.cc cpp/src/arrow/compute/key_map_internal.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include "arrow/compute/key_map.h"
18+
#include "arrow/compute/key_map_internal.h"
1919

2020
#include <algorithm>
2121
#include <cstdint>
File renamed without changes.

cpp/src/arrow/compute/key_map_avx2.cc cpp/src/arrow/compute/key_map_internal_avx2.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
#include <immintrin.h>
1919

20-
#include "arrow/compute/key_map.h"
20+
#include "arrow/compute/key_map_internal.h"
2121
#include "arrow/util/logging.h"
2222

2323
namespace arrow {

cpp/src/arrow/compute/light_array.cc cpp/src/arrow/compute/light_array_internal.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include "arrow/compute/light_array.h"
18+
#include "arrow/compute/light_array_internal.h"
1919

2020
#include <type_traits>
2121

cpp/src/arrow/compute/light_array_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include "arrow/compute/light_array.h"
18+
#include "arrow/compute/light_array_internal.h"
1919

2020
#include <gtest/gtest.h>
2121
#include <numeric>

cpp/src/arrow/compute/row/compare_internal.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
#include <cstdint>
2121

22-
#include "arrow/compute/light_array.h"
22+
#include "arrow/compute/light_array_internal.h"
2323
#include "arrow/compute/row/encode_internal.h"
2424
#include "arrow/compute/row/row_internal.h"
2525
#include "arrow/compute/util.h"

cpp/src/arrow/compute/row/encode_internal.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
#include <vector>
2323

2424
#include "arrow/array/data.h"
25-
#include "arrow/compute/key_map.h"
26-
#include "arrow/compute/light_array.h"
25+
#include "arrow/compute/key_map_internal.h"
26+
#include "arrow/compute/light_array_internal.h"
2727
#include "arrow/compute/row/row_internal.h"
2828
#include "arrow/compute/util.h"
2929
#include "arrow/memory_pool.h"

cpp/src/arrow/compute/row/grouper.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
#include "arrow/compute/api_vector.h"
2727
#include "arrow/compute/function.h"
2828
#include "arrow/compute/kernels/row_encoder_internal.h"
29-
#include "arrow/compute/key_hash.h"
30-
#include "arrow/compute/light_array.h"
29+
#include "arrow/compute/key_hash_internal.h"
30+
#include "arrow/compute/light_array_internal.h"
3131
#include "arrow/compute/registry.h"
3232
#include "arrow/compute/row/compare_internal.h"
3333
#include "arrow/compute/row/grouper_internal.h"

cpp/src/arrow/compute/row/row_internal.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#include <vector>
2121

2222
#include "arrow/buffer.h"
23-
#include "arrow/compute/light_array.h"
23+
#include "arrow/compute/light_array_internal.h"
2424
#include "arrow/memory_pool.h"
2525
#include "arrow/status.h"
2626
#include "arrow/util/logging.h"

cpp/src/arrow/compute/util.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ using internal::CpuInfo;
3232
namespace util {
3333

3434
void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
35-
int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
35+
int64_t new_top = top_ + EstimatedAllocationSize(num_bytes);
3636
// Stack overflow check (see GH-39582).
3737
// XXX cannot return a regular Status because most consumers do not either.
3838
ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow";
@@ -48,7 +48,7 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
4848

4949
void TempVectorStack::release(int id, uint32_t num_bytes) {
5050
ARROW_DCHECK(num_vectors_ == id + 1);
51-
int64_t size = PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t);
51+
int64_t size = EstimatedAllocationSize(num_bytes);
5252
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[-1] ==
5353
kGuard2);
5454
ARROW_DCHECK(top_ >= size);

cpp/src/arrow/compute/util.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class ARROW_EXPORT TempVectorStack {
8989
Status Init(MemoryPool* pool, int64_t size) {
9090
num_vectors_ = 0;
9191
top_ = 0;
92-
buffer_size_ = PaddedAllocationSize(size) + kPadding + 2 * sizeof(uint64_t);
92+
buffer_size_ = EstimatedAllocationSize(size);
9393
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
9494
// Ensure later operations don't accidentally read uninitialized memory.
9595
std::memset(buffer->mutable_data(), 0xFF, size);
@@ -98,7 +98,11 @@ class ARROW_EXPORT TempVectorStack {
9898
}
9999

100100
private:
101-
int64_t PaddedAllocationSize(int64_t num_bytes) {
101+
static int64_t EstimatedAllocationSize(int64_t size) {
102+
return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
103+
}
104+
105+
static int64_t PaddedAllocationSize(int64_t num_bytes) {
102106
// Round up allocation size to multiple of 8 bytes
103107
// to avoid returning temp vectors with unaligned address.
104108
//

0 commit comments

Comments
 (0)