Skip to content

Commit db51f68

Browse files
martijnvelscopybara-github
authored andcommitted
Introduce Abseil Prefetch API
PiperOrigin-RevId: 504941246 Change-Id: I94c1e85afd254e84948477b511d41eeb8285fdae
1 parent c21bd95 commit db51f68

File tree

5 files changed

+272
-9
lines changed

5 files changed

+272
-9
lines changed

CMake/AbseilDll.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ set(ABSL_INTERNAL_DLL_FILES
2828
"base/internal/low_level_scheduling.h"
2929
"base/internal/per_thread_tls.h"
3030
"base/internal/prefetch.h"
31+
"base/prefetch.h"
3132
"base/internal/pretty_function.h"
3233
"base/internal/raw_logging.cc"
3334
"base/internal/raw_logging.h"

absl/base/BUILD.bazel

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -732,21 +732,22 @@ cc_test(
732732

733733
cc_library(
734734
name = "prefetch",
735-
hdrs = ["internal/prefetch.h"],
735+
hdrs = [
736+
"internal/prefetch.h",
737+
"prefetch.h",
738+
],
736739
copts = ABSL_DEFAULT_COPTS,
737740
linkopts = ABSL_DEFAULT_LINKOPTS,
738-
visibility = [
739-
"//absl:__subpackages__",
740-
],
741-
deps = [
742-
":config",
743-
],
741+
deps = [":config"],
744742
)
745743

746744
cc_test(
747745
name = "prefetch_test",
748746
size = "small",
749-
srcs = ["internal/prefetch_test.cc"],
747+
srcs = [
748+
"internal/prefetch_test.cc",
749+
"prefetch_test.cc",
750+
],
750751
copts = ABSL_TEST_COPTS,
751752
linkopts = ABSL_DEFAULT_LINKOPTS,
752753
deps = [

absl/base/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,11 +645,11 @@ absl_cc_test(
645645
GTest::gtest_main
646646
)
647647

648-
# Internal-only target, do not depend on directly.
649648
absl_cc_library(
650649
NAME
651650
prefetch
652651
HDRS
652+
"prefetch.h"
653653
"internal/prefetch.h"
654654
COPTS
655655
${ABSL_DEFAULT_COPTS}
@@ -663,6 +663,7 @@ absl_cc_test(
663663
NAME
664664
prefetch_test
665665
SRCS
666+
"prefetch_test.cc"
666667
"internal/prefetch_test.cc"
667668
COPTS
668669
${ABSL_TEST_COPTS}

absl/base/prefetch.h

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
// Copyright 2023 The Abseil Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
//
15+
// -----------------------------------------------------------------------------
16+
// File: prefetch.h
17+
// -----------------------------------------------------------------------------
18+
//
19+
// This header file defines prefetch functions to prefetch memory contents
20+
// into the first level cache (L1) for the current CPU. The prefetch logic
21+
// offered in this header is limited to prefetching first level cachelines
22+
// only, and is aimed at relatively 'simple' prefetching logic.
23+
//
24+
#ifndef ABSL_BASE_PREFETCH_H_
25+
#define ABSL_BASE_PREFETCH_H_
26+
27+
#include "absl/base/config.h"
28+
29+
#if defined(ABSL_INTERNAL_HAVE_SSE)
30+
#include <xmmintrin.h>
31+
#endif
32+
33+
#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE)
34+
#include <intrin.h>
35+
#pragma intrinsic(_mm_prefetch)
36+
#endif
37+
38+
namespace absl {
39+
ABSL_NAMESPACE_BEGIN
40+
41+
// Moves data into the L1 cache before it is read, or "prefetches" it.
42+
//
43+
// The value of `addr` is the address of the memory to prefetch. If
44+
// the target and compiler support it, data prefetch instructions are
45+
// generated. If the prefetch is done some time before the memory is
46+
// read, it may be in the cache by the time the read occurs.
47+
//
48+
// This method prefetches data with the highest degree of temporal locality;
49+
// data is prefetched where possible into all levels of the cache.
50+
//
51+
// Incorrect or gratuitous use of this function can degrade performance.
52+
// Use this function only when representative benchmarks show an improvement.
53+
//
54+
// Example:
55+
//
56+
// // Computes incremental checksum for `data`.
57+
// int ComputeChecksum(int sum, absl::string_view data);
58+
//
59+
// // Computes cumulative checksum for all values in `data`
60+
// int ComputeChecksum(absl::Span<const std::string> data) {
61+
// int sum = 0;
62+
// auto it = data.begin();
63+
// auto pit = data.begin();
64+
// auto end = data.end();
65+
// for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) {
66+
// absl::PrefetchToLocalCache(pit->data());
67+
// }
68+
// for (; pit != end; ++pit, ++it) {
69+
// sum = ComputeChecksum(sum, *it);
70+
// absl::PrefetchToLocalCache(pit->data());
71+
// }
72+
// for (; it != end; ++it) {
73+
// sum = ComputeChecksum(sum, *it);
74+
// }
75+
// return sum;
76+
// }
77+
//
78+
void PrefetchToLocalCache(const void* addr);
79+
80+
// Moves data into the L1 cache before it is read, or "prefetches" it.
81+
//
82+
// This function is identical to `PrefetchToLocalCache()` except that it has
83+
// non-temporal locality: the fetched data should not be left in any of the
84+
// cache tiers. This is useful for cases where the data is used only once /
85+
// short term, for example, invoking a destructor on an object.
86+
//
87+
// Incorrect or gratuitous use of this function can degrade performance.
88+
// Use this function only when representative benchmarks show an improvement.
89+
//
90+
// Example:
91+
//
92+
// template <typename Iterator>
93+
// void DestroyPointers(Iterator begin, Iterator end) {
94+
// // Warm the cache for the first few pointers before deleting anything.
95+
//
96+
// int dist = 8;
97+
// auto prefetch_it = begin;
98+
// while (prefetch_it != end && --dist) {
99+
// absl::PrefetchToLocalCacheNta(*prefetch_it++);
100+
// }
101+
// while (prefetch_it != end) {
102+
// delete *begin++;
103+
// absl::PrefetchToLocalCacheNta(*prefetch_it++);
104+
// }
105+
// while (begin != end) {
106+
// delete *begin++;
107+
// }
108+
// }
109+
//
110+
void PrefetchToLocalCacheNta(const void* addr);
111+
112+
// Moves data into the L1 cache with the intent to modify it.
113+
//
114+
// This function is similar to `PrefetchToLocalCache()` except that it
115+
// prefetches cachelines with an 'intent to modify'. This typically includes
116+
// invalidating cache entries for this address in all other cache tiers, and an
117+
// exclusive access intent.
118+
//
119+
// Incorrect or gratuitous use of this function can degrade performance. As this
120+
// function can invalidate cached cachelines on other caches and computer cores,
121+
// incorrect usage of this function can have an even greater negative impact
122+
// than incorrect regular prefetches.
123+
// Use this function only when representative benchmarks show an improvement.
124+
//
125+
// Example:
126+
//
127+
// void* Arena::Allocate(size_t size) {
128+
// void* ptr = AllocateBlock(size);
129+
// absl::PrefetchToLocalCacheForWrite(ptr);
130+
// return ptr;
131+
// }
132+
//
133+
void PrefetchToLocalCacheForWrite(const void* addr);
134+
135+
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
136+
137+
#define ABSL_HAVE_PREFETCH 1
138+
139+
// See __builtin_prefetch:
140+
// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
141+
//
142+
// Read prefetch of the memory at `addr`, requested with the highest locality
// level accepted by __builtin_prefetch. The call is only a hint; `addr` is
// never dereferenced by this function itself.
inline void PrefetchToLocalCache(const void* addr) {
  __builtin_prefetch(addr, /*rw=*/0, /*locality=*/3);
}
145+
146+
// Read prefetch of the memory at `addr`, requested with the lowest locality
// level accepted by __builtin_prefetch (the non-temporal variant). The call is
// only a hint; `addr` is never dereferenced by this function itself.
inline void PrefetchToLocalCacheNta(const void* addr) {
  __builtin_prefetch(addr, /*rw=*/0, /*locality=*/0);
}
149+
150+
// Prefetches the cacheline containing `addr` with an intent to modify it.
// The call is only a hint; `addr` is never dereferenced by this function.
inline void PrefetchToLocalCacheForWrite(const void* addr) {
  // [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
  // unless -march=broadwell or newer; this is not generally the default, so we
  // manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
  // processors and has been present on AMD processors since the K6-2.
#if defined(__x86_64__)
  asm("prefetchw (%0)" : : "r"(addr));
#else
  // Use locality 3, the same level PrefetchToLocalCache() requests: this
  // function is documented as its write-intent counterpart, not as a
  // non-temporal prefetch (which locality 0 would request).
  __builtin_prefetch(addr, /*rw=*/1, /*locality=*/3);
#endif
}
161+
162+
#elif defined(ABSL_INTERNAL_HAVE_SSE)
163+
164+
#define ABSL_HAVE_PREFETCH 1
165+
166+
// SSE flavour: issues _mm_prefetch with the T0 hint for the memory at `addr`.
inline void PrefetchToLocalCache(const void* addr) {
  const char* const bytes = static_cast<const char*>(addr);
  _mm_prefetch(bytes, _MM_HINT_T0);
}
169+
170+
// SSE flavour: issues _mm_prefetch with the NTA (non-temporal) hint for the
// memory at `addr`.
inline void PrefetchToLocalCacheNta(const void* addr) {
  const char* const bytes = static_cast<const char*>(addr);
  _mm_prefetch(bytes, _MM_HINT_NTA);
}
173+
174+
// SSE flavour of the write-intent prefetch. Uses the ET0 hint when the
// intrinsics header defines it, otherwise emits PREFETCHW directly on x86-64.
// When neither is available the function compiles to nothing.
inline void PrefetchToLocalCacheForWrite(const void* addr) {
#if defined(_MM_HINT_ET0)
  _mm_prefetch(static_cast<const char*>(addr), _MM_HINT_ET0);
#elif defined(__x86_64__)
  // _MM_HINT_ET0 is not universally supported. PREFETCHW is recognized as a
  // no-op on older Intel processors and has been present on AMD processors
  // since the K6-2, so it is emitted directly instead.
  asm("prefetchw (%0)" : : "r"(addr));
#endif
}
184+
185+
#else
186+
187+
// Fallback for targets with neither __builtin_prefetch nor SSE intrinsics:
// every prefetch flavour is a no-op. `addr` is discarded explicitly so that
// including this header does not raise unused-parameter warnings.
inline void PrefetchToLocalCache(const void* addr) {
  static_cast<void>(addr);
}
inline void PrefetchToLocalCacheNta(const void* addr) {
  static_cast<void>(addr);
}
inline void PrefetchToLocalCacheForWrite(const void* addr) {
  static_cast<void>(addr);
}
190+
191+
#endif
192+
193+
ABSL_NAMESPACE_END
194+
} // namespace absl
195+
196+
#endif // ABSL_BASE_PREFETCH_H_

absl/base/prefetch_test.cc

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2023 The Abseil Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "absl/base/prefetch.h"
16+
17+
#include <cstring>
#include <memory>
18+
19+
#include "gtest/gtest.h"
20+
21+
namespace {
22+
23+
// Below tests exercise the functions only to guarantee they compile and execute
24+
// correctly. We make no attempt at verifying any prefetch instructions being
25+
// generated and executed: we assume the various implementations in terms of
26+
// __builtin_prefetch() or x86 intrinsics to be correct and well tested.
27+
28+
// Smoke test: each prefetch flavour accepts the address of a zero-initialized
// stack buffer.
TEST(PrefetchTest, PrefetchToLocalCache_StackA) {
  char stack_bytes[100] = {};
  const void* const target = stack_bytes;
  absl::PrefetchToLocalCache(target);
  absl::PrefetchToLocalCacheNta(target);
  absl::PrefetchToLocalCacheForWrite(target);
}
34+
35+
// Smoke test: each prefetch flavour accepts addresses inside a 200 KiB heap
// buffer, probed at offsets 0, 50 KiB, 100 KiB and 150 KiB.
TEST(PrefetchTest, PrefetchToLocalCache_Heap) {
  constexpr std::size_t kBufferSize = 200 << 10;
  constexpr std::size_t kProbeStride = 50 << 10;

  auto memory = std::make_unique<char[]>(kBufferSize);
  // std::memset lives in <cstring>; the call is qualified rather than relying
  // on a transitive include exposing ::memset.
  std::memset(memory.get(), 0, kBufferSize);

  for (std::size_t offset = 0; offset < kBufferSize; offset += kProbeStride) {
    const char* const probe = memory.get() + offset;
    absl::PrefetchToLocalCache(probe);
    absl::PrefetchToLocalCacheNta(probe);
    absl::PrefetchToLocalCacheForWrite(probe);
  }
}
51+
52+
// Smoke test: each prefetch flavour can be called with a null address.
TEST(PrefetchTest, PrefetchToLocalCache_Nullptr) {
  const void* const null_addr = nullptr;
  absl::PrefetchToLocalCache(null_addr);
  absl::PrefetchToLocalCacheNta(null_addr);
  absl::PrefetchToLocalCacheForWrite(null_addr);
}
57+
58+
// Smoke test: each prefetch flavour can be called with an address forged from
// an arbitrary integer rather than taken from a live object. The write-intent
// call deliberately keeps its own, different constant.
TEST(PrefetchTest, PrefetchToLocalCache_InvalidPtr) {
  const void* const forged_read_addr =
      reinterpret_cast<const void*>(0x785326532L);
  const void* const forged_write_addr =
      reinterpret_cast<const void*>(0x78532L);
  absl::PrefetchToLocalCache(forged_read_addr);
  absl::PrefetchToLocalCacheNta(forged_read_addr);
  absl::PrefetchToLocalCacheForWrite(forged_write_addr);
}
63+
64+
} // namespace

0 commit comments

Comments
 (0)