|
| 1 | +// Copyright 2023 The Abseil Authors |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | +// |
| 15 | +// ----------------------------------------------------------------------------- |
| 16 | +// File: prefetch.h |
| 17 | +// ----------------------------------------------------------------------------- |
| 18 | +// |
| 19 | +// This header file defines prefetch functions to prefetch memory contents |
| 20 | +// into the first level cache (L1) for the current CPU. The prefetch logic |
| 21 | +// offered in this header is limited to prefetching first level cachelines |
| 22 | +// only, and is aimed at relatively 'simple' prefetching logic. |
| 23 | +// |
| 24 | +#ifndef ABSL_BASE_PREFETCH_H_ |
| 25 | +#define ABSL_BASE_PREFETCH_H_ |
| 26 | + |
| 27 | +#include "absl/base/config.h" |
| 28 | + |
| 29 | +#if defined(ABSL_INTERNAL_HAVE_SSE) |
| 30 | +#include <xmmintrin.h> |
| 31 | +#endif |
| 32 | + |
| 33 | +#if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE) |
| 34 | +#include <intrin.h> |
| 35 | +#pragma intrinsic(_mm_prefetch) |
| 36 | +#endif |
| 37 | + |
| 38 | +namespace absl { |
| 39 | +ABSL_NAMESPACE_BEGIN |
| 40 | + |
| 41 | +// Moves data into the L1 cache before it is read, or "prefetches" it. |
| 42 | +// |
| 43 | +// The value of `addr` is the address of the memory to prefetch. If |
| 44 | +// the target and compiler support it, data prefetch instructions are |
| 45 | +// generated. If the prefetch is done some time before the memory is |
| 46 | +// read, it may be in the cache by the time the read occurs. |
| 47 | +// |
| 48 | +// This function prefetches data with the highest degree of temporal locality; |
| 49 | +// data is prefetched where possible into all levels of the cache. |
| 50 | +// |
| 51 | +// Incorrect or gratuitous use of this function can degrade performance. |
| 52 | +// Use this function only when representative benchmarks show an improvement. |
| 53 | +// |
| 54 | +// Example: |
| 55 | +// |
| 56 | +// // Computes incremental checksum for `data`. |
| 57 | +// int ComputeChecksum(int sum, absl::string_view data); |
| 58 | +// |
| 59 | +// // Computes cumulative checksum for all values in `data`. |
| 60 | +// int ComputeChecksum(absl::Span<const std::string> data) { |
| 61 | +// int sum = 0; |
| 62 | +// auto it = data.begin(); |
| 63 | +// auto pit = data.begin(); |
| 64 | +// auto end = data.end(); |
| 65 | +// for (int dist = 8; dist > 0 && pit != end; --dist, ++pit) { |
| 66 | +// absl::PrefetchToLocalCache(pit->data()); |
| 67 | +// } |
| 68 | +// for (; pit != end; ++pit, ++it) { |
| 69 | +// sum = ComputeChecksum(sum, *it); |
| 70 | +// absl::PrefetchToLocalCache(pit->data()); |
| 71 | +// } |
| 72 | +// for (; it != end; ++it) { |
| 73 | +// sum = ComputeChecksum(sum, *it); |
| 74 | +// } |
| 75 | +// return sum; |
| 76 | +// } |
| 77 | +// |
| 78 | +void PrefetchToLocalCache(const void* addr); |
| 79 | + |
| 80 | +// Moves data into the L1 cache before it is read, or "prefetches" it. |
| 81 | +// |
| 82 | +// This function is identical to `PrefetchToLocalCache()` except that it has |
| 83 | +// non-temporal locality: the fetched data should not be left in any of the |
| 84 | +// cache tiers. This is useful for cases where the data is used only once / |
| 85 | +// short term, for example, invoking a destructor on an object. |
| 86 | +// |
| 87 | +// Incorrect or gratuitous use of this function can degrade performance. |
| 88 | +// Use this function only when representative benchmarks show an improvement. |
| 89 | +// |
| 90 | +// Example: |
| 91 | +// |
| 92 | +// template <typename Iterator> |
| 93 | +// void DestroyPointers(Iterator begin, Iterator end) { |
| 94 | +// // Prefetch the first few pointees before deleting anything. |
| 95 | +// int dist = 8; |
| 96 | +// auto prefetch_it = begin; |
| 97 | +// |
| 98 | +// while (prefetch_it != end && --dist) { |
| 99 | +// absl::PrefetchToLocalCacheNta(*prefetch_it++); |
| 100 | +// } |
| 101 | +// while (prefetch_it != end) { |
| 102 | +// delete *begin++; |
| 103 | +// absl::PrefetchToLocalCacheNta(*prefetch_it++); |
| 104 | +// } |
| 105 | +// while (begin != end) { |
| 106 | +// delete *begin++; |
| 107 | +// } |
| 108 | +// } |
| 109 | +// |
| 110 | +void PrefetchToLocalCacheNta(const void* addr); |
| 111 | + |
| 112 | +// Moves data into the L1 cache with the intent to modify it. |
| 113 | +// |
| 114 | +// This function is similar to `PrefetchToLocalCache()` except that it |
| 115 | +// prefetches cachelines with an 'intent to modify'. This typically includes |
| 116 | +// invalidating cache entries for this address in all other cache tiers, and an |
| 117 | +// exclusive access intent. |
| 118 | +// |
| 119 | +// Incorrect or gratuitous use of this function can degrade performance. As this |
| 120 | +// function can invalidate cached cachelines on other caches and computer cores, |
| 121 | +// incorrect usage of this function can have an even greater negative impact |
| 122 | +// than incorrect regular prefetches. |
| 123 | +// Use this function only when representative benchmarks show an improvement. |
| 124 | +// |
| 125 | +// Example: |
| 126 | +// |
| 127 | +// void* Arena::Allocate(size_t size) { |
| 128 | +// void* ptr = AllocateBlock(size); |
| 129 | +// absl::PrefetchToLocalCacheForWrite(ptr); |
| 130 | +// return ptr; |
| 131 | +// } |
| 132 | +// |
| 133 | +void PrefetchToLocalCacheForWrite(const void* addr); |
| 134 | + |
| 135 | +#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__) |
| 136 | + |
| 137 | +#define ABSL_HAVE_PREFETCH 1 |
| 138 | + |
| 139 | +// Implemented with __builtin_prefetch(addr, rw, locality); see |
| 140 | +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html. |
| 141 | +// Read prefetch (rw = 0) with the highest temporal locality (3). |
| 142 | +inline void PrefetchToLocalCache(const void* addr) { |
| 143 | + __builtin_prefetch(addr, /*rw=*/0, /*locality=*/3); |
| 144 | +} |
| 145 | + |
| 146 | +inline void PrefetchToLocalCacheNta(const void* addr) { |
| 147 | + __builtin_prefetch(addr, /*rw=*/0, /*locality=*/0); |
| 148 | +} |
| 149 | + |
| 150 | +inline void PrefetchToLocalCacheForWrite(const void* addr) { |
| 151 | + // [x86] gcc/clang emit PREFETCHW for __builtin_prefetch(.., 1) only with |
| 152 | + // -march=broadwell or newer, which is not generally the default, so we emit |
| 153 | + // it manually; it is a no-op on older Intel CPUs and present on AMD since the |
| 154 | + // K6-2. Elsewhere use a write prefetch with the highest temporal locality. |
| 155 | +#if defined(__x86_64__) |
| 156 | + asm("prefetchw (%0)" : : "r"(addr)); |
| 157 | +#else |
| 158 | + __builtin_prefetch(addr, /*rw=*/1, /*locality=*/3); |
| 159 | +#endif |
| 160 | +} |
| 161 | + |
| 162 | +#elif defined(ABSL_INTERNAL_HAVE_SSE) |
| 163 | + |
| 164 | +#define ABSL_HAVE_PREFETCH 1 |
| 165 | + |
| 166 | +inline void PrefetchToLocalCache(const void* addr) { |
| 167 | + _mm_prefetch(reinterpret_cast<const char*>(addr), /*hint=*/_MM_HINT_T0); |
| 168 | +} |
| 169 | + |
| 170 | +inline void PrefetchToLocalCacheNta(const void* addr) { |
| 171 | + _mm_prefetch(reinterpret_cast<const char*>(addr), /*hint=*/_MM_HINT_NTA); |
| 172 | +} |
| 173 | + |
| 174 | +inline void PrefetchToLocalCacheForWrite(const void* addr) { |
| 175 | +#if defined(_MM_HINT_ET0) |
| 176 | + _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0); |
| 177 | +#elif defined(__x86_64__) |
| 178 | + // _MM_HINT_ET0 is not universally supported; emit PREFETCHW manually instead. |
| 179 | + // PREFETCHW is a no-op on older Intel processors and has been present on AMD |
| 180 | + // since the K6-2. When neither branch is compiled, this function does nothing. |
| 181 | + asm("prefetchw (%0)" : : "r"(addr)); |
| 182 | +#endif |
| 183 | +} |
| 184 | + |
| 185 | +#else |
| 186 | +// No prefetch support: no-ops (ABSL_HAVE_PREFETCH is not defined here). |
| 187 | +inline void PrefetchToLocalCache(const void* addr) {} |
| 188 | +inline void PrefetchToLocalCacheNta(const void* addr) {} |
| 189 | +inline void PrefetchToLocalCacheForWrite(const void* addr) {} |
| 190 | + |
| 191 | +#endif |
| 192 | + |
| 193 | +ABSL_NAMESPACE_END |
| 194 | +} // namespace absl |
| 195 | + |
| 196 | +#endif // ABSL_BASE_PREFETCH_H_ |
0 commit comments