Skip to content

Commit ff5644b

Browse files
derekmauro authored and copybara-github committed
Allow Cord to store chunked checksums
PiperOrigin-RevId: 494587777 Change-Id: I41504edca6fcf750d52602fa84a33bc7fe5fbb48
1 parent 0b8e676 commit ff5644b

16 files changed

+544
-35
lines changed

CMake/AbseilDll.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ set(ABSL_INTERNAL_DLL_FILES
100100
"crc/internal/crc32_x86_arm_combined_simd.h"
101101
"crc/internal/crc.cc"
102102
"crc/internal/crc.h"
103+
"crc/internal/crc_cord_state.cc"
104+
"crc/internal/crc_cord_state.h"
103105
"crc/internal/crc_internal.h"
104106
"crc/internal/crc_x86_arm_combined.cc"
105107
"crc/internal/crc_memcpy_fallback.cc"

absl/crc/BUILD.bazel

+28
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,34 @@ cc_test(
163163
],
164164
)
165165

166+
# Internal library providing CrcCordState (chunked CRC32C state); it is only
# visible to the //absl/strings package.
cc_library(
    name = "crc_cord_state",
    srcs = ["internal/crc_cord_state.cc"],
    hdrs = ["internal/crc_cord_state.h"],
    copts = ABSL_DEFAULT_COPTS,
    linkopts = ABSL_DEFAULT_LINKOPTS,
    visibility = ["//absl/strings:__pkg__"],
    deps = [
        ":crc32c",
        "//absl/base:config",
        "//absl/numeric:bits",
        "//absl/strings",
    ],
)
180+
181+
# Unit test for the crc_cord_state library; private to this package.
cc_test(
    name = "crc_cord_state_test",
    srcs = ["internal/crc_cord_state_test.cc"],
    copts = ABSL_TEST_COPTS,
    linkopts = ABSL_DEFAULT_LINKOPTS,
    visibility = ["//visibility:private"],
    deps = [
        ":crc32c",
        ":crc_cord_state",
        "@com_google_googletest//:gtest_main",
    ],
)
193+
166194
cc_binary(
167195
name = "crc32c_benchmark",
168196
testonly = 1,

absl/crc/CMakeLists.txt

+28
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,31 @@ absl_cc_test(
146146
absl::non_temporal_memcpy
147147
GTest::gtest_main
148148
)
149+
150+
# Internal library providing CrcCordState (chunked CRC32C state).
# crc_cord_state.cc includes absl/numeric/bits.h (for absl::rotr), so
# absl::bits is listed explicitly instead of being picked up transitively;
# this mirrors the "//absl/numeric:bits" dependency of the Bazel target.
absl_cc_library(
  NAME
    crc_cord_state
  HDRS
    "internal/crc_cord_state.h"
  SRCS
    "internal/crc_cord_state.cc"
  COPTS
    ${ABSL_DEFAULT_COPTS}
  DEPS
    absl::crc32c
    absl::config
    absl::bits
    absl::strings
)
164+
165+
# Unit test for crc_cord_state. Tests are compiled with the test option set
# (ABSL_TEST_COPTS), matching the Bazel cc_test rule for this target.
absl_cc_test(
  NAME
    crc_cord_state_test
  SRCS
    "internal/crc_cord_state_test.cc"
  COPTS
    ${ABSL_TEST_COPTS}
  DEPS
    absl::crc_cord_state
    absl::crc32c
    GTest::gtest_main
)

absl/crc/internal/crc32_x86_arm_combined_simd.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
#define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
3939

4040
#elif defined(__aarch64__) && defined(__LITTLE_ENDIAN__) && \
41-
defined(__ARM_FEATURE_CRC32) && defined(__ARM_NEON)
41+
defined(__ARM_FEATURE_CRC32) && defined(__ARM_NEON) && \
42+
defined(__ARM_FEATURE_CRYPTO)
4243

4344
#include <arm_acle.h>
4445
#include <arm_neon.h>
@@ -254,7 +255,7 @@ inline int64_t V128_Low64(const V128 l) {
254255
}
255256

256257
inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
257-
return vshlq_u64(l, r);
258+
return vshlq_u64(l, vreinterpretq_s64_u64(r));
258259
}
259260

260261
#endif

absl/crc/internal/crc_cord_state.cc

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2022 The Abseil Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "absl/crc/internal/crc_cord_state.h"
16+
17+
#include <cassert>
18+
19+
#include "absl/base/config.h"
20+
#include "absl/numeric/bits.h"
21+
22+
namespace absl {
23+
ABSL_NAMESPACE_BEGIN
24+
namespace crc_internal {
25+
26+
// Returns the shared empty representation after adding one reference to it.
// The instance is allocated on first use; its initial reference is held by
// the function-local static and is never released here, so the count seen by
// callers stays above one.
CrcCordState::RefcountedRep* CrcCordState::RefSharedEmptyRep() {
  static RefcountedRep* const shared_empty = new RefcountedRep;

  // Sanity checks: the shared instance must still be alive and still empty.
  assert(shared_empty->count.load(std::memory_order_relaxed) >= 1);
  assert(shared_empty->rep.removed_prefix.length == 0);
  assert(shared_empty->rep.prefix_crc.empty());

  RefcountedRep* const result = shared_empty;
  Ref(result);
  return result;
}
36+
37+
CrcCordState::CrcCordState() : refcounted_rep_(new RefcountedRep) {}
38+
39+
CrcCordState::CrcCordState(const CrcCordState& other)
40+
: refcounted_rep_(other.refcounted_rep_) {
41+
Ref(refcounted_rep_);
42+
}
43+
44+
CrcCordState::CrcCordState(CrcCordState&& other)
45+
: refcounted_rep_(other.refcounted_rep_) {
46+
// Make `other` valid for use after move.
47+
other.refcounted_rep_ = RefSharedEmptyRep();
48+
}
49+
50+
// Copy assignment: drops the reference to the current representation and
// shares `other`'s representation instead. Self-assignment is a no-op.
CrcCordState& CrcCordState::operator=(const CrcCordState& other) {
  if (this == &other) {
    return *this;
  }

  Unref(refcounted_rep_);
  refcounted_rep_ = other.refcounted_rep_;
  Ref(refcounted_rep_);
  return *this;
}
58+
59+
// Move assignment: drops the reference to the current representation and
// takes over `other`'s. Self-assignment is a no-op.
CrcCordState& CrcCordState::operator=(CrcCordState&& other) {
  if (this == &other) {
    return *this;
  }

  Unref(refcounted_rep_);
  refcounted_rep_ = other.refcounted_rep_;
  // Keep `other` valid after the move by pointing it at the shared empty
  // representation.
  other.refcounted_rep_ = RefSharedEmptyRep();
  return *this;
}
68+
69+
// Destructor: releases this instance's reference on the representation.
CrcCordState::~CrcCordState() { Unref(refcounted_rep_); }
72+
73+
// Returns the CRC32C of the entire Cord.
//
// With no cached chunks the result is 0. Otherwise the last chunk carries the
// checksum; if a prefix has been removed (the state is not normalized), the
// removed prefix is subtracted from it first.
crc32c_t CrcCordState::Checksum() const {
  const Rep& state = rep();
  if (state.prefix_crc.empty()) {
    return absl::crc32c_t{0};
  }

  const PrefixCrc& last = state.prefix_crc.back();
  if (IsNormalized()) {
    return last.crc;
  }

  const PrefixCrc& removed = state.removed_prefix;
  return absl::RemoveCrc32cPrefix(removed.crc, last.crc,
                                  last.length - removed.length);
}
84+
85+
// Returns the (length, crc) pair of the `n`-th cached chunk as it would read
// after normalization, without modifying the stored state.
//
// `n` must be less than NumChunks() (checked with assert).
CrcCordState::PrefixCrc CrcCordState::NormalizedPrefixCrcAtNthChunk(
    size_t n) const {
  assert(n < NumChunks());

  const PrefixCrc& stored = rep().prefix_crc[n];
  if (IsNormalized()) {
    return stored;
  }

  // Subtract the removed prefix from both the length and the crc.
  const PrefixCrc& removed = rep().removed_prefix;
  const size_t normalized_length = stored.length - removed.length;
  const crc32c_t normalized_crc =
      absl::RemoveCrc32cPrefix(removed.crc, stored.crc, normalized_length);
  return PrefixCrc(normalized_length, normalized_crc);
}
96+
97+
void CrcCordState::Normalize() {
98+
if (IsNormalized() || rep().prefix_crc.empty()) {
99+
return;
100+
}
101+
102+
Rep* r = mutable_rep();
103+
for (auto& prefix_crc : r->prefix_crc) {
104+
size_t remaining = prefix_crc.length - r->removed_prefix.length;
105+
prefix_crc.crc = absl::RemoveCrc32cPrefix(r->removed_prefix.crc,
106+
prefix_crc.crc, remaining);
107+
prefix_crc.length = remaining;
108+
}
109+
r->removed_prefix = PrefixCrc();
110+
}
111+
112+
void CrcCordState::Poison() {
113+
Rep* rep = mutable_rep();
114+
if (NumChunks() > 0) {
115+
for (auto& prefix_crc : rep->prefix_crc) {
116+
// This is basically CRC32::Scramble().
117+
uint32_t crc = static_cast<uint32_t>(prefix_crc.crc);
118+
crc += 0x2e76e41b;
119+
crc = absl::rotr(crc, 17);
120+
prefix_crc.crc = crc32c_t{crc};
121+
}
122+
} else {
123+
// Add a fake corrupt chunk.
124+
rep->prefix_crc.push_back(PrefixCrc(0, crc32c_t{1}));
125+
}
126+
}
127+
128+
} // namespace crc_internal
129+
ABSL_NAMESPACE_END
130+
} // namespace absl

absl/crc/internal/crc_cord_state.h

+159
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// Copyright 2022 The Abseil Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// https://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
16+
#define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
17+
18+
#include <atomic>
19+
#include <cstddef>
20+
#include <deque>
21+
22+
#include "absl/base/config.h"
23+
#include "absl/crc/crc32c.h"
24+
25+
namespace absl {
26+
ABSL_NAMESPACE_BEGIN
27+
namespace crc_internal {
28+
29+
// CrcCordState is a copy-on-write class that holds the chunked CRC32C data
30+
// that allows CrcCord to perform efficient substring operations. CrcCordState
31+
// is used as a member variable in CrcCord. When a CrcCord is converted to a
32+
// Cord, the CrcCordState is shallow-copied into the root node of the Cord. If
33+
// the converted Cord is modified outside of CrcCord, the CrcCordState is
34+
// discarded from the Cord. If the Cord is converted back to a CrcCord, and the
35+
// Cord is still carrying the CrcCordState in its root node, the CrcCord can
36+
// re-use the CrcCordState, making the construction of the CrcCord cheap.
37+
//
38+
// CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires
39+
// knowledge of how CrcCordState represents the CRC32C state). It does
40+
// encapsulate the copy-on-write nature of the state.
41+
class CrcCordState {
42+
public:
43+
// Constructors.
44+
CrcCordState();
45+
CrcCordState(const CrcCordState&);
46+
CrcCordState(CrcCordState&&);
47+
48+
// Destructor. Atomically unreferences the data.
49+
~CrcCordState();
50+
51+
// Copy and move operators.
52+
CrcCordState& operator=(const CrcCordState&);
53+
CrcCordState& operator=(CrcCordState&&);
54+
55+
// A (length, crc) pair.
56+
struct PrefixCrc {
57+
PrefixCrc() = default;
58+
PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg)
59+
: length(length_arg), crc(crc_arg) {}
60+
61+
size_t length = 0;
62+
63+
// TODO(absl-team): Memory stomping often zeros out memory. If this struct
64+
// gets overwritten, we could end up with {0, 0}, which is the correct CRC
65+
// for a string of length 0. Consider storing a scrambled value and
66+
// unscrambling it before verifying it.
67+
absl::crc32c_t crc = absl::crc32c_t{0};
68+
};
69+
70+
// The representation of the chunked CRC32C data.
71+
struct Rep {
72+
// `removed_prefix` is the crc and length of any prefix that has been
73+
// removed from the Cord (for example, by calling
74+
// `CrcCord::RemovePrefix()`). To get the checkum of any prefix of the cord,
75+
// this value must be subtracted from `prefix_crc`. See `Checksum()` for an
76+
// example.
77+
//
78+
// CrcCordState is said to be "normalized" if removed_prefix.length == 0.
79+
PrefixCrc removed_prefix;
80+
81+
// A deque of (length, crc) pairs, representing length and crc of a prefix
82+
// of the Cord, before removed_prefix has been subtracted. The lengths of
83+
// the prefixes are stored in increasing order. If the Cord is not empty,
84+
// the last value in deque is the contains the CRC32C of the entire Cord
85+
// when removed_prefix is subtracted from it.
86+
std::deque<PrefixCrc> prefix_crc;
87+
};
88+
89+
// Returns a reference to the representation of the chunked CRC32C data.
90+
const Rep& rep() const { return refcounted_rep_->rep; }
91+
92+
// Returns a mutable reference to the representation of the chunked CRC32C
93+
// data. Calling this function will copy the data if another instance also
94+
// holds a reference to the data, so it is important to call rep() instead if
95+
// the data may not be mutated.
96+
Rep* mutable_rep() {
97+
if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) {
98+
RefcountedRep* copy = new RefcountedRep;
99+
copy->rep = refcounted_rep_->rep;
100+
Unref(refcounted_rep_);
101+
refcounted_rep_ = copy;
102+
}
103+
return &refcounted_rep_->rep;
104+
}
105+
106+
// Returns the CRC32C of the entire Cord.
107+
absl::crc32c_t Checksum() const;
108+
109+
// Returns true if the chunked CRC32C cached is normalized.
110+
bool IsNormalized() const { return rep().removed_prefix.length == 0; }
111+
112+
// Normalizes the chunked CRC32C checksum cache by substracting any removed
113+
// prefix from the chunks.
114+
void Normalize();
115+
116+
// Returns the number of cached chunks.
117+
size_t NumChunks() const { return rep().prefix_crc.size(); }
118+
119+
// Helper that returns the (length, crc) of the `n`-th cached chunked.
120+
PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const;
121+
122+
// Poisons all chunks to so that Checksum() will likely be incorrect with high
123+
// probability.
124+
void Poison();
125+
126+
private:
127+
struct RefcountedRep {
128+
std::atomic<int32_t> count{1};
129+
Rep rep;
130+
};
131+
132+
// Adds a reference to the shared global empty `RefcountedRep`, and returns a
133+
// pointer to the `RefcountedRep`. This is an optimization to avoid unneeded
134+
// allocations when the allocation is unlikely to ever be used. The returned
135+
// pointer can be `Unref()`ed when it is no longer needed. Since the returned
136+
// instance will always have a reference counter greater than 1, attempts to
137+
// modify it (by calling `mutable_rep()`) will create a new unshared copy.
138+
static RefcountedRep* RefSharedEmptyRep();
139+
140+
static void Ref(RefcountedRep* r) {
141+
assert(r != nullptr);
142+
r->count.fetch_add(1, std::memory_order_relaxed);
143+
}
144+
145+
static void Unref(RefcountedRep* r) {
146+
assert(r != nullptr);
147+
if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
148+
delete r;
149+
}
150+
}
151+
152+
RefcountedRep* refcounted_rep_;
153+
};
154+
155+
} // namespace crc_internal
156+
ABSL_NAMESPACE_END
157+
} // namespace absl
158+
159+
#endif // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_

0 commit comments

Comments
 (0)