diff --git a/src/google/protobuf/btree_split.cc b/src/google/protobuf/btree_split.cc new file mode 100644 index 0000000000000..67bc15bebc8d6 --- /dev/null +++ b/src/google/protobuf/btree_split.cc @@ -0,0 +1,14 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "google/protobuf/btree_split.h" + +namespace google { +namespace protobuf { +namespace internal {} // namespace internal +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/btree_split.h b/src/google/protobuf/btree_split.h new file mode 100644 index 0000000000000..9b9aefd1af629 --- /dev/null +++ b/src/google/protobuf/btree_split.h @@ -0,0 +1,464 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef GOOGLE_PROTOBUF_BTREE_SPLIT_H__ +#define GOOGLE_PROTOBUF_BTREE_SPLIT_H__ + +#include +#include +#include +#include +#include + +#include "absl/log/absl_check.h" +#include "absl/types/span.h" +#include "google/protobuf/arena.h" + +// Must be included last. +#include "google/protobuf/port_def.inc" + +namespace google { +namespace protobuf { +namespace internal { + +// DO NOT SUBMIT: Document addressing scheme. 
+ +class BtreeSplitAddress { + public: + explicit constexpr BtreeSplitAddress(uint32_t v) : bits_(v) {} + + constexpr uint32_t bits() const { return bits_; } + constexpr uint32_t ConsumeBits(size_t n_bits) { + uint32_t res = bits_ & ((uint32_t{1} << n_bits) - 1); + bits_ >>= n_bits; + return res; + } + + static constexpr uint32_t CalculateBits(absl::Span steps, + size_t chunk_offset); + + // private: + uint32_t bits_; +}; + +template +class BtreeSplitTypedAddress : public BtreeSplitAddress { + public: + using value_type = T; + + constexpr uint32_t node_offset() const { + const uint32_t b = bits(); + // DO NOT SUBMIT: Use constants. Hop count is the low kMaxHopsBits (3) bits, so the mask is 7; each hop then uses kChunkPointersBits (3) bits. + const uint32_t num_hops = b & 7; + return b >> ((num_hops + 1) * 3); + } + + private: + friend BtreeSplitAddress; + using BtreeSplitAddress::BtreeSplitAddress; +}; + +class BtreeSplit { + public: + static constexpr size_t kMaxHopsBits = 3; + static constexpr size_t kChunkPointersBits = 3; + static constexpr size_t kChunkOffsetBits = 6; + static constexpr size_t kMaxDepth = + (28 - kMaxHopsBits - kChunkOffsetBits) / kChunkPointersBits; + + struct Node; + + class NodePtr { + public: + constexpr NodePtr() = default; + explicit constexpr NodePtr(void* node) : ptr_(node) {} + + void* Mutable(size_t offset, Arena* arena, const Node* default_node) { + EnsureAllocated(arena, default_node); + return node()->Mutable(offset); + } + + const void* Get(size_t offset) const { + ABSL_DCHECK_NE(node(), nullptr); + ABSL_DCHECK_LT(offset, sizeof(Node)); + return node()->Get(offset); + } + + NodePtr& sub_mutable(size_t n, Arena* arena, const Node* default_node) { + EnsureAllocated(arena, default_node); + return node()->slots[n].subnode; + } + + const Node* sub(size_t n) const { return node()->sub(n); } + + Node* node() const { return reinterpret_cast(ptr_); } + + private: + void EnsureAllocated(Arena* arena, const Node* default_node) & { + if (ptr_ == default_node) { + ptr_ = Arena::Create(arena, *node()); + } + } + + void* ptr_; + }; + + struct Node { + 
union Bytes4 { + constexpr Bytes4() = default; + constexpr Bytes4(uint32_t u32) : u32(u32) {} + constexpr Bytes4(int32_t i32) : i32(i32) {} + constexpr Bytes4(float f) : f(f) {} + constexpr Bytes4(bool b0, bool b1, bool b2, bool b3) + : b{b0, b1, b2, b3} {} + + uint32_t u32; + int32_t i32; + float f; + bool b[4]; + }; + union Bytes8 { + constexpr Bytes8() = default; + constexpr Bytes8(const Node* subnode) + : subnode(const_cast(subnode)) {} + constexpr Bytes8(std::nullptr_t) : ptr() {} + constexpr Bytes8(const void* ptr) : ptr(const_cast(ptr)) {} + constexpr Bytes8(uint64_t u64) : u64(u64) {} + constexpr Bytes8(int64_t i64) : i64(i64) {} + constexpr Bytes8(double d) : d(d) {} + constexpr Bytes8(Bytes4 b4_0, Bytes4 b4_1) : b4{b4_0, b4_1} {} + + NodePtr subnode; + void* ptr; + uint64_t u64; + int64_t i64; + double d; + Bytes4 b4[2]; + }; + + constexpr Node() = default; + constexpr Node(Bytes8 b0, Bytes8 b1 = {}, Bytes8 b2 = {}, Bytes8 b3 = {}, + Bytes8 b4 = {}, Bytes8 b5 = {}, Bytes8 b6 = {}, + Bytes8 b7 = {}) + : slots{b0, b1, b2, b3, b4, b5, b6, b7} {} + + template + const T& Get(BtreeSplitTypedAddress address) const { + return *reinterpret_cast(Get(address.node_offset())); + } + const void* Get(size_t offset) const { + return reinterpret_cast(this) + offset; + } + template + T& Mutable(BtreeSplitTypedAddress address) { + return *reinterpret_cast(Mutable(address.node_offset())); + } + void* Mutable(size_t offset) { + return reinterpret_cast(this) + offset; + } + + const Node* sub(size_t n) const { return slots[n].subnode.node(); } + Node* sub(size_t n) { return slots[n].subnode.node(); } + + Bytes8 slots[8]; + }; + + struct ConstNodeWithDefault { + const Node* value; + const Node* default_node; + + ConstNodeWithDefault sub(size_t n) { + return {value->sub(n), default_node->sub(n)}; + } + + bool is_default() const { return value == default_node; } + }; + + struct NodeWithDefault { + Node* value; + const Node* default_node; + + NodeWithDefault sub(size_t n) { + 
return {value->sub(n), default_node->sub(n)}; + } + + bool is_default() const { return value == default_node; } + }; + + explicit constexpr BtreeSplit(const void* head) + : head_(const_cast(head)) {} + + Node* head() { return head_.node(); } + const Node* head() const { return head_.node(); } + + template + T& AssumeMutable(BtreeSplitTypedAddress address) { + return AssumeMutable(BtreeSplitAddress(address)); + } + + template + T& AssumeMutable(BtreeSplitAddress address) { + return *static_cast(AssumeMutableImpl(address)); + } + + template + T* TryMutable(BtreeSplitTypedAddress address, const Node* default_node) { + return TryMutable(BtreeSplitAddress(address), default_node); + } + + template + T* TryMutable(BtreeSplitAddress address, const Node* default_node) { + return static_cast(TryMutableImpl(address, default_node)); + } + + template + const T& Get(BtreeSplitTypedAddress address) const { + return Get(BtreeSplitAddress(address)); + } + + template + static constexpr auto GetOffsetSequence(std::index_sequence seq) { + if constexpr (num_hops == 0) { + return seq; + } else { + constexpr uint32_t mask = ((uint32_t{1} << kChunkPointersBits) - 1); + return GetOffsetSequence> kChunkPointersBits)>( + std::index_sequence{}); + } + } + + // DO NOT SUBMIT: Document + template + static constexpr auto IntoOffsets() { + return GetOffsetSequence< + BtreeSplitAddress(address).ConsumeBits(kMaxHopsBits), + (address.bits() >> kMaxHopsBits), address.node_offset()>({}); + } + + template + PROTOBUF_ALWAYS_INLINE const void* GetImpl( + std::index_sequence) const { + const Node* it = head_.node(); + ((it = it->sub(chunks)), ...); + return it->Get(offset); + } + + // Template version of the function to move the address decoding to constant + // evaluation. This makes it easier for the optimizer. + template + PROTOBUF_ALWAYS_INLINE const decltype(address)::value_type& Get() const { + constexpr auto bits = IntoOffsets
(); + return *static_cast(GetImpl(bits)); + } + + template + const T& Get(BtreeSplitAddress address) const { + return *static_cast(GetImpl(address)); + } + + const void* Get(BtreeSplitAddress address) const { return GetImpl(address); } + + template + T& Mutable(BtreeSplitTypedAddress address, Msg* msg, + const Node* default_node) { + // This one is for codegen, so we have a fastpath for when it does not need + // to allocate where we can inline the accesses. + if (T* p = TryMutable(address, default_node)) { + return *p; + } + return *static_cast( + Mutable(BtreeSplitAddress(address), msg, default_node)); + } + + template + void* Mutable(BtreeSplitAddress address, Msg* msg, const Node* default_node) { + return MutableImplNoInline(address, msg, default_node); + } + + // DO NOT SUBMIT: fix comment. + // Helper functions to mark as NOINLINE to reduce code bloat in generated + // code. + // + template + void SetPrimitive(BtreeSplitTypedAddress address, Arena* arena, + std::enable_if_t value, const Node* default_node) { + Mutable(address, arena, default_node) = value; + } + + // Assign function that traverses both splits at the same time. 
+ template + PROTOBUF_NOINLINE void AssignFrom(BtreeSplitTypedAddress address, + Arena* arena, const BtreeSplit& from, + const Node* default_node) { + NodePtr* self_it = &head_; + const Node* from_it = from.head(); + + uint32_t num_hops = address.ConsumeBits(kMaxHopsBits); + while (num_hops-- > 0) { + const uint32_t slot = address.ConsumeBits(kChunkPointersBits); + self_it = &self_it->sub_mutable(slot, arena, default_node); + from_it = from_it->sub(slot); + default_node = default_node->sub(slot); + } + + *static_cast(self_it->Mutable(address.bits(), arena, default_node)) = + *static_cast(from_it->Get(address.bits())); + } + + template + PROTOBUF_ALWAYS_INLINE void ClearIfNotDefault( + BtreeSplitTypedAddress address, const Node* default_node) { + if (auto* p = TryMutable(address, default_node)) p->ClearIfNotDefault(); + } + template + PROTOBUF_ALWAYS_INLINE void ClearToEmpty(BtreeSplitTypedAddress address, + const Node* default_node) { + if (auto* p = TryMutable(address, default_node)) p->ClearToEmpty(); + } + template + PROTOBUF_ALWAYS_INLINE void ClearNonDefaultToEmpty( + BtreeSplitTypedAddress address) { + if (auto* p = TryMutable(address)) p->ClearNonDefaultToEmpty();  // NOTE(review): every TryMutable overload in this class takes a default_node; this one-argument call looks uncompilable once instantiated -- confirm. + } + template + PROTOBUF_NOINLINE void MessageCopyConstruct( + BtreeSplitTypedAddress address, const BtreeSplit& from, + Arena* arena) { + *static_cast(MutableImpl(address, arena)) =  // NOTE(review): MutableImpl is declared as (address, arena, default_node); default_node is missing here -- confirm. + MessageLite::CopyConstruct(arena, *from.Get(address)); + } + template + PROTOBUF_NOINLINE T& RawPtrConstructIfNeeded( + BtreeSplitTypedAddress address, + std::conditional_t msg, + const Node* default_node) { + auto& value = Mutable(address, msg, default_node); + if (value.IsDefault()) { + value.Set(Arena::Create(msg->GetArena())); + } + return value; + } + // This one does not allocate memory, so we don't mark it NOINLINE. It is + // pretty short when the address is a compile time constant. 
+ template + void SetPrimitiveAssumeMutable(BtreeSplitTypedAddress address, + std::enable_if_t value) { + AssumeMutable(address) = value; + } + + template + void SetPrimitiveIfMutable(BtreeSplitTypedAddress address, + std::enable_if_t value, + const Node* default_node) { + if (auto* p = TryMutable(address, default_node)) *p = value; + } + + private: + void* AssumeMutableImpl(BtreeSplitAddress address) { + const Node* it = head_.node(); + + uint32_t num_hops = address.ConsumeBits(kMaxHopsBits); + while (num_hops-- > 0) { + it = it->sub(address.ConsumeBits(kChunkPointersBits)); + } + + void* res = const_cast(it->Get(address.bits())); + PROTOBUF_ASSUME(res != nullptr); + return res; + } + + void* TryMutableImpl(BtreeSplitAddress address, const Node* default_node) { + const Node* it = head_.node(); + if (it == default_node) return nullptr; + + uint32_t num_hops = address.ConsumeBits(kMaxHopsBits); + while (num_hops-- > 0) { + const uint32_t slot = address.ConsumeBits(kChunkPointersBits); + it = it->sub(slot); + default_node = default_node->sub(slot); + if (it == default_node) return nullptr; + } + + void* res = const_cast(it->Get(address.bits())); + PROTOBUF_ASSUME(res != nullptr); + return res; + } + + template + auto ResolveArena(Arena* arena) { + if constexpr (std::is_same_v) { + return arena; + } else { + return arena->GetArena(); + } + } + + void* MutableImpl(BtreeSplitAddress address, Arena* arena, + const Node* default_node) { + NodePtr* it = &head_; + + uint32_t num_hops = address.ConsumeBits(kMaxHopsBits); + while (num_hops-- > 0) { + const uint32_t slot = address.ConsumeBits(kChunkPointersBits); + it = &it->sub_mutable(slot, arena, default_node); + default_node = default_node->sub(slot); + } + return it->Mutable(address.bits(), arena, default_node); + } + + template + PROTOBUF_NOINLINE void* MutableImplNoInline(BtreeSplitAddress address, + const Msg* msg, + const Node* default_node) { + return MutableImpl(address, msg->GetArena(), default_node); + } + + 
PROTOBUF_NOINLINE void* MutableImplNoInline(BtreeSplitAddress address, + Arena* arena, + const Node* default_node) { + return MutableImpl(address, arena, default_node); + } + + const void* GetImpl(BtreeSplitAddress address) const { + const Node* it = head_.node(); + uint32_t num_hops = address.ConsumeBits(kMaxHopsBits); + while (num_hops-- > 0) { + it = it->sub(address.ConsumeBits(kChunkPointersBits)); + } + return it->Get(address.bits()); + } + + NodePtr head_; +}; + +constexpr uint32_t BtreeSplitAddress::CalculateBits( + absl::Span steps, size_t chunk_offset) { + uint32_t result = steps.size(); + int offset = BtreeSplit::kMaxHopsBits; + for (size_t step : steps) { + result |= step << offset; + offset += BtreeSplit::kChunkPointersBits; + } + result |= chunk_offset << offset; + + BtreeSplitAddress address(result); + ABSL_CHECK_EQ(steps.size(), address.ConsumeBits(BtreeSplit::kMaxHopsBits)); + for (size_t step : steps) { + ABSL_CHECK_EQ(step, address.ConsumeBits(BtreeSplit::kChunkPointersBits)); + } + ABSL_CHECK_EQ(chunk_offset, address.bits()); + + return result; +} + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include "google/protobuf/port_undef.inc" + +#endif // GOOGLE_PROTOBUF_BTREE_SPLIT_H__ diff --git a/src/google/protobuf/btree_split_test.cc b/src/google/protobuf/btree_split_test.cc new file mode 100644 index 0000000000000..a3101d4cc30ad --- /dev/null +++ b/src/google/protobuf/btree_split_test.cc @@ -0,0 +1,139 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "google/protobuf/btree_split.h" + +#include + +#include +#include + +namespace google { +namespace protobuf { +namespace internal { +namespace { + +using testing::FieldsAre; + +constexpr auto n1 = BtreeSplit::Node{ + {5, {true, 0, 0, 0}}, +}; +static_assert(n1.slots[0].b4[0].i32 == 5); +static_assert(n1.slots[0].b4[1].b[0] == true); + +constexpr auto n2 = BtreeSplit::Node{ + uint64_t{111}, + {-1, {true, false, 0, 0}}, +}; + +static_assert(n2.slots[0].u64 == 111); +static_assert(n2.slots[1].b4[0].i32 == -1); +static_assert(n2.slots[1].b4[1].b[0] == true); +static_assert(n2.slots[1].b4[1].b[1] == false); + +constexpr auto n3 = BtreeSplit::Node{ + &n1, + &n2, + {13, -111}, + 1.3, +}; +static_assert(n3.slots[2].b4[0].i32 == 13); +static_assert(n3.slots[2].b4[1].i32 == -111); +static_assert(n3.slots[3].d == 1.3); + +constexpr auto a00 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{0}, 0)); +constexpr auto a01 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{0}, 4)); +constexpr auto a10 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{1}, 0)); +constexpr auto a11 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{1}, 8)); +constexpr auto a12 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{1}, 12)); +constexpr auto a13 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{1}, 13)); +constexpr auto a0 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{}, 16)); +constexpr auto a1 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{}, 20)); +constexpr auto a2 = BtreeSplitTypedAddress( + BtreeSplitAddress::CalculateBits(std::array{}, 24)); + +auto GatherValues(const BtreeSplit& split) { + return 
std::tuple(std::tuple(split.Get(a00), split.Get(a01)), + std::tuple(split.Get(a10), split.Get(a11), split.Get(a12), + split.Get(a13)), + split.Get(a0), split.Get(a1), split.Get(a2)); +} + +TEST(BtreeSplitTest, ReadOnly) { + BtreeSplit split(&n3); + + EXPECT_THAT(GatherValues(split), + FieldsAre(FieldsAre(5, true), // + FieldsAre(111, -1, true, false), // + 13, -111, 1.3)); +} + +TEST(BtreeSplitTest, MutableOneAtATime) { + Arena arena; + const auto* def = &n3; + BtreeSplit split(def); + ; + + EXPECT_THAT(GatherValues(split), + FieldsAre(FieldsAre(5, true), // + FieldsAre(111, -1, true, false), // + 13, -111, 1.3)); + + size_t used = arena.SpaceUsed(); + split.Mutable(a1, &arena, def) = 14; + // We only copied exactly one node + EXPECT_EQ(arena.SpaceUsed() - used, 64); + + EXPECT_THAT(GatherValues(split), + FieldsAre(FieldsAre(5, true), // + FieldsAre(111, -1, true, false), // + 13, /*changed*/ 14, 1.3)); + + used = arena.SpaceUsed(); + split.Mutable(a01, &arena, def) = false; + // We only copied exactly one node + EXPECT_EQ(arena.SpaceUsed() - used, 64); + + EXPECT_THAT(GatherValues(split), + FieldsAre(FieldsAre(5, /*changed*/ false), // + FieldsAre(111, -1, true, false), // + 13, 14, 1.3)); +} + +TEST(BtreeSplitTest, MutableMultiLevel) { + Arena arena; + const auto* def = &n3; + BtreeSplit split(def); + + EXPECT_THAT(GatherValues(split), + FieldsAre(FieldsAre(5, true), // + FieldsAre(111, -1, true, false), // + 13, -111, 1.3)); + + size_t used = arena.SpaceUsed(); + split.Mutable(a11, &arena, def) = -2; + // We should have copied two nodes. 
+ EXPECT_EQ(arena.SpaceUsed() - used, 2 * 64); + + EXPECT_THAT(GatherValues(split), + FieldsAre(FieldsAre(5, true), // + FieldsAre(111, /*changed*/ -2, true, false), // + 13, -111, 1.3)); +} + +} // namespace +} // namespace internal +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/compiler/cpp/field.cc b/src/google/protobuf/compiler/cpp/field.cc index 5819d3f23c6eb..a31eaf646a08f 100644 --- a/src/google/protobuf/compiler/cpp/field.cc +++ b/src/google/protobuf/compiler/cpp/field.cc @@ -46,6 +46,7 @@ using Sub = ::google::protobuf::io::Printer::Sub; std::vector FieldVars(const FieldDescriptor* field, const Options& opts) { bool split = ShouldSplit(field, opts); + auto split_address = absl::StrCat("Impl_::", SplitBtreeAddressName(field)); std::vector vars = { // This will eventually be renamed to "field", once the existing "field" // variable is replaced with "field_" everywhere. @@ -57,15 +58,12 @@ std::vector FieldVars(const FieldDescriptor* field, const Options& opts) { {"number", field->number()}, {"pkg.Msg.field", field->full_name()}, - {"field_", FieldMemberName(field, split)}, + {"split_address", split_address}, {"DeclaredType", DeclaredTypeMethodName(field->type())}, {"Oneof", field->real_containing_oneof() ? "Oneof" : ""}, {"Utf8", IsStrictUtf8String(field, opts) ? "Utf8" : "Raw"}, {"StrType", IsStrictUtf8String(field, opts) ? "String" : "Bytes"}, {"kTagBytes", WireFormat::TagSize(field->number(), field->type())}, - Sub("PrepareSplitMessageForWrite", - split ? 
"PrepareSplitMessageForWrite();" : "") - .WithSuffix(";"), Sub("DEPRECATED", DeprecatedAttribute(opts, field)).WithSuffix(" "), // These variables are placeholders to pick out the beginning and ends of @@ -93,6 +91,16 @@ std::vector FieldVars(const FieldDescriptor* field, const Options& opts) { .WithSuffix(";"), }; + if (split) { + vars.push_back( + {"field_", absl::StrCat("_impl_._split_.Get<", split_address, ">()")}); + vars.push_back({"mutable_field_", + absl::StrCat("MutableSplitField_(", split_address, ")")}); + } else { + vars.push_back({"field_", FieldMemberName(field, split)}); + vars.push_back({"mutable_field_", FieldMemberName(field, split)}); + } + if (const auto* oneof = field->containing_oneof()) { auto field_name = UnderscoresToCamelCase(field->name(), true); @@ -198,7 +206,7 @@ void FieldGeneratorBase::GenerateOneofCopyConstruct(io::Printer* p) const { ABSL_CHECK(!field_->is_extension()) << "Not supported"; ABSL_CHECK(!field_->is_repeated()) << "Not supported"; ABSL_CHECK(!field_->is_map()) << "Not supported"; - p->Emit("$field_$ = from.$field_$;\n"); + p->Emit("$mutable_field_$ = from.$field_$;\n"); } void FieldGeneratorBase::GenerateAggregateInitializer(io::Printer* p) const { @@ -232,7 +240,7 @@ void FieldGeneratorBase::GenerateCopyConstructorCode(io::Printer* p) const { // There is no copy constructor for the `Split` struct, so we need to copy // the value here. 
Formatter format(p, variables_); - format("$field_$ = from.$field_$;\n"); + format("$mutable_field_$ = from.$field_$;\n"); } } diff --git a/src/google/protobuf/compiler/cpp/field.h b/src/google/protobuf/compiler/cpp/field.h index af355e76bee6e..67ad09a54dc52 100644 --- a/src/google/protobuf/compiler/cpp/field.h +++ b/src/google/protobuf/compiler/cpp/field.h @@ -118,6 +118,14 @@ class FieldGeneratorBase { virtual std::vector MakeVars() const { return {}; } + virtual void GenerateSplitMemberTypeName(io::Printer* p) const { + ABSL_LOG(FATAL) << typeid(*this).name(); + } + + virtual void GenerateDefaultSplitValue(io::Printer* p) const { + ABSL_LOG(FATAL) << typeid(*this).name(); + } + virtual void GeneratePrivateMembers(io::Printer* p) const = 0; virtual void GenerateStaticMembers(io::Printer* p) const {} @@ -283,6 +291,16 @@ class FieldGenerator { return impl_->RequiresArena(function); } + void GenerateSplitMemberTypeName(io::Printer* p) const { + auto vars = PushVarsForCall(p); + impl_->GenerateSplitMemberTypeName(p); + } + + void GenerateDefaultSplitValue(io::Printer* p) const { + auto vars = PushVarsForCall(p); + impl_->GenerateDefaultSplitValue(p); + } + // Prints private members needed to represent this field. // // These are placed inside the class definition. 
@@ -509,7 +527,8 @@ class FieldGeneratorTable { void Build(const Options& options, const FieldLayout& field_layout); const FieldGenerator& get(const FieldDescriptor* field) const { - ABSL_CHECK_EQ(field->containing_type(), descriptor_); + ABSL_CHECK_EQ(field->containing_type(), descriptor_) + << field->full_name() << " -- " << descriptor_->full_name(); ABSL_DCHECK_GE(field->index(), 0); return fields_[static_cast(field->index())]; } diff --git a/src/google/protobuf/compiler/cpp/field_generators/cord_field.cc b/src/google/protobuf/compiler/cpp/field_generators/cord_field.cc index 861bd85fc3077..6a32a4fed6897 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/cord_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/cord_field.cc @@ -210,7 +210,6 @@ void CordFieldGenerator::GenerateInlineAccessorDefinitions( p->Emit(R"cc( inline void $Msg$::set_$name$(const ::absl::Cord& value) { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; _internal_set_$name_internal$(value); $annotate_set$; // @@protoc_insertion_point(field_set:$full_name$) @@ -219,7 +218,6 @@ void CordFieldGenerator::GenerateInlineAccessorDefinitions( p->Emit(R"cc( inline void $Msg$::set_$name$(::absl::string_view value) { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; $set_hasbit$; $field_$ = value; $annotate_set$; diff --git a/src/google/protobuf/compiler/cpp/field_generators/enum_field.cc b/src/google/protobuf/compiler/cpp/field_generators/enum_field.cc index 590d0dc354957..5dabdaa4d8c63 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/enum_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/enum_field.cc @@ -66,6 +66,14 @@ class SingularEnum : public FieldGeneratorBase { std::vector MakeVars() const override { return Vars(field_, *opts_); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc(int)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($kDefault$)cc"); + 
} + void GeneratePrivateMembers(io::Printer* p) const override { p->Emit(R"cc( int $name$_; @@ -73,21 +81,41 @@ class SingularEnum : public FieldGeneratorBase { } void GenerateMessageClearingCode(io::Printer* p) const override { - p->Emit(R"cc( - this_.$field_$ = $kDefault$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$ = $kDefault$; + )cc"); + } else { + p->Emit(R"cc( + this_.$field_$ = $kDefault$; + )cc"); + } } void GenerateClearingCode(io::Printer* p) const override { - p->Emit(R"cc( - $field_$ = $kDefault$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split$.SetPrimitiveIfMutable($split_address$, $kDefault$, + DefaultSplit_()); + )cc"); + } else { + p->Emit(R"cc( + $field_$ = $kDefault$; + )cc"); + } } void GenerateMergingCode(io::Printer* p) const override { - p->Emit(R"cc( - _this->$field_$ = from.$field_$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + _this->$split$.SetPrimitive($split_address$, _this, $split_field$, + DefaultSplit_()); + )cc"); + } else { + p->Emit(R"cc( + _this->$field_$ = from.$field_$; + )cc"); + } } void GenerateSwappingCode(io::Printer* p) const override { @@ -99,9 +127,16 @@ class SingularEnum : public FieldGeneratorBase { } void GenerateCopyConstructorCode(io::Printer* p) const override { - p->Emit(R"cc( - _this->$field_$ = from.$field_$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + _this->$split$.AssignFrom($split_address$, arena, from.$split$, + DefaultSplit_()); + )cc"); + } else { + p->Emit(R"cc( + _this->$field_$ = from.$field_$; + )cc"); + } } void GenerateSerializeWithCachedSizesToArray(io::Printer* p) const override { @@ -113,10 +148,17 @@ class SingularEnum : public FieldGeneratorBase { } void GenerateByteSize(io::Printer* p) const override { - p->Emit(R"cc( - total_size += $kTagBytes$ + - ::_pbi::WireFormatLite::EnumSize(this_._internal_$name$()); - )cc"); + if (should_split()) { + p->Emit(R"cc( + total_size += + $kTagBytes$ + ::_pbi::WireFormatLite::EnumSize($split_field$); + )cc"); + 
} else { + p->Emit(R"cc( + total_size += $kTagBytes$ + ::_pbi::WireFormatLite::EnumSize( + this_._internal_$name$()); + )cc"); + } } void GenerateConstexprAggregateInitializer(io::Printer* p) const override { @@ -180,13 +222,12 @@ void SingularEnum::GenerateInlineAccessorDefinitions(io::Printer* p) const { p->Emit(R"cc( inline void $Msg$::set_$name$($Enum$ value) { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; $assert_valid$; if ($not_has_field$) { clear_$oneof_name$(); set_has_$name_internal$(); } - $field_$ = value; + $mutable_field_$ = value; $annotate_set$; // @@protoc_insertion_point(field_set:$pkg.Msg.field$) } @@ -201,7 +242,6 @@ void SingularEnum::GenerateInlineAccessorDefinitions(io::Printer* p) const { p->Emit(R"cc( inline void $Msg$::set_$name$($Enum$ value) { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; _internal_set_$name_internal$(value); $set_hasbit$; $annotate_set$; @@ -214,7 +254,7 @@ void SingularEnum::GenerateInlineAccessorDefinitions(io::Printer* p) const { inline void $Msg$::_internal_set_$name_internal$($Enum$ value) { $TsanDetectConcurrentMutation$; $assert_valid$; - $field_$ = value; + $mutable_field_$ = value; } )cc"); } @@ -233,6 +273,14 @@ class RepeatedEnum : public FieldGeneratorBase { std::vector MakeVars() const override { return Vars(field_, *opts_); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($pbi$::RawPtr<$pb$::RepeatedField>)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($pbi$::kZeroBuffer)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( @@ -253,34 +301,33 @@ class RepeatedEnum : public FieldGeneratorBase { void GenerateMessageClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("this_.$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split_field$.ClearIfNotDefault(); + )cc"); } else { - p->Emit("$field_$.Clear();\n"); + 
p->Emit("this_.$field_$.Clear();\n"); } } void GenerateClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split$.ClearIfNotDefault($split_address$, DefaultSplit_()); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } } void GenerateMergingCode(io::Printer* p) const override { - // TODO: experiment with simplifying this to be - // `if (!from.empty()) { body(); }` for both split and non-split cases. - auto body = [&] { + if (should_split()) { p->Emit(R"cc( - _this->_internal_mutable_$name$()->MergeFrom(from._internal_$name$()); + _this->_internal_mutable_$name$()->MergeFrom(*$split_field$); )cc"); - }; - if (!should_split()) { - body(); } else { - p->Emit({{"body", body}}, R"cc( - if (!from.$field_$.IsDefault()) { - $body$; + p->Emit(R"cc( + if (auto& f = from._internal_$name$(); !f.empty()) { + _this->_internal_mutable_$name$()->MergeFrom(f); } )cc"); } @@ -296,7 +343,7 @@ class RepeatedEnum : public FieldGeneratorBase { void GenerateDestructorCode(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( - this_.$field_$.DeleteIfNotDefault(); + $split_field$.DeleteIfNotDefault(); )cc"); } } @@ -348,13 +395,7 @@ class RepeatedEnum : public FieldGeneratorBase { } void GenerateCopyConstructorCode(io::Printer* p) const override { - if (should_split()) { - p->Emit(R"cc( - if (!from._internal_$name$().empty()) { - _internal_mutable_$name$()->MergeFrom(from._internal_$name$()); - } - )cc"); - } + ABSL_LOG(FATAL); } void GenerateAccessorDeclarations(io::Printer* p) const override; @@ -504,11 +545,9 @@ void RepeatedEnum::GenerateInlineAccessorDefinitions(io::Printer* p) const { inline $pb$::RepeatedField* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { $TsanDetectConcurrentRead$; - $PrepareSplitMessageForWrite$; - if ($field_$.IsDefault()) { - $field_$.Set($pb$::Arena::Create<$pb$::RepeatedField>(GetArena())); - } - return $field_$.Get(); + return $split$ + 
.RawPtrConstructIfNeeded($split_address$, this, DefaultSplit_()) + .Get(); } )cc"); } else { @@ -572,16 +611,24 @@ void RepeatedEnum::GenerateSerializeWithCachedSizesToArray( } void RepeatedEnum::GenerateByteSize(io::Printer* p) const { + const auto value = [&] { + if (should_split()) { + p->Emit("(*$split_field$)"); + } else { + p->Emit("this_._internal_$name$()"); + } + }; if (has_cached_size_) { ABSL_CHECK(field_->is_packed()); - p->Emit(R"cc( + p->Emit({Sub{"value", value}.WithSuffix("")}, R"cc( total_size += ::_pbi::WireFormatLite::EnumSizeWithPackedTagSize( - this_._internal_$name$(), $kTagBytes$, this_.$cached_size_$); + $value$, $kTagBytes$, this_.$cached_size_$); )cc"); return; } p->Emit( { + Sub{"value", value}.WithSuffix(""), {"tag_size", [&] { if (field_->is_packed()) { @@ -593,15 +640,13 @@ void RepeatedEnum::GenerateByteSize(io::Printer* p) const { )cc"); } else { p->Emit(R"cc( - ::size_t{$kTagBytes$} * - ::_pbi::FromIntSize(this_._internal_$name$_size()); + ::size_t{$kTagBytes$} * ::_pbi::FromIntSize($value$.size()); )cc"); } }}, }, R"cc( - ::size_t data_size = - ::_pbi::WireFormatLite::EnumSize(this_._internal_$name$()); + ::size_t data_size = ::_pbi::WireFormatLite::EnumSize($value$); ::size_t tag_size = $tag_size$; total_size += data_size + tag_size; )cc"); diff --git a/src/google/protobuf/compiler/cpp/field_generators/map_field.cc b/src/google/protobuf/compiler/cpp/field_generators/map_field.cc index 753da708d8435..bf5debb0bf684 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/map_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/map_field.cc @@ -232,7 +232,6 @@ void Map::GenerateInlineAccessorDefinitions(io::Printer* p) const { )cc"); p->Emit(R"cc( inline $Map$* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { - $PrepareSplitMessageForWrite$; $TsanDetectConcurrentMutation$; return $field_$.MutableMap(); } diff --git a/src/google/protobuf/compiler/cpp/field_generators/message_field.cc 
b/src/google/protobuf/compiler/cpp/field_generators/message_field.cc index e182d8c13462f..f227d2814395b 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/message_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/message_field.cc @@ -91,7 +91,15 @@ class SingularMessage : public FieldGeneratorBase { ~SingularMessage() override = default; std::vector MakeVars() const override { - return Vars(field_, *opts_, is_weak(), is_weak()); + return Vars(field_, *opts_, is_weak(), should_split() || is_weak()); + } + + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($MemberType$*)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc(nullptr)cc"); } void GeneratePrivateMembers(io::Printer* p) const override { @@ -192,13 +200,30 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { )cc"); }}, }; + if (should_split()) { + // Split uses the hasbit to avoid loading memory that we don't need if + // unset, which is the expected case. + p->Emit(R"cc( + inline const $Submsg$& $Msg$::_internal_$name_internal$() const { + $TsanDetectConcurrentRead$; + $StrongRef$; + if (CheckHasBit($has_bits_array$, $has_mask$)) { + return *$cast_field_$; + } + return $kDefaultRef$; + } + )cc"); + } else { + p->Emit(R"cc( + inline const $Submsg$& $Msg$::_internal_$name_internal$() const { + $TsanDetectConcurrentRead$; + $StrongRef$; + const $Submsg$* p = $cast_field_$; + return p != nullptr ? *p : $kDefaultRef$; + } + )cc"); + } absl::string_view code = R"cc( - inline const $Submsg$& $Msg$::_internal_$name_internal$() const { - $TsanDetectConcurrentRead$; - $StrongRef$; - const $Submsg$* p = $cast_field_$; - return p != nullptr ? 
*p : $kDefaultRef$; - } inline const $Submsg$& $Msg$::$name$() const ABSL_ATTRIBUTE_LIFETIME_BOUND { $WeakDescriptorSelfPin$; $annotate_get$; @@ -209,13 +234,13 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { $Submsg$* $nullable$ value) { $WeakDescriptorSelfPin$; $TsanDetectConcurrentMutation$; - $PrepareSplitMessageForWrite$; + auto& field = $mutable_field_$; //~ If we're not on an arena, free whatever we were holding before. //~ (If we are on arena, we can just forget the earlier pointer.) if (GetArena() == nullptr) { - delete reinterpret_cast<$pb$::MessageLite*>($field_$); + delete reinterpret_cast<$pb$::MessageLite*>(field); } - $field_$ = reinterpret_cast<$MemberType$*>(value); + field = reinterpret_cast<$MemberType$*>(value); $update_hasbit$; $annotate_set$; // @@protoc_insertion_point(field_unsafe_arena_set_allocated:$pkg.Msg.field$) @@ -225,11 +250,9 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { $TsanDetectConcurrentMutation$; $StrongRef$; $annotate_release$; - $PrepareSplitMessageForWrite$; $clear_hasbit$; - $Submsg$* released = $cast_field_$; - $field_$ = nullptr; + auto* released = ::std::exchange($mutable_field_$, nullptr); if ($pbi$::DebugHardenForceCopyInRelease()) { auto* old = reinterpret_cast<$pb$::MessageLite*>(released); released = $pbi$::DuplicateIfNonNull(released); @@ -241,7 +264,7 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { released = $pbi$::DuplicateIfNonNull(released); } } - return released; + return reinterpret_cast<$Submsg$*>(released); } inline $Submsg$* $nullable$ $Msg$::unsafe_arena_release_$name$() { $WeakDescriptorSelfPin$; @@ -249,28 +272,26 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { $annotate_release$; // @@protoc_insertion_point(field_release:$pkg.Msg.field$) $StrongRef$; - $PrepareSplitMessageForWrite$; $clear_hasbit$; - $Submsg$* temp = $cast_field_$; - $field_$ = nullptr; - 
return temp; + auto* released = ::std::exchange($mutable_field_$, nullptr); + return reinterpret_cast<$Submsg$*>(released); } inline $Submsg$* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { $TsanDetectConcurrentMutation$; $StrongRef$; - if ($field_$ == nullptr) { - auto* p = Super_::DefaultConstruct<$Submsg$>(GetArena()); - $field_$ = reinterpret_cast<$MemberType$*>(p); + auto*& p = $mutable_field_$; + if (p == nullptr) { + p = reinterpret_cast<$MemberType$*>( + Super_::DefaultConstruct<$Submsg$>(GetArena())); } - return $cast_field_$; + return reinterpret_cast<$Submsg$*>(p); } inline $Submsg$* $nonnull$ $Msg$::mutable_$name$() ABSL_ATTRIBUTE_LIFETIME_BOUND { //~ TODO: add tests to make sure all write accessors are //~ able to prepare split message allocation. $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; $set_hasbit$; $Submsg$* _msg = _internal_mutable_$name_internal$(); $annotate_mutable$; @@ -283,9 +304,11 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { $WeakDescriptorSelfPin$; $pb$::Arena* message_arena = GetArena(); $TsanDetectConcurrentMutation$; - $PrepareSplitMessageForWrite$; + + auto& field = $mutable_field_$; + if (message_arena == nullptr) { - delete reinterpret_cast<$pb$::MessageLite*>($field_$); + delete reinterpret_cast<$pb$::MessageLite*>(field); } if (value != nullptr) { @@ -298,7 +321,7 @@ void SingularMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { $clear_hasbit$; } - $field_$ = reinterpret_cast<$MemberType$*>(value); + field = reinterpret_cast<$MemberType$*>(value); $annotate_set$; // @@protoc_insertion_point(field_set_allocated:$pkg.Msg.field$) } @@ -316,11 +339,16 @@ void SingularMessage::GenerateClearingCode(io::Printer* p) const { void SingularMessage::GenerateMessageClearingCode(io::Printer* p) const { ABSL_CHECK(has_hasbit_); - p->Emit( - R"cc( - $DCHK$(this_.$field_$ != nullptr); - this_.$field_$->Clear(); - )cc"); + if (should_split()) { + p->Emit(R"cc( + if (auto* 
msg = $split_field$) msg->Clear(); + )cc"); + } else { + p->Emit(R"cc( + $DCHK$(this_.$field_$ != nullptr); + this_.$field_$->Clear(); + )cc"); + } } bool SingularMessage::RequiresArena(GeneratorFunction function) const { @@ -343,8 +371,8 @@ void SingularMessage::GenerateMergingCode(io::Printer* p) const { } else if (should_split()) { p->Emit( R"cc( - _this->_internal_mutable_$name$()->$Submsg$::MergeFrom( - from._internal_$name$()); + _this->_internal_mutable_$name$()->MergeFrom( + *reinterpret_cast($split_field$)); )cc"); } else { // Important: we set `hasbits` after we copied the field. There are cases @@ -370,7 +398,7 @@ void SingularMessage::GenerateSwappingCode(io::Printer* p) const { void SingularMessage::GenerateDestructorCode(io::Printer* p) const { if (should_split()) { p->Emit(R"cc( - delete $cached_split_ptr$->$name$_; + delete $split_field$; )cc"); } else { p->Emit(R"cc( @@ -381,11 +409,19 @@ void SingularMessage::GenerateDestructorCode(io::Printer* p) const { void SingularMessage::GenerateCopyConstructorCode(io::Printer* p) const { ABSL_CHECK(has_hasbit_); - p->Emit(R"cc( - if (CheckHasBit(from.$has_bits_array$, $has_mask$)) { - _this->$field_$ = Super_::CopyConstruct(arena, *from.$field_$); - } - )cc"); + if (should_split()) { + p->Emit(R"cc( + if (CheckHasBit(from.$has_bits_array$, $has_mask$)) { + _this->$split$.MessageCopyConstruct($split_address$, from.$split$, arena); + } + )cc"); + } else { + p->Emit(R"cc( + if (CheckHasBit(from.$has_bits_array$, $has_mask$)) { + _this->$field_$ = Super_::CopyConstruct(arena, *from.$field_$); + } + )cc"); + } } void SingularMessage::GenerateSerializeWithCachedSizesToArray( @@ -406,10 +442,17 @@ void SingularMessage::GenerateSerializeWithCachedSizesToArray( } void SingularMessage::GenerateByteSize(io::Printer* p) const { - p->Emit(R"cc( - total_size += $kTagBytes$ + - $pbi$::WireFormatLite::$DeclaredType$Size(*this_.$field_$); - )cc"); + if (should_split()) { + p->Emit(R"cc( + total_size += $kTagBytes$ + + 
$pbi$::WireFormatLite::$DeclaredType$Size(*$split_field$); + )cc"); + } else { + p->Emit(R"cc( + total_size += $kTagBytes$ + + $pbi$::WireFormatLite::$DeclaredType$Size(*this_.$field_$); + )cc"); + } } void SingularMessage::GenerateIsInitialized(io::Printer* p) const { @@ -721,6 +764,14 @@ class RepeatedMessage : public FieldGeneratorBase { return Vars(field_, *opts_, is_weak(), is_weak()); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($pbi$::RawPtr<$pb$::$Weak$RepeatedPtrField<$Submsg$>>)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($pbi$::kZeroBuffer)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override; void GenerateAccessorDeclarations(io::Printer* p) const override; void GenerateInlineAccessorDefinitions(io::Printer* p) const override; @@ -913,13 +964,9 @@ void RepeatedMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { inline $pb$::$Weak$RepeatedPtrField<$Submsg$>* $nonnull$ $Msg$::_internal_mutable$_weak$_$name_internal$() { $TsanDetectConcurrentRead$; - $PrepareSplitMessageForWrite$; - if ($field_$.IsDefault()) { - $field_$.Set( - Super_::DefaultConstruct<$pb$::$Weak$RepeatedPtrField<$Submsg$>>( - GetArena())); - } - return $field_$.Get(); + return $split$ + .RawPtrConstructIfNeeded($split_address$, this, DefaultSplit_()) + .Get(); } )cc"); } else { @@ -952,7 +999,9 @@ void RepeatedMessage::GenerateInlineAccessorDefinitions(io::Printer* p) const { void RepeatedMessage::GenerateMessageClearingCode(io::Printer* p) const { if (should_split()) { - p->Emit("this_.$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split_field$.ClearIfNotDefault(); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } @@ -960,29 +1009,25 @@ void RepeatedMessage::GenerateMessageClearingCode(io::Printer* p) const { void RepeatedMessage::GenerateClearingCode(io::Printer* p) const { if (should_split()) { - p->Emit("$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + 
$split$.ClearIfNotDefault($split_address$, DefaultSplit_()); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } } void RepeatedMessage::GenerateMergingCode(io::Printer* p) const { - // TODO: experiment with simplifying this to be - // `if (!from.empty()) { body(); }` for both split and non-split cases. - auto body = [&] { + if (!should_split()) { p->Emit(R"cc( _this->_internal_mutable$_weak$_$name$()->InternalMergeFromWithArena( $pb$::MessageLite::internal_visibility(), arena, from._internal$_weak$_$name$()); )cc"); - }; - if (!should_split()) { - body(); } else { - p->Emit({{"body", body}}, R"cc( - if (!from.$field_$.IsDefault()) { - $body$; - } + p->Emit(R"cc( + _this->_internal_mutable$_weak$_$name$()->InternalMergeFromWithArena( + $pb$::MessageLite::internal_visibility(), arena, *$split_field$); )cc"); } } @@ -1011,7 +1056,7 @@ void RepeatedMessage::GenerateCopyConstructorCode(io::Printer* p) const { void RepeatedMessage::GenerateDestructorCode(io::Printer* p) const { if (should_split()) { p->Emit(R"cc( - this_.$field_$.DeleteIfNotDefault(); + $split_field$.DeleteIfNotDefault(); )cc"); } } @@ -1077,13 +1122,23 @@ void RepeatedMessage::GenerateSerializeWithCachedSizesToArray( } void RepeatedMessage::GenerateByteSize(io::Printer* p) const { - p->Emit( - R"cc( - total_size += $kTagBytes$UL * this_._internal_$name$_size(); - for (const auto& msg : this_._internal$_weak$_$name$()) { - total_size += $pbi$::WireFormatLite::$DeclaredType$Size(msg); - } - )cc"); + p->Emit({Sub{"value", + [&] { + if (should_split()) { + p->Emit("*$split_field$"); + } else { + p->Emit("this_._internal_$name$()"); + } + }} + .WithSuffix("")}, + R"cc( + if (auto& value = $value$; true) { + total_size += $kTagBytes$UL * value.size(); + for (const auto& msg : value) { + total_size += $pbi$::WireFormatLite::$DeclaredType$Size(msg); + } + } + )cc"); } void RepeatedMessage::GenerateIsInitialized(io::Printer* p) const { diff --git 
a/src/google/protobuf/compiler/cpp/field_generators/primitive_field.cc b/src/google/protobuf/compiler/cpp/field_generators/primitive_field.cc index e8ccbe4f86c1d..4bc9637c97f2a 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/primitive_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/primitive_field.cc @@ -98,6 +98,14 @@ class SingularPrimitive final : public FieldGeneratorBase { std::vector MakeVars() const override { return Vars(field_, *opts_); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($Type$)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($kDefault$)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override { p->Emit(R"cc( $Type$ $name$_; @@ -105,21 +113,48 @@ class SingularPrimitive final : public FieldGeneratorBase { } void GenerateMessageClearingCode(io::Printer* p) const override { - p->Emit(R"cc( - this_.$field_$ = $kDefault$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$ = $kDefault$; + )cc"); + } else { + p->Emit(R"cc( + this_.$field_$ = $kDefault$; + )cc"); + } } void GenerateClearingCode(io::Printer* p) const override { - p->Emit(R"cc( - $field_$ = $kDefault$; - )cc"); + if (should_split()) { + if (GetFieldHasbitMode(field_, *opts_) == + internal::cpp::HasbitMode::kTrueHasbit) { + p->Emit(R"cc( + $split$.SetPrimitiveAssumeMutable($split_address$, $kDefault$); + )cc"); + } else { + p->Emit(R"cc( + $split$.SetPrimitiveIfMutable($split_address$, $kDefault$, + DefaultSplit_()); + )cc"); + } + } else { + p->Emit(R"cc( + $field_$ = $kDefault$; + )cc"); + } } void GenerateMergingCode(io::Printer* p) const override { - p->Emit(R"cc( - _this->$field_$ = from.$field_$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + _this->$split$.SetPrimitive($split_address$, _this, $split_field$, + DefaultSplit_()); + )cc"); + } else { + p->Emit(R"cc( + _this->$field_$ = from.$field_$; + )cc"); + } } void 
GenerateSwappingCode(io::Printer* p) const override { @@ -135,9 +170,16 @@ class SingularPrimitive final : public FieldGeneratorBase { } void GenerateCopyConstructorCode(io::Printer* p) const override { - p->Emit(R"cc( - _this->$field_$ = from.$field_$; - )cc"); + if (should_split()) { + p->Emit(R"cc( + _this->$split$.AssignFrom($split_address$, arena, from.$split$, + DefaultSplit_()); + )cc"); + } else { + p->Emit(R"cc( + _this->$field_$ = from.$field_$; + )cc"); + } } void GenerateConstexprAggregateInitializer(io::Printer* p) const override { @@ -198,7 +240,6 @@ void SingularPrimitive::GenerateInlineAccessorDefinitions( p->Emit(R"cc( inline void $Msg$::set_$name$($Type$ value) { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; if ($not_has_field$) { clear_$oneof_name$(); set_has_$name_internal$(); @@ -218,9 +259,8 @@ void SingularPrimitive::GenerateInlineAccessorDefinitions( p->Emit(R"cc( inline void $Msg$::set_$name$($Type$ value) { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; - _internal_set_$name_internal$(value); $set_hasbit$; + _internal_set_$name_internal$(value); $annotate_set$; // @@protoc_insertion_point(field_set:$pkg.Msg.field$) } @@ -228,11 +268,22 @@ void SingularPrimitive::GenerateInlineAccessorDefinitions( $TsanDetectConcurrentRead$; return $field_$; } - inline void $Msg$::_internal_set_$name_internal$($Type$ value) { - $TsanDetectConcurrentMutation$; - $field_$ = value; - } )cc"); + if (should_split()) { + p->Emit(R"cc( + inline void $Msg$::_internal_set_$name_internal$($Type$ value) { + $TsanDetectConcurrentMutation$; + $split$.SetPrimitive($split_address$, this, value, DefaultSplit_()); + } + )cc"); + } else { + p->Emit(R"cc( + inline void $Msg$::_internal_set_$name_internal$($Type$ value) { + $TsanDetectConcurrentMutation$; + $field_$ = value; + } + )cc"); + } } } @@ -267,22 +318,34 @@ void SingularPrimitive::GenerateByteSize(io::Printer* p) const { p->Emit({{"kFixedBytes", tag_size + *fixed_size}}, R"cc( total_size += 
$kFixedBytes$; )cc"); + if (should_split()) { + // Silence the unused warning. We don't want to do it on the variable + // itself because it is useful normally. + p->Emit("(void)node;"); + } return; } + const auto value = [&] { + if (should_split()) { + p->Emit("$split_field$"); + } else { + p->Emit("this_._internal_$name$()"); + } + }; + // Adding one is very common and it turns out it can be done for // free inside of WireFormatLite, so we can save an instruction here. if (tag_size == 1) { - p->Emit(R"cc( - total_size += ::_pbi::WireFormatLite::$DeclaredType$SizePlusOne( - this_._internal_$name$()); + p->Emit({{"value", value}}, R"cc( + total_size += ::_pbi::WireFormatLite::$DeclaredType$SizePlusOne($value$); )cc"); return; } - p->Emit(R"cc( - total_size += $kTagBytes$ + ::_pbi::WireFormatLite::$DeclaredType$Size( - this_._internal_$name$()); + p->Emit({{"value", value}}, R"cc( + total_size += + $kTagBytes$ + ::_pbi::WireFormatLite::$DeclaredType$Size($value$); )cc"); } @@ -298,7 +361,9 @@ class RepeatedPrimitive final : public FieldGeneratorBase { void GenerateMessageClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("this_.$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split_field$.ClearIfNotDefault(); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } @@ -306,26 +371,23 @@ class RepeatedPrimitive final : public FieldGeneratorBase { void GenerateClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split$.ClearIfNotDefault($split_address$, DefaultSplit_()); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } } void GenerateMergingCode(io::Printer* p) const override { - // TODO: experiment with simplifying this to be - // `if (!from.empty()) { body(); }` for both split and non-split cases. 
- auto body = [&] { + if (should_split()) { p->Emit(R"cc( - _this->_internal_mutable_$name$()->MergeFrom(from._internal_$name$()); + _this->_internal_mutable_$name$()->MergeFrom(*$split_field$); )cc"); - }; - if (!should_split()) { - body(); } else { - p->Emit({{"body", body}}, R"cc( - if (!from.$field_$.IsDefault()) { - $body$; + p->Emit(R"cc( + if (auto& f = from._internal_$name$(); !f.empty()) { + _this->_internal_mutable_$name$()->MergeFrom(f); } )cc"); } @@ -341,19 +403,13 @@ class RepeatedPrimitive final : public FieldGeneratorBase { void GenerateDestructorCode(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( - this_.$field_$.DeleteIfNotDefault(); + $split_field$.DeleteIfNotDefault(); )cc"); } } void GenerateCopyConstructorCode(io::Printer* p) const override { - if (should_split()) { - p->Emit(R"cc( - if (!from._internal_$name$().empty()) { - _internal_mutable_$name$()->MergeFrom(from._internal_$name$()); - } - )cc"); - } + ABSL_LOG(FATAL); } void GenerateConstexprAggregateInitializer(io::Printer* p) const override { @@ -404,6 +460,14 @@ class RepeatedPrimitive final : public FieldGeneratorBase { ABSL_LOG(FATAL) << "Not supported"; } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($pbi$::RawPtr<$pb$::RepeatedField<$Type$>>)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($pbi$::kZeroBuffer)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override; void GenerateAccessorDeclarations(io::Printer* p) const override; void GenerateInlineAccessorDefinitions(io::Printer* p) const override; @@ -576,11 +640,9 @@ void RepeatedPrimitive::GenerateInlineAccessorDefinitions( inline $pb$::RepeatedField<$Type$>* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { $TsanDetectConcurrentRead$; - $PrepareSplitMessageForWrite$; - if ($field_$.IsDefault()) { - $field_$.Set($pb$::Arena::Create<$pb$::RepeatedField<$Type$>>(GetArena())); - } - return $field_$.Get(); + 
return $split$ + .RawPtrConstructIfNeeded($split_address$, this, DefaultSplit_()) + .Get(); } )cc"); } else { @@ -647,31 +709,36 @@ void RepeatedPrimitive::GenerateSerializeWithCachedSizesToArray( } void RepeatedPrimitive::GenerateByteSize(io::Printer* p) const { + const auto value = [&] { + if (should_split()) { + p->Emit("(*$split_field$)"); + } else { + p->Emit("this_._internal_$name$()"); + } + }; if (HasCachedSize()) { ABSL_CHECK(field_->is_packed()); - p->Emit( - R"cc( - total_size += - ::_pbi::WireFormatLite::$DeclaredType$SizeWithPackedTagSize( - this_._internal_$name$(), $kTagBytes$, - this_.$_field_cached_byte_size_$); - )cc"); + p->Emit({Sub{"value", value}.WithSuffix("")}, + R"cc( + total_size += + ::_pbi::WireFormatLite::$DeclaredType$SizeWithPackedTagSize( + $value$, $kTagBytes$, this_.$_field_cached_byte_size_$); + )cc"); return; } p->Emit( { + Sub{"value", value}.WithSuffix(""), {"data_size", [&] { auto fixed_size = FixedSize(field_->type()); if (fixed_size.has_value()) { p->Emit({{"kFixed", *fixed_size}}, R"cc( - ::size_t{$kFixed$} * - ::_pbi::FromIntSize(this_._internal_$name$_size()); + ::size_t{$kFixed$} * ::_pbi::FromIntSize($value$.size()); )cc"); } else { p->Emit(R"cc( - ::_pbi::WireFormatLite::$DeclaredType$Size( - this_._internal_$name$()); + ::_pbi::WireFormatLite::$DeclaredType$Size($value$); )cc"); } }}, @@ -686,8 +753,7 @@ void RepeatedPrimitive::GenerateByteSize(io::Printer* p) const { )cc"); } else { p->Emit(R"cc( - ::size_t{$kTagBytes$} * - ::_pbi::FromIntSize(this_._internal_$name$_size()); + ::size_t{$kTagBytes$} * ::_pbi::FromIntSize($value$.size()); )cc"); } }}, diff --git a/src/google/protobuf/compiler/cpp/field_generators/string_field.cc b/src/google/protobuf/compiler/cpp/field_generators/string_field.cc index d6f53de9ee9a1..2a5487ce0b94a 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/string_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/string_field.cc @@ -76,6 +76,14 @@ class SingularString : 
public FieldGeneratorBase { bool IsInlined() const override { return is_inlined(); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($pbi$::ArenaStringPtr)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc(&$pbi$::fixed_address_empty_string)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override { // Skips the automatic destruction if inlined; rather calls it explicitly if // allocating arena is null. @@ -101,6 +109,10 @@ class SingularString : public FieldGeneratorBase { } _this->$field_$.Set(from._internal_$name$(), arena); )cc"); + } else if (should_split()) { + p->Emit(R"cc( + _this->_internal_set_$name$($split_field$.Get()); + )cc"); } else { p->Emit(R"cc( _this->_internal_set_$name$(from._internal_$name$()); @@ -119,10 +131,17 @@ class SingularString : public FieldGeneratorBase { } void GenerateByteSize(io::Printer* p) const override { - p->Emit(R"cc( - total_size += $kTagBytes$ + $pbi$::WireFormatLite::$DeclaredType$Size( - this_._internal_$name$()); - )cc"); + if (should_split()) { + p->Emit(R"cc( + total_size += $kTagBytes$ + $pbi$::WireFormatLite::$DeclaredType$Size( + $split_field$.Get()); + )cc"); + } else { + p->Emit(R"cc( + total_size += $kTagBytes$ + $pbi$::WireFormatLite::$DeclaredType$Size( + this_._internal_$name$()); + )cc"); + } } void GenerateCopyAggregateInitializer(io::Printer* p) const override { @@ -286,7 +305,7 @@ void SingularString::ReleaseImpl(io::Printer* p) const { if (!HasHasbit(field_, options_)) { p->Emit(R"cc( - return $field_$.Release(); + return $mutable_field_$.Release(); )cc"); return; } @@ -312,15 +331,15 @@ void SingularString::ReleaseImpl(io::Printer* p) const { if (!EmptyDefault()) { p->Emit(R"cc( - return $field_$.Release(); + return $mutable_field_$.Release(); )cc"); return; } p->Emit(R"cc( - auto* released = $field_$.Release(); + auto* released = $mutable_field_$.Release(); if ($pbi$::DebugHardenForceCopyDefaultString()) { - 
$field_$.Set("", GetArena()); + $mutable_field_$.Set("", GetArena()); } return released; )cc"); @@ -351,7 +370,7 @@ void SingularString::SetAllocatedImpl(io::Printer* p) const { } p->Emit(R"cc( - $field_$.SetAllocated(value, GetArena()); + $mutable_field_$.SetAllocated(value, GetArena()); )cc"); if (is_inlined()) { @@ -361,7 +380,7 @@ void SingularString::SetAllocatedImpl(io::Printer* p) const { if (EmptyDefault()) { p->Emit(R"cc( if ($pbi$::DebugHardenForceCopyDefaultString() && $field_$.IsDefault()) { - $field_$.Set("", GetArena()); + $mutable_field_$.Set("", GetArena()); } )cc"); } @@ -407,16 +426,14 @@ void SingularString::GenerateInlineAccessorDefinitions(io::Printer* p) const { PROTOBUF_ALWAYS_INLINE void $Msg$::set_$name$(Arg_&& arg, Args_... args) { $WeakDescriptorSelfPin$; $TsanDetectConcurrentMutation$; - $PrepareSplitMessageForWrite$; $update_hasbit$; - $field_$.$Set$(static_cast(arg), args..., GetArena()); + $mutable_field_$.$Set$(static_cast(arg), args..., GetArena()); $annotate_set$; // @@protoc_insertion_point(field_set:$pkg.Msg.field$) } inline ::std::string* $nonnull$ $Msg$::mutable_$name$() ABSL_ATTRIBUTE_LIFETIME_BOUND { $WeakDescriptorSelfPin$; - $PrepareSplitMessageForWrite$; $update_hasbit$; ::std::string* _s = _internal_mutable_$name_internal$(); $annotate_mutable$; @@ -432,24 +449,22 @@ void SingularString::GenerateInlineAccessorDefinitions(io::Printer* p) const { $TsanDetectConcurrentMutation$; //~ Don't use $Set$ here; we always want the std::string variant //~ regardless of whether this is a `bytes` field. 
- $field_$.Set(value, GetArena()); + $mutable_field_$.Set(value, GetArena()); } inline ::std::string* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { $TsanDetectConcurrentMutation$; - return $field_$.Mutable($lazy_args$, GetArena()); + return $mutable_field_$.Mutable($lazy_args$, GetArena()); } inline ::std::string* $nullable$ $Msg$::$release_name$() { $WeakDescriptorSelfPin$; $TsanDetectConcurrentMutation$; $annotate_release$; - $PrepareSplitMessageForWrite$; // @@protoc_insertion_point(field_release:$pkg.Msg.field$) $release_impl$; } inline void $Msg$::set_allocated_$name$(::std::string* $nullable$ value) { $WeakDescriptorSelfPin$; $TsanDetectConcurrentMutation$; - $PrepareSplitMessageForWrite$; $set_allocated_impl$; $annotate_set$; // @@protoc_insertion_point(field_set_allocated:$pkg.Msg.field$) @@ -465,6 +480,21 @@ void SingularString::GenerateClearingCode(io::Printer* p) const { return; } + if (should_split()) { + if (EmptyDefault()) { + p->Emit(R"cc( + $split$.ClearToEmpty($split_address$, DefaultSplit_()); + )cc"); + } else { + p->Emit(R"cc( + if (auto* p = $split$.TryMutable($split_address$, DefaultSplit_())) { + p->ClearToDefault($lazy_var$, GetArena()); + } + )cc"); + } + return; + } + if (EmptyDefault()) { p->Emit(R"cc( $field_$.ClearToEmpty(); @@ -512,17 +542,29 @@ void SingularString::GenerateMessageClearingCode(io::Printer* p) const { if (!EmptyDefault()) { // Clear to a non-empty default is more involved, as we try to use the // Arena if one is present and may need to reallocate the string. - p->Emit(R"cc( - this_.$field_$.ClearToDefault($lazy_var$, this_.GetArena()); - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$.ClearToDefault($lazy_var$, this_.GetArena()); + )cc"); + } else { + p->Emit(R"cc( + this_.$field_$.ClearToDefault($lazy_var$, this_.GetArena()); + )cc"); + } return; } - p->Emit({{"Clear", HasHasbit(field_, options_) ? 
"ClearNonDefaultToEmpty" - : "ClearToEmpty"}}, - R"cc( - this_.$field_$.$Clear$(); - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$.ClearToEmpty(); + )cc"); + } else { + p->Emit({{"Clear", HasHasbit(field_, options_) ? "ClearNonDefaultToEmpty" + : "ClearToEmpty"}}, + R"cc( + this_.$field_$.$Clear$(); + )cc"); + } } void SingularString::GenerateSwappingCode(io::Printer* p) const { @@ -549,13 +591,13 @@ void SingularString::GenerateCopyConstructorCode(io::Printer* p) const { ABSL_DCHECK(!is_inlined()); p->Emit(R"cc( - $field_$.InitDefault(); + $mutable_field_$.InitDefault(); )cc"); if (IsString(field_) && EmptyDefault()) { p->Emit(R"cc( if ($pbi$::DebugHardenForceCopyDefaultString()) { - $field_$.Set("", GetArena()); + $mutable_field_$.Set("", GetArena()); } )cc"); } @@ -578,7 +620,7 @@ void SingularString::GenerateCopyConstructorCode(io::Printer* p) const { }}}, R"cc( if ($hazzer$) { - _this->$field_$.Set(from._internal_$name$(), _this->GetArena()); + _this->$mutable_field_$.Set(from._internal_$name$(), _this->GetArena()); } )cc"); } @@ -591,7 +633,7 @@ void SingularString::GenerateDestructorCode(io::Printer* p) const { if (should_split()) { p->Emit(R"cc( - $cached_split_ptr$->$name$_.Destroy(); + $split_field$.Destroy(); )cc"); return; } @@ -659,6 +701,14 @@ class RepeatedString : public FieldGeneratorBase { std::vector MakeVars() const override { return Vars(field_, *opts_); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($pbi$::RawPtr<$pb$::RepeatedPtrField<::std::string>>)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($pbi$::kZeroBuffer)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( @@ -673,7 +723,9 @@ class RepeatedString : public FieldGeneratorBase { void GenerateMessageClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("this_.$field_$.ClearIfNotDefault();\n"); + 
p->Emit(R"cc( + $split_field$.ClearIfNotDefault(); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } @@ -681,7 +733,9 @@ class RepeatedString : public FieldGeneratorBase { void GenerateClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split$.ClearIfNotDefault($split_address$, DefaultSplit_()); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } @@ -696,22 +750,16 @@ class RepeatedString : public FieldGeneratorBase { } void GenerateMergingCode(io::Printer* p) const override { - // TODO: experiment with simplifying this to be - // `if (!from.empty()) { body(); }` for both split and non-split cases. - auto body = [&] { + if (!should_split()) { p->Emit(R"cc( _this->_internal_mutable_$name$()->InternalMergeFromWithArena( $pb$::MessageLite::internal_visibility(), arena, from._internal_$name$()); )cc"); - }; - if (!should_split()) { - body(); } else { - p->Emit({{"body", body}}, R"cc( - if (!from.$field_$.IsDefault()) { - $body$; - } + p->Emit(R"cc( + _this->_internal_mutable_$name$()->InternalMergeFromWithArena( + $pb$::MessageLite::internal_visibility(), arena, *$split_field$); )cc"); } } @@ -726,7 +774,7 @@ class RepeatedString : public FieldGeneratorBase { void GenerateDestructorCode(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( - this_.$field_$.DeleteIfNotDefault(); + $split_field$.DeleteIfNotDefault(); )cc"); } } @@ -744,14 +792,23 @@ class RepeatedString : public FieldGeneratorBase { } void GenerateByteSize(io::Printer* p) const override { - p->Emit(R"cc( - total_size += - $kTagBytes$ * $pbi$::FromIntSize(this_._internal_$name$().size()); - for (int i = 0, n = this_._internal_$name$().size(); i < n; ++i) { - total_size += $pbi$::WireFormatLite::$DeclaredType$Size( - this_._internal_$name$().Get(i)); - } - )cc"); + p->Emit({Sub{"value", + [&] { + if (should_split()) { + p->Emit("*$split_field$"); + } else { + p->Emit("this_._internal_$name$()"); + } + }} + 
.WithSuffix("")}, + R"cc( + if (auto& value = $value$; true) { + total_size += $kTagBytes$ * $pbi$::FromIntSize(value.size()); + for (int i = 0, n = value.size(); i < n; ++i) { + total_size += $pbi$::WireFormatLite::$DeclaredType$Size(value.Get(i)); + } + } + )cc"); } void GenerateAccessorDeclarations(io::Printer* p) const override; @@ -941,13 +998,9 @@ void RepeatedString::GenerateInlineAccessorDefinitions(io::Printer* p) const { inline $pb$::RepeatedPtrField<::std::string>* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { $TsanDetectConcurrentRead$; - $PrepareSplitMessageForWrite$; - if ($field_$.IsDefault()) { - $field_$.Set( - $pb$::Arena::Create<$pb$::RepeatedPtrField<::std::string>>( - GetArena())); - } - return $field_$.Get(); + return $split$ + .RawPtrConstructIfNeeded($split_address$, this, DefaultSplit_()) + .Get(); } )cc"); } else { diff --git a/src/google/protobuf/compiler/cpp/field_generators/string_view_field.cc b/src/google/protobuf/compiler/cpp/field_generators/string_view_field.cc index 4cf233c5933db..cfc6100333a9a 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/string_view_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/string_view_field.cc @@ -90,6 +90,18 @@ class SingularStringView : public FieldGeneratorBase { return "MicroString"; } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit({{"Str", FieldTypeName()}}, R"cc($pbi$::$Str$)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + if (use_micro_string()) { + p->Emit("nullptr"); + } else { + p->Emit(R"cc(&$pbi$::fixed_address_empty_string)cc"); + } + } + void GeneratePrivateMembers(io::Printer* p) const override { // Skips the automatic destruction if inlined; rather calls it explicitly if // allocating arena is null. 
@@ -114,6 +126,10 @@ class SingularStringView : public FieldGeneratorBase { } _this->$field_$.Set(from._internal_$name$(), arena); )cc"); + } else if (should_split()) { + p->Emit(R"cc( + _this->_internal_set_$name$($split_field$.Get()); + )cc"); } else { p->Emit(R"cc( _this->_internal_set_$name$(from._internal_$name$()); @@ -132,10 +148,17 @@ class SingularStringView : public FieldGeneratorBase { } void GenerateByteSize(io::Printer* p) const override { - p->Emit(R"cc( - total_size += $kTagBytes$ + $pbi$::WireFormatLite::$DeclaredType$Size( - this_._internal_$name$()); - )cc"); + if (should_split()) { + p->Emit(R"cc( + total_size += $kTagBytes$ + $pbi$::WireFormatLite::$DeclaredType$Size( + $split_field$.Get()); + )cc"); + } else { + p->Emit(R"cc( + total_size += $kTagBytes$ + $pbi$::WireFormatLite::$DeclaredType$Size( + this_._internal_$name$()); + )cc"); + } } void GenerateCopyAggregateInitializer(io::Printer* p) const override { @@ -301,9 +324,8 @@ void SingularStringView::GenerateInlineAccessorDefinitions( PROTOBUF_ALWAYS_INLINE void $Msg$::set_$name$(Arg_&& arg) { $WeakDescriptorSelfPin$; $TsanDetectConcurrentMutation$; - $PrepareSplitMessageForWrite$; $update_hasbit$; - $field_$.Set(static_cast(arg), GetArena()); + $mutable_field_$.Set(static_cast(arg), GetArena()); $annotate_set$; // @@protoc_insertion_point(field_set:$pkg.Msg.field$) } @@ -315,7 +337,7 @@ void SingularStringView::GenerateInlineAccessorDefinitions( inline void $Msg$::_internal_set_$name_internal$(::absl::string_view value) { $TsanDetectConcurrentMutation$; $update_hasbit$; - $field_$.Set(value, GetArena()); + $mutable_field_$.Set(value, GetArena()); } )cc"); } @@ -337,19 +359,19 @@ void SingularStringView::GenerateClearingCode(io::Printer* p) const { if (EmptyDefault()) { if (use_micro_string()) { p->Emit(R"cc( - $field_$.Clear(); + $mutable_field_$.Clear(); )cc"); return; } p->Emit(R"cc( - $field_$.ClearToEmpty(); + $mutable_field_$.ClearToEmpty(); )cc"); return; } 
ABSL_DCHECK(!is_inlined()); p->Emit(R"cc( - $field_$.ClearToDefault($lazy_var$, GetArena()); + $mutable_field_$.ClearToDefault($lazy_var$, GetArena()); )cc"); } @@ -379,24 +401,42 @@ void SingularStringView::GenerateMessageClearingCode(io::Printer* p) const { if (!EmptyDefault()) { // Clear to a non-empty default is more involved, as we try to use the // Arena if one is present and may need to reallocate the string. - p->Emit(R"cc( - this_.$field_$.ClearToDefault($lazy_var$, this_.GetArena()); - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$.ClearToDefault($lazy_var$, _this.GetArena()); + )cc"); + } else { + p->Emit(R"cc( + this_.$field_$.ClearToDefault($lazy_var$, GetArena()); + )cc"); + } return; } if (use_micro_string()) { - p->Emit(R"cc( - this_.$field_$.Clear(); - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$.Clear(); + )cc"); + } else { + p->Emit(R"cc( + $field_$.Clear(); + )cc"); + } return; } - p->Emit({{"Clear", HasHasbit(field_, options_) ? "ClearNonDefaultToEmpty" - : "ClearToEmpty"}}, - R"cc( - this_.$field_$.$Clear$(); - )cc"); + if (should_split()) { + p->Emit(R"cc( + $split_field$.ClearToEmpty(); + )cc"); + } else { + p->Emit({{"Clear", HasHasbit(field_, options_) ? 
"ClearNonDefaultToEmpty" + : "ClearToEmpty"}}, + R"cc( + this_.$field_$.$Clear$(); + )cc"); + } } void SingularStringView::GenerateSwappingCode(io::Printer* p) const { @@ -430,13 +470,13 @@ void SingularStringView::GenerateCopyConstructorCode(io::Printer* p) const { ABSL_DCHECK(!is_inlined()); p->Emit(R"cc( - $field_$.InitDefault(); + $mutable_field_$.InitDefault(); )cc"); if (EmptyDefault()) { p->Emit(R"cc( if ($pbi$::DebugHardenForceCopyDefaultString()) { - $field_$.Set("", GetArena()); + $mutable_field_$.Set("", GetArena()); } )cc"); } @@ -459,7 +499,7 @@ void SingularStringView::GenerateCopyConstructorCode(io::Printer* p) const { }}}, R"cc( if ($hazzer$) { - _this->$field_$.Set(from._internal_$name$(), _this->GetArena()); + _this->$mutable_field_$.Set(from._internal_$name$(), _this->GetArena()); } )cc"); } @@ -471,8 +511,8 @@ void SingularStringView::GenerateDestructorCode(io::Printer* p) const { } if (should_split()) { - p->Emit(R"cc( - $cached_split_ptr$->$name$_.Destroy(); + p->Emit({{"Str", FieldTypeName()}}, R"cc( + $split_field$.Destroy(); )cc"); return; } @@ -550,6 +590,14 @@ class RepeatedStringView : public FieldGeneratorBase { std::vector MakeVars() const override { return Vars(field_, *opts_); } + void GenerateSplitMemberTypeName(io::Printer* p) const override { + p->Emit(R"cc($pbi$::RawPtr<$pb$::RepeatedPtrField<::std::string>>)cc"); + } + + void GenerateDefaultSplitValue(io::Printer* p) const override { + p->Emit(R"cc($pbi$::kZeroBuffer)cc"); + } + void GeneratePrivateMembers(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( @@ -564,7 +612,9 @@ class RepeatedStringView : public FieldGeneratorBase { void GenerateMessageClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("this_.$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split_field$.ClearIfNotDefault(); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } @@ -572,7 +622,9 @@ class RepeatedStringView : public FieldGeneratorBase { void 
GenerateClearingCode(io::Printer* p) const override { if (should_split()) { - p->Emit("$field_$.ClearIfNotDefault();\n"); + p->Emit(R"cc( + $split$.ClearIfNotDefault($split_address$, DefaultSplit_()); + )cc"); } else { p->Emit("$field_$.Clear();\n"); } @@ -587,22 +639,16 @@ class RepeatedStringView : public FieldGeneratorBase { } void GenerateMergingCode(io::Printer* p) const override { - // TODO: experiment with simplifying this to be - // `if (!from.empty()) { body(); }` for both split and non-split cases. - auto body = [&] { + if (!should_split()) { p->Emit(R"cc( _this->_internal_mutable_$name$()->InternalMergeFromWithArena( $pb$::MessageLite::internal_visibility(), arena, from._internal_$name$()); )cc"); - }; - if (!should_split()) { - body(); } else { - p->Emit({{"body", body}}, R"cc( - if (!from.$field_$.IsDefault()) { - $body$; - } + p->Emit(R"cc( + _this->_internal_mutable_$name$()->InternalMergeFromWithArena( + $pb$::MessageLite::internal_visibility(), arena, *$split_field$); )cc"); } } @@ -617,7 +663,7 @@ class RepeatedStringView : public FieldGeneratorBase { void GenerateDestructorCode(io::Printer* p) const override { if (should_split()) { p->Emit(R"cc( - this_.$field_$.DeleteIfNotDefault(); + $split_field$.DeleteIfNotDefault(); )cc"); } } @@ -635,14 +681,23 @@ class RepeatedStringView : public FieldGeneratorBase { } void GenerateByteSize(io::Printer* p) const override { - p->Emit(R"cc( - total_size += - $kTagBytes$ * $pbi$::FromIntSize(this_._internal_$name$().size()); - for (int i = 0, n = this_._internal_$name$().size(); i < n; ++i) { - total_size += $pbi$::WireFormatLite::$DeclaredType$Size( - this_._internal_$name$().Get(i)); - } - )cc"); + p->Emit({Sub{"value", + [&] { + if (should_split()) { + p->Emit("*$split_field$"); + } else { + p->Emit("this_._internal_$name$()"); + } + }} + .WithSuffix("")}, + R"cc( + if (auto& value = $value$; true) { + total_size += $kTagBytes$ * $pbi$::FromIntSize(value.size()); + for (int i = 0, n = value.size(); i < n; 
++i) { + total_size += $pbi$::WireFormatLite::$DeclaredType$Size(value.Get(i)); + } + } + )cc"); } void GenerateAccessorDeclarations(io::Printer* p) const override; @@ -807,13 +862,9 @@ void RepeatedStringView::GenerateInlineAccessorDefinitions( inline $pb$::RepeatedPtrField<::std::string>* $nonnull$ $Msg$::_internal_mutable_$name_internal$() { $TsanDetectConcurrentRead$; - $PrepareSplitMessageForWrite$; - if ($field_$.IsDefault()) { - $field_$.Set( - $pb$::Arena::Create<$pb$::RepeatedPtrField<::std::string>>( - GetArena())); - } - return $field_$.Get(); + return $split$ + .RawPtrConstructIfNeeded($split_address$, this, DefaultSplit_()) + .Get(); } )cc"); } else { diff --git a/src/google/protobuf/compiler/cpp/helpers.cc b/src/google/protobuf/compiler/cpp/helpers.cc index 095d1cede6e56..b1d170bd44449 100644 --- a/src/google/protobuf/compiler/cpp/helpers.cc +++ b/src/google/protobuf/compiler/cpp/helpers.cc @@ -397,7 +397,8 @@ bool CanInitializeByZeroing(const FieldDescriptor* field, } } -bool CanClearByZeroing(const FieldDescriptor* field) { +bool CanClearByZeroing(const FieldDescriptor* field, const Options& options) { + if (ShouldSplit(field, options)) return false; if (field->is_repeated() || field->is_extension()) return false; switch (field->cpp_type()) { case FieldDescriptor::CPPTYPE_ENUM: @@ -672,7 +673,13 @@ std::string FieldName(const FieldDescriptor* field) { } std::string FieldMemberName(const FieldDescriptor* field, bool split) { + if (split) { + return absl::StrFormat("_impl_._split_.Get(Impl_::%s)", + SplitBtreeAddressName(field)); + } + absl::string_view prefix = "_impl_."; + // DO NOT SUBMIT FIX absl::string_view split_prefix = split ? 
"_split_->" : ""; if (field->real_containing_oneof() == nullptr) { return absl::StrCat(prefix, split_prefix, FieldName(field), "_"); @@ -946,7 +953,12 @@ std::string DefaultValue(const Options& options, const FieldDescriptor* field) { } else if (value != value) { return "::std::numeric_limits::quiet_NaN()"; } else { - return io::SimpleDtoa(value); + std::string double_value = io::SimpleDtoa(value); + // Make sure it is a double literal. + if (double_value.find_first_of(".eE") == std::string::npos) { + double_value.push_back('.'); + } + return double_value; } } case FieldDescriptor::CPPTYPE_FLOAT: { @@ -959,12 +971,11 @@ std::string DefaultValue(const Options& options, const FieldDescriptor* field) { return "::std::numeric_limits::quiet_NaN()"; } else { std::string float_value = io::SimpleFtoa(value); - // If floating point value contains a period (.) or an exponent - // (either E or e), then append suffix 'f' to make it a float - // literal. - if (float_value.find_first_of(".eE") != std::string::npos) { - float_value.push_back('f'); + // Make sure it is a float literal. + if (float_value.find_first_of(".eE") == std::string::npos) { + float_value.push_back('.'); } + float_value.push_back('f'); return float_value; } } diff --git a/src/google/protobuf/compiler/cpp/helpers.h b/src/google/protobuf/compiler/cpp/helpers.h index 42e145f8ac355..dcdbda4971d59 100644 --- a/src/google/protobuf/compiler/cpp/helpers.h +++ b/src/google/protobuf/compiler/cpp/helpers.h @@ -118,7 +118,7 @@ class MessageSCCAnalyzer; bool CanInitializeByZeroing(const FieldDescriptor* field, const Options& options); // Returns true if it's safe to reset "field" to zero. -bool CanClearByZeroing(const FieldDescriptor* field); +bool CanClearByZeroing(const FieldDescriptor* field, const Options& options); // Determines if swap can be implemented via memcpy. 
bool HasTrivialSwap(const FieldDescriptor* field, const Options& options); diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index be1128f8b0426..1bd1396726a1c 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -256,6 +256,8 @@ void EmitNonDefaultCheck(io::Printer* p, absl::string_view prefix, ABSL_CHECK(!field->is_repeated()); ABSL_CHECK(!field->containing_oneof() || field->real_containing_oneof()); + // DO NOT SUBMIT: Should we complicate this for split fields? + auto v = p->WithVars({{ {"prefix", prefix}, {"name", FieldName(field)}, @@ -617,6 +619,23 @@ std::vector ClassVars(const Descriptor* desc, Options opts) { return vars; } +size_t SplitMemberSize(const FieldDescriptor* field) { + if (field->is_repeated()) return 8; + switch (field->cpp_type()) { + case FieldDescriptor::CPPTYPE_BOOL: + return 1; + + case FieldDescriptor::CPPTYPE_INT32: + case FieldDescriptor::CPPTYPE_UINT32: + case FieldDescriptor::CPPTYPE_ENUM: + case FieldDescriptor::CPPTYPE_FLOAT: + return 4; + + default: + return 8; + } +} + } // namespace // =================================================================== @@ -1094,7 +1113,7 @@ void MessageGenerator::GenerateSingularFieldHasBits( {Sub{"ASSUME", [&] { if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && - !IsLazy(field, options_)) { + !ShouldSplit(field, options_) && !IsLazy(field, options_)) { // We maintain the invariant that for a submessage x, has_x() // returning true implies that x_ is not null. 
By giving this // information to the compiler, we allow it to eliminate @@ -1189,7 +1208,15 @@ void MessageGenerator::GenerateFieldClear(const FieldDescriptor* field, )cc"); } else { // TODO: figure out if early return breaks tracking - if (ShouldSplit(field, options_)) { + if (GetFieldHasbitMode(field, options_) == + internal::cpp::HasbitMode::kTrueHasbit) { + auto v = p->WithVars(HasBitVars(field)); + p->Emit(R"cc( + if (ABSL_PREDICT_TRUE(!CheckHasBit( + $has_bits$[$has_array_index$], $has_mask$))) + return; + )cc"); + } else if (ShouldSplit(field, options_)) { p->Emit(R"cc( if (ABSL_PREDICT_TRUE(IsSplitMessageDefault())) return; @@ -1543,23 +1570,65 @@ void MessageGenerator::GenerateImplDefinition(io::Printer* p) { {"decl_split", [&] { if (!ShouldSplit(descriptor_, options_)) return; - p->Emit({{"split_field", + p->Emit(R"cc( + $pbi$::BtreeSplit _split_; + )cc"); + const auto& root_node = options_.split_map->RootFor(descriptor_); + + if (root_node.has_zero_init) { + p->Emit(R"cc( + static constexpr auto& kSplitNode = $pbi$::kZeroBuffer; + )cc"); + } else { + p->Emit(R"cc( + static const $pbi$::BtreeSplit::Node kSplitNode[]; + )cc"); + } + + const auto debug_print = [&] { + std::vector indices; + std::string str; + options_.split_map->RootFor(descriptor_) + .ForEachNode(indices, [&](auto& node) { + str += absl::StrJoin(indices, "-") + "NODE->\n"; + node.ForEachField([&](auto* f, size_t offset) { + if (f == nullptr) return; + absl::StrAppend(&str, absl::StrJoin(indices, "-"), + "offset=", offset, " ", f->full_name(), + "\n"); + }); + }); + return str; + }; + + (void)debug_print; + std::vector indices; + root_node.ForEachNode(indices, [&](auto& node) { + ABSL_CHECK_LE(indices.size(), internal::BtreeSplit::kMaxDepth) + << debug_print(); + node.ForEachField([&](auto* f, size_t offset) { + if (f == nullptr) return; + + p->Emit( + {{"type", [&] { - for (auto field : field_layout_.optimized_order()) { - if (!ShouldSplit(field, options_)) continue; - 
field_generators_.get(field).GeneratePrivateMembers(p); - } - }}}, + field_generators_.get(f).GenerateSplitMemberTypeName(p); + }}, + {"address", SplitBtreeAddressName(f)}, + {"indices", absl::StrJoin(indices, ",")}, + {"offset", offset}, + {"bits", + // Print as octal because the address uses 3 bits per + // segment. Easier to read. + absl::StrFormat("0%o", + internal::BtreeSplitAddress::CalculateBits( + indices, offset))}}, R"cc( - struct Split { - $split_field$; - using InternalArenaConstructable_ = void; - using DestructorSkippable_ = void; - }; - static_assert(::std::is_trivially_copy_constructible::value); - static_assert(::std::is_trivially_destructible::value); - Split* $nonnull$ _split_; + // idx={$indices$}, offset=$offset$ + static constexpr $pbi$::BtreeSplitTypedAddress<$type$> $address${$bits$}; )cc"); + }); + }); }}, {"oneof_members", [&] { @@ -1758,6 +1827,13 @@ void MessageGenerator::GenerateAnyMethodDefinition(io::Printer* p) { )cc"); } +// DO NOT SUBMIT: +// FIXME: +// SOMETHING IS BROKEN!!!!! +// The last 3 things I added were: MergeFrom uses if-chain, ByteSizeLong uses +// if-chain, Serialize uses if skips. After that searchmark broke. Take these +// back to find where the bug is. 
+ void MessageGenerator::GenerateClassDefinition(io::Printer* p) { if (!ShouldGenerateClass(descriptor_, options_)) return; @@ -2026,10 +2102,18 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* p) { SplitDefaultInstanceName(descriptor_, options_)}}, R"cc( private: + static const auto* PROTOBUF_NONNULL DefaultSplit_() { + return reinterpret_cast( + &Impl_::kSplitNode[0]); + } inline bool IsSplitMessageDefault() const { - return $split$ == reinterpret_cast(&$split_default$); + return _impl_._split_.head() == DefaultSplit_(); + } + template + auto& MutableSplitField_(T address) { + ABSL_DCHECK_NE(this, &default_instance()); + return _impl_._split_.Mutable(address, this, DefaultSplit_()); } - PROTOBUF_NOINLINE void PrepareSplitMessageForWrite(); public: )cc"); @@ -2119,17 +2203,7 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* p) { }}, {"decl_impl", [&] { GenerateImplDefinition(p); }}, {"classdata_type", ClassDataType(descriptor_, options_)}, - {"msg_globals", MsgGlobalsInstanceName(descriptor_, options_)}, - {"split_friend", - [&] { - if (!ShouldSplit(descriptor_, options_)) return; - - p->Emit({{"split_default", - SplitDefaultInstanceType(descriptor_, options_)}}, - R"cc( - friend struct $split_default$; - )cc"); - }}}, + {"msg_globals", MsgGlobalsInstanceName(descriptor_, options_)}}, R"cc( class $dllexport_decl $ $unused $$Msg$ final : public $superclass$ /* @@protoc_insertion_point(class_definition:$full_name$) */ { @@ -2215,6 +2289,7 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* p) { $generated_methods$; $internal_field_number$; $decl_non_simple_base$; + private: static ::absl::string_view FullMessageName() { return "$full_name$"; } $decl_annotate$; @@ -2274,7 +2349,6 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* p) { using InternalArenaConstructable_ = void; using DestructorSkippable_ = void; $decl_impl$; - $split_friend$; //~ The TableStruct struct needs access to the private parts, in //~ order to 
construct the offsets of all members. friend struct ::$tablename$; @@ -2425,21 +2499,6 @@ void MessageGenerator::GenerateClassMethods(io::Printer* p) { p->Emit("\n"); } - if (ShouldSplit(descriptor_, options_)) { - p->Emit({{"split_default", SplitDefaultInstanceName(descriptor_, options_)}, - {"globals", MsgGlobalsInstanceName(descriptor_, options_)}}, - R"cc( - void $Msg$::PrepareSplitMessageForWrite() { - if (ABSL_PREDICT_TRUE(IsSplitMessageDefault())) { - ABSL_DCHECK_NE(this, &default_instance()); - $pbi$::CreateSplitMessageGeneric( - GetArena(), reinterpret_cast(&$split$), - sizeof(Impl_::Split)); - } - } - )cc"); - } - GenerateVerify(p); GenerateSwap(p); @@ -2524,13 +2583,14 @@ size_t MessageGenerator::GenerateOffsets(io::Printer* p) { const bool has_weak_fields = num_weak_fields_ > 0; // NOTE: We can cleanup two bits from old donated logic. const bool has_inline_strings = false; - const bool has_split = ShouldSplit(descriptor_, options_); + const bool has_split_offset = ShouldSplit(descriptor_, options_); + const bool has_split_size = false; format("$1$, // bitmap\n", // These conditions have to match exactly the order done below make_bitmap(has_has_bits, has_extensions, has_oneofs, has_weak_fields, - has_inline_strings, has_split, has_split, has_has_bits, - has_inline_strings)); + has_inline_strings, has_split_offset, has_split_size, + has_has_bits, has_inline_strings)); // The order of these offsets has to match the reading of them in // MigrationToReflectionSchema. 
@@ -2546,10 +2606,8 @@ size_t MessageGenerator::GenerateOffsets(io::Printer* p) { if (has_weak_fields) { format("PROTOBUF_FIELD_OFFSET($classtype$, $weak_field_map$),\n"); } - if (has_split) { - format( - "PROTOBUF_FIELD_OFFSET($classtype$, $split$),\n" - "sizeof($classtype$::Impl_::Split),\n"); + if (has_split_offset) { + format("PROTOBUF_FIELD_OFFSET($classtype$, $split$),\n"); } const size_t offsets = num_generated_indices + descriptor_->field_count() + descriptor_->real_oneof_decl_count(); @@ -2569,12 +2627,11 @@ size_t MessageGenerator::GenerateOffsets(io::Printer* p) { } else if (field->real_containing_oneof()) { format("PROTOBUF_FIELD_OFFSET($classtype$, _impl_.$1$_)", field->real_containing_oneof()->name()); + } else if (ShouldSplit(field, options_)) { + format("$classtype$::Impl_::$1$.bits()", SplitBtreeAddressName(field)); } else { - format("PROTOBUF_FIELD_OFFSET($classtype$$1$, $2$)", - ShouldSplit(field, options_) ? "::Impl_::Split" : "", - ShouldSplit(field, options_) - ? absl::StrCat(FieldName(field), "_") - : FieldMemberName(field, /*split=*/false)); + format("PROTOBUF_FIELD_OFFSET($classtype$, $1$)", + FieldMemberName(field, /*split=*/false)); } // Some information about a field is in the pdproto profile. The profile is @@ -2746,7 +2803,7 @@ void MessageGenerator::GenerateImplMemberInit(io::Printer* p, if (ShouldSplit(descriptor_, options_)) { separator(); p->Emit({{"name", SplitDefaultInstanceName(descriptor_, options_)}}, - "_split_{const_cast(&$name$._instance)}"); + R"cc(_split_(&Impl_::kSplitNode[0]))cc"); } }; @@ -3366,11 +3423,19 @@ void MessageGenerator::EmitClearChunks(io::Printer* p, bool is_split) { // hasbit to see if a zero-init is necessary. 
const int kMaxUnconditionalPrimitiveBytesClear = 4; + std::vector non_split_fields; + // DO NOT SUBMIT: Move into FieldLayout + for (const auto* field : field_layout_.optimized_order()) { + if (!ShouldSplit(field, options_)) { + non_split_fields.push_back(field); + } + } + // Collect fields into chunks. Each chunk may have an if() condition that // checks all hasbits in the chunk and skips it if none are set. int zero_init_bytes = 0; - for (const auto& field : field_layout_.optimized_order()) { - if (CanClearByZeroing(field)) { + for (const auto& field : non_split_fields) { + if (CanClearByZeroing(field, options_)) { zero_init_bytes += EstimateAlignmentSize(field); } } @@ -3378,7 +3443,7 @@ void MessageGenerator::EmitClearChunks(io::Printer* p, bool is_split) { int chunk_count = 0; std::vector chunks = CollectFields( - field_layout_.optimized_order(), options_, + non_split_fields, options_, [&](const FieldDescriptor* a, const FieldDescriptor* b) -> bool { chunk_count++; // This predicate guarantees that there is only a single zero-init @@ -3387,8 +3452,9 @@ void MessageGenerator::EmitClearChunks(io::Printer* p, bool is_split) { field_layout_.GetHasByteIndex(a) == field_layout_.GetHasByteIndex(b) && IsLikelyPresent(a, options_) == IsLikelyPresent(b, options_) && - (CanClearByZeroing(a) == CanClearByZeroing(b) || - (CanClearByZeroing(a) && (chunk_count == 1 || merge_zero_init))); + (CanClearByZeroing(a, options_) == CanClearByZeroing(b, options_) || + (CanClearByZeroing(a, options_) && + (chunk_count == 1 || merge_zero_init))); if (!same) chunk_count = 0; return same; }, @@ -3411,7 +3477,7 @@ void MessageGenerator::EmitClearChunks(io::Printer* p, bool is_split) { bool saw_non_zero_init = false; for (const auto& field : fields) { - if (CanClearByZeroing(field)) { + if (CanClearByZeroing(field, options_)) { ABSL_CHECK(!saw_non_zero_init); if (!memset_start) memset_start = field; memset_end = field; @@ -3452,7 +3518,7 @@ void 
MessageGenerator::EmitClearChunks(io::Printer* p, bool is_split) { // Clear all non-zero-initializable fields in the chunk. for (const auto& field : fields) { - if (CanClearByZeroing(field)) continue; + if (CanClearByZeroing(field, options_)) continue; // It's faster to just overwrite primitive types, but we should only // clear strings and messages if they were set. // @@ -4092,14 +4158,71 @@ bool MessageGenerator::RequiresArena(GeneratorFunction function, return false; } +void MessageGenerator::GenerateMergeImplForField(io::Printer* p, + const FieldDescriptor* field, + bool is_split, + int cached_has_word_index, + bool check_has_byte) { + const auto& generator = field_generators_.get(field); + + if (!field->is_required() && !HasHasbit(field, options_)) { + // Merge semantics without true field presence: primitive fields are + // merged only if non-zero (numeric) or non-empty (string). + MayEmitMutableIfNonDefaultCheck( + p, "from.", field, is_split, options_, + /*emit_body=*/ + [&]() { generator.GenerateMergingCode(p); }, + /*with_enclosing_braces_always=*/true); + PROTOBUF_IGNORE_DEPRECATION_START + } else if (field->options().weak() || + cached_has_word_index != + field_layout_.GetHasWordIndex(field).value()) { + PROTOBUF_IGNORE_DEPRECATION_STOP + // Check hasbit, not using cached bits. + auto v = p->WithVars(HasBitVars(field)); + p->Emit({{"merge_field", [&] { generator.GenerateMergingCode(p); }}}, + R"cc( + if (CheckHasBit(from.$has_bits$[$has_array_index$], $has_mask$)) { + $merge_field$; + } + )cc"); + } else { + // Check hasbit, using cached bits. 
+ ABSL_CHECK(HasHasbit(field, options_)); + int has_bit_index = field_layout_.GetHasBitIndex(field).value(); + + p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField( + has_bit_index, field, options_)}, + {"merge_field", + [&] { + if (GetFieldHasbitMode(field, options_) == + HasbitMode::kHintHasbit) { + // Merge semantics without true field presence: primitive + // fields are merged only if non-zero (numeric) or + // non-empty (string). + MayEmitMutableIfNonDefaultCheck( + p, "from.", field, is_split, options_, + /*emit_body=*/[&]() { generator.GenerateMergingCode(p); }, + /*with_enclosing_braces_always=*/false); + } else { + ABSL_DCHECK(GetFieldHasbitMode(field, options_) == + HasbitMode::kTrueHasbit); + if (check_has_byte && IsPOD(field)) { + generator.GenerateCopyConstructorCode(p); + } else { + generator.GenerateMergingCode(p); + } + } + }}}, + R"cc( + if ($condition$) { + $merge_field$; + } + )cc"); + } +} + bool MessageGenerator::EmitMergeChunks(io::Printer* p, bool is_split) { - const auto prepare_split = [&] { - if (is_split) { - p->Emit(R"cc( - _this->PrepareSplitMessageForWrite(); - )cc"); - } - }; // cached_has_word_index maintains that: // cached_has_bits = from._has_bits_[cached_has_word_index] // for cached_has_word_index >= 0 @@ -4142,73 +4265,8 @@ bool MessageGenerator::EmitMergeChunks(io::Printer* p, bool is_split) { // Go back and emit merging code for each of the fields we processed. for (const auto* field : fields) { - const auto& generator = field_generators_.get(field); - - if (!field->is_required() && !HasHasbit(field, options_)) { - // Merge semantics without true field presence: primitive fields are - // merged only if non-zero (numeric) or non-empty (string). 
- MayEmitMutableIfNonDefaultCheck( - p, "from.", field, is_split, options_, - /*emit_body=*/ - [&]() { - prepare_split(); - generator.GenerateMergingCode(p); - }, - /*with_enclosing_braces_always=*/true); - PROTOBUF_IGNORE_DEPRECATION_START - } else if (field->options().weak() || - cached_has_word_index != - field_layout_.GetHasWordIndex(field).value()) { - PROTOBUF_IGNORE_DEPRECATION_STOP - // Check hasbit, not using cached bits. - auto v = p->WithVars(HasBitVars(field)); - p->Emit( - {{"merge_field", - [&] { - prepare_split(); - generator.GenerateMergingCode(p); - }}}, - R"cc( - if (CheckHasBit(from.$has_bits$[$has_array_index$], $has_mask$)) { - $merge_field$; - } - )cc"); - } else { - // Check hasbit, using cached bits. - ABSL_CHECK(HasHasbit(field, options_)); - int has_bit_index = field_layout_.GetHasBitIndex(field).value(); - - p->Emit( - {{"condition", GenerateConditionMaybeWithProbabilityForField( - has_bit_index, field, options_)}, - {"merge_field", - [&] { - prepare_split(); - if (GetFieldHasbitMode(field, options_) == - HasbitMode::kHintHasbit) { - // Merge semantics without true field presence: primitive - // fields are merged only if non-zero (numeric) or - // non-empty (string). 
- MayEmitMutableIfNonDefaultCheck( - p, "from.", field, is_split, options_, - /*emit_body=*/[&]() { generator.GenerateMergingCode(p); }, - /*with_enclosing_braces_always=*/false); - } else { - ABSL_DCHECK(GetFieldHasbitMode(field, options_) == - HasbitMode::kTrueHasbit); - if (check_has_byte && IsPOD(field)) { - generator.GenerateCopyConstructorCode(p); - } else { - generator.GenerateMergingCode(p); - } - } - }}}, - R"cc( - if ($condition$) { - $merge_field$; - } - )cc"); - } + GenerateMergeImplForField(p, field, is_split, cached_has_word_index, + check_has_byte); } if (check_has_byte) { @@ -4270,7 +4328,7 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { if (RequiresArena(GeneratorFunction::kMergeFrom, /* is_split= */ false)) { p->Emit(R"cc( - $pb$::Arena* arena = _this->GetArena(); + $pb$::Arena* arena [[maybe_unused]] = _this->GetArena(); )cc"); } }}, @@ -5531,6 +5589,43 @@ void MessageGenerator::GenerateIsInitialized(io::Printer* p) { } +void MessageGenerator::GenerateSplitFieldIfChain( + io::Printer* p, const SplitMap::Node& node, bool mutable_iteration, + absl::FunctionRef per_node, + absl::FunctionRef per_field) { + for (int i = 0; i < node.subnodes.size(); ++i) { + p->Emit({{"i", i}, + {"sub", + [&] { + GenerateSplitFieldIfChain(p, node.subnodes[i], + mutable_iteration, per_node, + per_field); + }}}, + R"cc( + if (auto sub = node.sub($i$), node = sub; + ABSL_PREDICT_FALSE(!sub.is_default())) { + $sub$; + } + )cc"); + } + node.ForEachField([&](auto* f, size_t) { + if (f == nullptr) return; + p->Emit({Sub{"split_field", + [&] { + if (mutable_iteration) { + p->Emit(R"cc(node.value->Mutable($split_address$))cc"); + } else { + p->Emit(R"cc(node.value->Get($split_address$))cc"); + } + }} + .WithSuffix(""), + {"per_field", [&] { per_field(f); }}}, + R"cc($per_field$)cc"); + }); + + per_node(node); +} + void MessageGenerator::GenerateSourceDefaultInstance(io::Printer* p) { if (!ShouldGenerateClass(descriptor_, options_)) return; @@ -5573,56 
+5668,103 @@ void MessageGenerator::GenerateSourceDefaultInstance(io::Printer* p) { } )cc"); }}, - {"split", + {"splits", [&] { if (!ShouldSplit(descriptor_, options_)) return; + const auto& root_node = options_.split_map->RootFor(descriptor_); + p->Emit( - {{"destroy_fields", + {{"body", [&] { - for (const auto* field : field_layout_.optimized_order()) { - if (!ShouldSplit(field, options_)) continue; - field_generators_.get(field).GenerateDestructorCode(p); - } - }}, - {"get_arena_merge", - [&] { - if (RequiresArena(GeneratorFunction::kMergeFrom, - /* is_split= */ true)) { - p->Emit(R"cc( - $pb$::Arena* arena = _this->GetArena(); - )cc"); - } - }}, - {"merge_fields", - [&] { EmitMergeChunks(p, /* is_split= */ true); }}, - {"clear_fields", - [&] { EmitClearChunks(p, /* is_split= */ true); }}, - {"byte_size_fields", - [&] { EmitByteSizeChunks(p, /* is_split= */ true); }}}, + GenerateSplitFieldIfChain( + p, root_node, true, + [&](auto& node) { + p->Emit(R"cc( + delete node.value; + )cc"); + }, + [&](auto* field) { + field_generators_.get(field).GenerateDestructorCode( + p); + }); + }}}, R"cc( - PROTOBUF_NOINLINE static void DestroySplit($Msg$& this_) { - auto* const $cached_split_ptr$ = this_._impl_._split_; - $destroy_fields$; - delete $cached_split_ptr$; - } - PROTOBUF_NOINLINE static void MergeSplit($Msg$* _this, const $Msg$& from) { - $get_arena_merge$; - ::uint32_t cached_has_bits [[maybe_unused]] = 0; - $merge_fields$; + PROTOBUF_NOINLINE static void DestroySplit($Msg$& _this) { + $pbi$::BtreeSplit::NodeWithDefault node = { + _this.$split$.head(), DefaultSplit_()}; + $body$; } - PROTOBUF_NOINLINE static void ClearSplit($Msg$& this_) { - ::uint32_t cached_has_bits [[maybe_unused]] = 0; - $clear_fields$; - } - - PROTOBUF_NOINLINE static ::size_t ByteSizeSplit(const $Msg$& this_) { - ::size_t total_size = 0; + )cc"); + p->Emit({{"body", + [&] { + GenerateSplitFieldIfChain( + p, root_node, true, [](auto& node) {}, + [&](auto* field) { + field_generators_.get(field) 
+ .GenerateMessageClearingCode(p); + }); + }}}, + R"cc( + PROTOBUF_NOINLINE static void ClearSplit($Msg$& _this) { + $pbi$::BtreeSplit::NodeWithDefault node = { + _this.$split$.head(), DefaultSplit_()}; + $body$; + } + )cc"); + p->Emit( + {{"body", + [&] { + GenerateSplitFieldIfChain( + p, root_node, false, [](auto& node) {}, + [&](auto* field) { + const absl::optional has_word_index = + field_layout_.GetHasWordIndex(field); + if (has_word_index.has_value()) { + p->Emit( + {{"cached_has_word_index", + has_word_index.value()}}, + R"cc( + cached_has_bits = + from.$has_bits$[$cached_has_word_index$]; + )cc"); + } + GenerateMergeImplForField(p, field, true, + has_word_index.value_or(-1), + false); + }); + }}}, + R"cc( + PROTOBUF_NOINLINE static void MergeSplit($Msg$* _this, + const $Msg$& from) { + $pbi$::BtreeSplit::ConstNodeWithDefault node = { + from.$split$.head(), DefaultSplit_()}; ::uint32_t cached_has_bits [[maybe_unused]] = 0; - $byte_size_fields$; - return total_size; + $pb$::Arena* arena [[maybe_unused]] = _this->GetArena(); + $body$; } )cc"); + p->Emit({{"body", + [&] { + GenerateSplitFieldIfChain( + p, root_node, false, [](auto& node) {}, + [&](auto* field) { + int cached_has_work_index = -1; + EmitUpdateByteSizeForField(field, p, + cached_has_work_index); + }); + }}}, + R"cc( + PROTOBUF_NOINLINE static ::size_t ByteSizeSplit( + const $Msg$& this_) { + ::size_t total_size = 0; + $pbi$::BtreeSplit::ConstNodeWithDefault node = { + this_.$split$.head(), DefaultSplit_()}; + ::uint32_t cached_has_bits [[maybe_unused]] = 0; + $body$; + return total_size; + } + )cc"); }}}, R"cc( class $Msg$::_Internal { @@ -5630,46 +5772,93 @@ void MessageGenerator::GenerateSourceDefaultInstance(io::Printer* p) { $has_bit$; $oneof$; $required$; - $split$; + $splits$; }; )cc"); p->Emit("\n"); - } - parse_function_generator_->GenerateParseTableHelperDefinition(p); - p->Emit("\n"); + if (ShouldSplit(descriptor_, options_)) { + const auto print_one = [&](auto* f) { + if (f == nullptr) 
{ + p->Emit("0,"); + return; + } + p->Emit( + {{"name", f->name()}, + Sub{"value", + [&] { field_generators_.get(f).GenerateDefaultSplitValue(p); }} + .WithSuffix("")}, + R"cc( + /* $name$ */ $value$, + )cc"); + }; + const auto print_slot = [&](const auto& slot) { + if (slot.fields.index() == 0) { + print_one(std::get<0>(slot.fields)); + } else { + p->Emit("{"); + for (auto& sub : std::get<1>(slot.fields)) { + if (sub.fields.index() == 0) { + print_one(std::get<0>(sub.fields)); + } else { + p->Emit("{"); + for (auto& sub : std::get<1>(sub.fields)) { + print_one(sub.field); + } + p->Emit("},"); + } + } + p->Emit("},"); + } + }; - // Generate the split instance first because it's needed in the constexpr - // constructor. - if (ShouldSplit(descriptor_, options_)) { - // Use a union to disable the destructor of the _instance member. - // We can constant initialize, but the object will still have a non-trivial - // destructor that we need to elide. - // - // NO_DESTROY is not necessary for correctness. The empty destructor is - // enough. However, the empty destructor fails to be elided in some - // configurations (like non-opt or with certain sanitizers). NO_DESTROY is - // there just to improve performance and binary size in these builds. 
- p->Emit( - { - {"type", SplitDefaultInstanceType(descriptor_, options_)}, - {"name", SplitDefaultInstanceName(descriptor_, options_)}, - {"default", [&] { GenerateInitDefaultSplitInstance(p); }}, - {"class", absl::StrCat(ClassName(descriptor_), "::Impl_::Split")}, - }, - R"cc( - struct $type$ { - constexpr $type$() : _instance{$default$} {} - union { - $class$ _instance; - }; - }; + std::vector indices; + const auto& root_node = options_.split_map->RootFor(descriptor_); + absl::flat_hash_map, std::string> node_path_to_name; + int nodes_to_create = 0; - PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT$ dllexport_decl$ - PROTOBUF_ATTRIBUTE_INIT_PRIORITY1 const $type$ $name$; - )cc"); + root_node.ForEachNode(indices, [&](auto& node) { + if (!node.has_zero_init) { + node_path_to_name[indices] = + absl::StrCat("kSplitNode[", nodes_to_create++, "]"); + } + }); + + if (nodes_to_create > 0) { + p->Emit({{"count", nodes_to_create}, + {"init", + [&] { + root_node.ForEachNode(indices, [&](auto& node) { + if (node.has_zero_init) return; + p->Emit("{"); + for (int i = 0; i < node.subnodes.size(); ++i) { + indices.push_back(i); + absl::string_view name = node_path_to_name[indices]; + indices.pop_back(); + if (name.empty()) { + p->Emit(R"cc(&$pbi$::kZeroBuffer,)cc"); + } else { + p->Emit({{"sub", name}}, R"cc(&$sub$,)cc"); + } + } + for (auto& slot : node.field_slots) { + print_slot(slot); + } + p->Emit("},\n"); + }); + }}}, + R"cc( + PROTOBUF_ALIGNAS(64) + PROTOBUF_CONSTINIT const $pbi$::BtreeSplit::Node + $Msg$::Impl_::kSplitNode[$count$] = {$init$}; + )cc"); + } + } } + parse_function_generator_->GenerateParseTableHelperDefinition(p); + p->Emit("\n"); + GenerateConstexprConstructor(p); // Always generate PlacementNew_ because we might need it for different diff --git a/src/google/protobuf/compiler/cpp/message.h b/src/google/protobuf/compiler/cpp/message.h index c3157d47c94d2..e01136134fa74 100644 --- a/src/google/protobuf/compiler/cpp/message.h +++ 
b/src/google/protobuf/compiler/cpp/message.h @@ -78,6 +78,11 @@ class MessageGenerator { void GenerateSourceDefaultInstance(io::Printer* p); + void GenerateSplitFieldIfChain( + io::Printer* p, const SplitMap::Node& node, bool mutable_iteration, + absl::FunctionRef per_node, + absl::FunctionRef per_field); + void GenerateSchema(io::Printer* p, int offset); // Generate the field offsets array. Returns the total number of entries @@ -133,6 +138,10 @@ class MessageGenerator { void GenerateAnyMethodDefinition(io::Printer* p); void GenerateImplDefinition(io::Printer* p); void GenerateClassSpecificMergeImpl(io::Printer* p); + void GenerateMergeImplFields(io::Printer* p, bool split); + void GenerateMergeImplForField(io::Printer* p, const FieldDescriptor* field, + bool is_split, int cached_has_word_index, + bool check_has_byte); void GenerateCopyFrom(io::Printer* p); void GenerateSwap(io::Printer* p); void GenerateIsInitialized(io::Printer* p); diff --git a/src/google/protobuf/compiler/cpp/parse_function_generator.cc b/src/google/protobuf/compiler/cpp/parse_function_generator.cc index b84f47d9a7e2e..9ddff276fb316 100644 --- a/src/google/protobuf/compiler/cpp/parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/parse_function_generator.cc @@ -420,9 +420,6 @@ void ParseFunctionGenerator::GenerateParseTableHelperDefinition( case TailCallTableInfo::kSplitOffset: p->Emit("{_fl::Offset{offsetof($Msg$, _impl_._split_)}},\n"); break; - case TailCallTableInfo::kSplitSizeof: - p->Emit("{_fl::Offset{sizeof($Msg$::Impl_::Split)}},\n"); - break; case TailCallTableInfo::kSubMessageGlobals: p->Emit({{"name", QualifiedMsgGlobalsInstanceName( aux_entry.field->message_type(), options_)}}, @@ -721,9 +718,8 @@ void ParseFunctionGenerator::GenerateFieldEntries(io::Printer* p) { if (weak) { p->Emit("/* weak */ 0,"); } else if (split) { - p->Emit( - "PROTOBUF_FIELD_OFFSET($Msg$::Impl_::Split, " - "$field_name$_),"); + p->Emit({{"address", SplitBtreeAddressName(field)}}, + 
"Impl_::$address$.bits(),"); } else { p->Emit("PROTOBUF_FIELD_OFFSET($Msg$, $field_member_name$),"); } diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index e73bafedf56ec..03eebf6f25d2d 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -3131,19 +3131,6 @@ const FieldDescriptor* Reflection::FindKnownExtensionByNumber( // These simple template accessors obtain pointers (or references) to // the given field. -void Reflection::PrepareSplitMessageForWrite(Message* message) const { - ABSL_DCHECK_NE(message, schema_.default_instance()); - void** split = MutableSplitField(message); - const void* default_split = GetSplitField(schema_.default_instance()); - if (*split == default_split) { - uint32_t size = schema_.SizeofSplit(); - Arena* arena = message->GetArena(); - *split = (arena == nullptr) ? internal::Allocate(size) - : arena->AllocateAligned(size); - memcpy(*split, default_split, size); - } -} - template static Type* AllocIfDefault(const FieldDescriptor* field, Type*& ptr, Arena* arena) { @@ -3167,14 +3154,15 @@ void* Reflection::MutableRawSplitImpl(Message* message, ABSL_DCHECK(!schema_.InRealOneof(field)) << "Field = " << field->full_name(); const uint32_t field_offset = schema_.GetFieldOffset(field); - PrepareSplitMessageForWrite(message); - void** split = MutableSplitField(message); + auto& split = *MutableSplitField(message); + const auto& default_split = GetSplitField(schema_.default_instance()); + void* ptr = split.Mutable(internal::BtreeSplitAddress(field_offset), message, + default_split.head()); if (internal::SplitFieldHasExtraIndirection(field)) { - return AllocIfDefault(field, - *GetPointerAtOffset(*split, field_offset), + return AllocIfDefault(field, *static_cast(ptr), message->GetArena()); } - return GetPointerAtOffset(*split, field_offset); + return ptr; } const uint32_t* Reflection::GetHasBits(const Message& 
message) const { @@ -3767,9 +3755,11 @@ void Reflection::PopulateTcParseFieldAux( case internal::TailCallTableInfo::kSplitOffset: field_aux++->offset = schema_.SplitOffset(); break; + /* case internal::TailCallTableInfo::kSplitSizeof: field_aux++->offset = schema_.SizeofSplit(); break; + */ case internal::TailCallTableInfo::kSubTable: case internal::TailCallTableInfo::kSubMessageGlobalsWeak: case internal::TailCallTableInfo::kMessageVerifyFunc: diff --git a/src/google/protobuf/generated_message_reflection.h b/src/google/protobuf/generated_message_reflection.h index efac3b5fea799..02546c9e7252e 100644 --- a/src/google/protobuf/generated_message_reflection.h +++ b/src/google/protobuf/generated_message_reflection.h @@ -65,9 +65,9 @@ inline constexpr uint32_t kInvalidFieldOffsetTag = 0x40000000u; // Mask used on offsets for split fields. inline constexpr uint32_t kSplitFieldOffsetMask = 0x80000000u; -inline constexpr uint32_t kLazyMask = 0x1u; -inline constexpr uint32_t kInlinedMask = 0x1u; -inline constexpr uint32_t kMicroStringMask = 0x2u; +inline constexpr uint32_t kLazyMask = 0x20000000u; +inline constexpr uint32_t kInlinedMask = 0x20000000u; +inline constexpr uint32_t kMicroStringMask = 0x10000000u; // Structs that the code generator emits directly to describe a message. // These should never used directly except to build a ReflectionSchema @@ -252,18 +252,8 @@ class ReflectionSchema { // "unused" or "lazy" or "inlined"). template static uint32_t OffsetValue(uint32_t v, FieldDescriptor::Type type) { - if constexpr (!std::is_void_v) { - // If the type is passed, statically use the alignment for the mask. - // Faster than checking `type`. 
- return v & ~kSplitFieldOffsetMask & ~(alignof(Type) - 1); - } - if (type == FieldDescriptor::TYPE_MESSAGE || - type == FieldDescriptor::TYPE_STRING || - type == FieldDescriptor::TYPE_BYTES) { - return v & ~kSplitFieldOffsetMask & ~kInlinedMask & ~kLazyMask & - ~kMicroStringMask; - } - return v & (~kSplitFieldOffsetMask); + return v & ~kSplitFieldOffsetMask & ~kInlinedMask & ~kLazyMask & + ~kMicroStringMask; } static bool Inlined(uint32_t v, FieldDescriptor::Type type) { diff --git a/src/google/protobuf/generated_message_tctable_gen.cc b/src/google/protobuf/generated_message_tctable_gen.cc index 804ff1db62c25..203211e179658 100644 --- a/src/google/protobuf/generated_message_tctable_gen.cc +++ b/src/google/protobuf/generated_message_tctable_gen.cc @@ -910,9 +910,8 @@ TailCallTableInfo::TailCallTableInfo( if (std::any_of(ordered_fields.begin(), ordered_fields.end(), [](auto& f) { return f.should_split; })) { static_assert(kSplitOffsetAuxIdx + 1 == kSplitSizeAuxIdx, ""); - aux_entries.resize(kSplitSizeAuxIdx + 1); // Allocate our 2 slots + aux_entries.resize(kSplitSizeAuxIdx); // Allocate our 1 slots aux_entries[kSplitOffsetAuxIdx] = {kSplitOffset}; - aux_entries[kSplitSizeAuxIdx] = {kSplitSizeof}; } field_entries = BuildFieldEntries(descriptor, message_options, ordered_fields, diff --git a/src/google/protobuf/generated_message_tctable_gen.h b/src/google/protobuf/generated_message_tctable_gen.h index 7682e58160547..016812060050c 100644 --- a/src/google/protobuf/generated_message_tctable_gen.h +++ b/src/google/protobuf/generated_message_tctable_gen.h @@ -181,7 +181,6 @@ struct PROTOBUF_EXPORT TailCallTableInfo { enum AuxType { kNothing = 0, kSplitOffset, - kSplitSizeof, kSubMessageGlobals, kSubTable, kSubMessageGlobalsWeak, diff --git a/src/google/protobuf/generated_message_tctable_impl.h b/src/google/protobuf/generated_message_tctable_impl.h index 567cf4ca44a10..34071b3f08c9f 100644 --- a/src/google/protobuf/generated_message_tctable_impl.h +++ 
b/src/google/protobuf/generated_message_tctable_impl.h @@ -789,27 +789,35 @@ class PROTOBUF_EXPORT TcParser final { } template - static inline T& MaybeCreateRepeatedRefAt(void* x, size_t offset, - MessageLite* msg) { - if (!is_split) return RefAt(x, offset); - void*& ptr = RefAt(x, offset); - if (ptr == DefaultRawPtr()) { - ptr = Arena::Create(msg->GetArena()); + static inline T& MaybeCreateRepeatedRefAt( + MessageLite* msg, const TcParseTableBase* table, + const TcParseTableBase::FieldEntry& entry) { + void* void_ptr = MutableMaybeSplit(msg, table, entry); + if constexpr (is_split) { + RawPtr* raw = static_cast*>(void_ptr); + if (raw->IsDefault()) { + raw->Set(Arena::Create(msg->GetArena())); + } + return *raw->Get(); + } else { + return *static_cast(void_ptr); } - return *static_cast(ptr); } template static inline RepeatedField& MaybeCreateRepeatedFieldRefAt( - void* x, size_t offset, MessageLite* msg) { - return MaybeCreateRepeatedRefAt, is_split>(x, offset, msg); + MessageLite* msg, const TcParseTableBase* table, + const TcParseTableBase::FieldEntry& entry) { + return MaybeCreateRepeatedRefAt, is_split>(msg, table, + entry); } template static inline RepeatedPtrField& MaybeCreateRepeatedPtrFieldRefAt( - void* x, size_t offset, MessageLite* msg) { - return MaybeCreateRepeatedRefAt, is_split>(x, offset, - msg); + MessageLite* msg, const TcParseTableBase* table, + const TcParseTableBase::FieldEntry& entry) { + return MaybeCreateRepeatedRefAt, is_split>(msg, table, + entry); } template @@ -876,9 +884,8 @@ class PROTOBUF_EXPORT TcParser final { // Returns true if the repeated field is empty. This method is not // well-optimized, so it should only be called in debug builds. 
static bool RepeatedFieldIsEmptySlow( - const MessageLite* msg, const TcParseTableBase* table, - const TcParseTableBase::FieldEntry& entry, const void* base, - bool is_split); + const void* ptr, const TcParseTableBase* table, + const TcParseTableBase::FieldEntry& entry, bool is_split); template PROTOBUF_ALWAYS_INLINE PROTOBUF_CC static const char* FastMpImpl( @@ -889,8 +896,11 @@ class PROTOBUF_EXPORT TcParser final { PROTOBUF_CC static const char* FastVarintS1(PROTOBUF_TC_PARAM_DECL); friend class GeneratedTcTableLiteTest; - static void* MaybeGetSplitBase(MessageLite* msg, bool is_split, - const TcParseTableBase* table); + + template + static void* MutableMaybeSplit(MessageLite* msg, + const TcParseTableBase* table, + const TcParseTableBase::FieldEntry& entry); // Test only access to verify that the right function is being called via // MiniParse. diff --git a/src/google/protobuf/generated_message_tctable_lite.cc b/src/google/protobuf/generated_message_tctable_lite.cc index 7e69489028791..8d85fe4dd7590 100644 --- a/src/google/protobuf/generated_message_tctable_lite.cc +++ b/src/google/protobuf/generated_message_tctable_lite.cc @@ -27,6 +27,7 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "google/protobuf/arenastring.h" +#include "google/protobuf/btree_split.h" #include "google/protobuf/generated_enum_util.h" #include "google/protobuf/generated_message_tctable_decl.h" #include "google/protobuf/generated_message_tctable_impl.h" @@ -112,19 +113,27 @@ absl::Status TcParser::VerifyHasBitConsistency(const MessageLite* msg, continue; } const bool has_bit = ReadHas(entry, msg); - const void* base = msg; - const void* default_base = table->default_instance(); + const void* field; + const void* default_field; + const bool is_split = (entry.type_card & field_layout::kSplitMask) == field_layout::kSplitTrue; + if (is_split) { const size_t offset = table->field_aux(kSplitOffsetAuxIdx)->offset; - base = TcParser::RefAt(base, offset); - 
default_base = TcParser::RefAt(default_base, offset); + field = TcParser::RefAt(msg, offset) + .Get(BtreeSplitAddress(entry.offset)); + default_field = + TcParser::RefAt(table->default_instance(), offset) + .Get(BtreeSplitAddress(entry.offset)); + } else { + field = &RefAt(msg, entry.offset); + default_field = &RefAt(table->default_instance(), entry.offset); } if (cardinality == fl::kFcRepeated) { if (!has_bit && - !RepeatedFieldIsEmptySlow(msg, table, entry, base, is_split)) { + !RepeatedFieldIsEmptySlow(field, table, entry, is_split)) { return make_error_status(); } continue; @@ -137,20 +146,20 @@ absl::Status TcParser::VerifyHasBitConsistency(const MessageLite* msg, if (has_bit) break; switch (entry.type_card & fl::kRepMask) { case fl::kRep8Bits: - if (RefAt(base, entry.offset) != - RefAt(default_base, entry.offset)) { + if (*static_cast(field) != + *static_cast(default_field)) { return make_error_status(); } break; case fl::kRep32Bits: - if (RefAt(base, entry.offset) != - RefAt(default_base, entry.offset)) { + if (*static_cast(field) != + *static_cast(default_field)) { return make_error_status(); } break; case fl::kRep64Bits: - if (RefAt(base, entry.offset) != - RefAt(default_base, entry.offset)) { + if (*static_cast(field) != + *static_cast(default_field)) { return make_error_status(); } break; @@ -162,7 +171,7 @@ absl::Status TcParser::VerifyHasBitConsistency(const MessageLite* msg, case field_layout::kRepAString: // Must not point to the default if the has bit is on. if (has_bit && - RefAt(base, entry.offset).IsDefault()) { + static_cast(field)->IsDefault()) { return make_error_status(); } else { // We should technically check that the value matches the default @@ -172,16 +181,18 @@ absl::Status TcParser::VerifyHasBitConsistency(const MessageLite* msg, break; case field_layout::kRepCord: // If the has bit is off, it must match the default. 
- if (!has_bit && (RefAt(base, entry.offset) != - RefAt(default_base, entry.offset))) { + if (!has_bit && (*static_cast(field) != + *static_cast(default_field))) { return make_error_status(); } break; case field_layout::kRepIString: // If the has bit is off, it must match the default. if (!has_bit && - (RefAt(base, entry.offset).Get() != - RefAt(default_base, entry.offset).Get())) { + (RefAt(msg, entry.offset).Get() != + RefAt(table->default_instance(), + entry.offset) + .Get())) { return make_error_status(); } break; @@ -196,7 +207,7 @@ absl::Status TcParser::VerifyHasBitConsistency(const MessageLite* msg, // Note: An off has_bit does not imply a null pointer. We might have // a previous instance that we cached. if (has_bit && - RefAt(base, entry.offset) == nullptr) { + *static_cast(field) == nullptr) { return make_error_status(); } break; @@ -219,12 +230,16 @@ void TcParser::CheckHasBitConsistency(const MessageLite* msg, ABSL_CHECK_OK(VerifyHasBitConsistency(msg, table)); } -bool TcParser::RepeatedFieldIsEmptySlow(const MessageLite* msg, +bool TcParser::RepeatedFieldIsEmptySlow(const void* ptr, const TcParseTableBase* table, const FieldEntry& entry, - const void* base, bool is_split) { + bool is_split) { namespace fl = internal::field_layout; + if (is_split) { + ptr = *static_cast(ptr); + } + switch (entry.type_card & fl::kFkMask) { case fl::kFkVarint: case fl::kFkPackedVarint: @@ -232,22 +247,13 @@ bool TcParser::RepeatedFieldIsEmptySlow(const MessageLite* msg, case fl::kFkPackedFixed: switch (entry.type_card & fl::kRepMask) { case fl::kRep8Bits: { - const auto& repeated_field = - GetRepeatedFieldAt>(base, entry.offset, - msg, is_split); - return repeated_field.empty(); + return static_cast*>(ptr)->empty(); } case fl::kRep32Bits: { - const auto& repeated_field = - GetRepeatedFieldAt>(base, entry.offset, - msg, is_split); - return repeated_field.empty(); + return static_cast*>(ptr)->empty(); } case fl::kRep64Bits: { - const auto& repeated_field = - 
GetRepeatedFieldAt>(base, entry.offset, - msg, is_split); - return repeated_field.empty(); + return static_cast*>(ptr)->empty(); } default: Unreachable(); @@ -264,10 +270,7 @@ bool TcParser::RepeatedFieldIsEmptySlow(const MessageLite* msg, Unreachable(); } case fl::kRepCord: { - const auto& repeated_field = - GetRepeatedFieldAt>(base, entry.offset, - msg, is_split); - return repeated_field.empty(); + return static_cast*>(ptr)->empty(); } default: Unreachable(); @@ -275,17 +278,15 @@ bool TcParser::RepeatedFieldIsEmptySlow(const MessageLite* msg, ABSL_FALLTHROUGH_INTENDED; } case fl::kFkMessage: { - const auto& repeated_field = GetRepeatedFieldAt( - base, entry.offset, msg, is_split); - return repeated_field.empty(); + return static_cast(ptr)->empty(); } case fl::kFkMap: { const auto* aux = table->field_aux(&entry); const auto map_info = aux[0].map_info; const UntypedMapBase& map_field = map_info.use_lite - ? RefAt(base, entry.offset) - : RefAt(base, entry.offset).GetMap(); + ? *static_cast(ptr) + : static_cast(ptr)->GetMap(); return map_field.empty(); } default: @@ -507,7 +508,7 @@ constexpr TailCallParseFunc TcParser::kMiniParseTable[] = { &MpPackedFixed, // kSplitMask | FieldKind::kFkPackedFixed &MpString, // kSplitMask | FieldKind::kFkString &MpMessage, // kSplitMask | FieldKind::kFkMessage - &MpMap, // kSplitMask | FieldKind::kFkMap + &Error, // kSplitMask | FieldKind::kFkMap }; // We have a constexpr variable for this to workaround issues with ASSUME and @@ -2099,25 +2100,24 @@ uint32_t GetSizeofSplit(const TcParseTableBase* table) { } } // namespace -void* TcParser::MaybeGetSplitBase(MessageLite* msg, const bool is_split, - const TcParseTableBase* table) { - void* out = msg; - if (is_split) { - const uint32_t split_offset = GetSplitOffset(table); - void* default_split = - TcParser::RefAt(table->default_instance(), split_offset); - void*& split = TcParser::RefAt(msg, split_offset); - if (split == default_split) { - // Allocate split instance when needed. 
- uint32_t size = GetSizeofSplit(table); - Arena* arena = msg->GetArena(); - split = - (arena == nullptr) ? Allocate(size) : arena->AllocateAligned(size); - memcpy(split, default_split, size); - } - out = split; +template +PROTOBUF_ALWAYS_INLINE void* TcParser::MutableMaybeSplit( + MessageLite* msg, const TcParseTableBase* table, + const TcParseTableBase::FieldEntry& entry) { + if constexpr (is_split) { + const size_t offset = table->field_aux(kSplitOffsetAuxIdx)->offset; +#if BTREE_SPLIT_USE_TAGS + return TcParser::RefAt(msg, offset) + .Mutable(BtreeSplitAddress(entry.offset), msg); +#else + const auto& default_split = + TcParser::RefAt(table->default_instance(), offset); + return TcParser::RefAt(msg, offset) + .Mutable(BtreeSplitAddress(entry.offset), msg, default_split.head()); +#endif + } else { + return &RefAt(msg, entry.offset); } - return out; } template @@ -2150,13 +2150,13 @@ PROTOBUF_NOINLINE const char* TcParser::MpFixed(PROTOBUF_TC_PARAM_DECL) { ChangeOneof(table, /*class_data=*/nullptr, entry, data.tag() >> 3, ctx, msg); } - void* const base = MaybeGetSplitBase(msg, is_split, table); + void* void_field = MutableMaybeSplit(msg, table, entry); // Copy the value: if (rep == field_layout::kRep64Bits) { - RefAt(base, entry.offset) = UnalignedLoad(ptr); + *static_cast(void_field) = UnalignedLoad(ptr); ptr += sizeof(uint64_t); } else { - RefAt(base, entry.offset) = UnalignedLoad(ptr); + *static_cast(void_field) = UnalignedLoad(ptr); ptr += sizeof(uint32_t); } PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); @@ -2176,7 +2176,6 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed( SetHasForRepeated(entry, msg); - void* const base = MaybeGetSplitBase(msg, is_split, table); const uint16_t type_card = entry.type_card; const uint16_t rep = type_card & field_layout::kRepMask; Arena* arena = msg->GetArena(); @@ -2184,8 +2183,8 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed( if (decoded_wiretype != 
WireFormatLite::WIRETYPE_FIXED64) { PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS); } - auto& field = MaybeCreateRepeatedFieldRefAt( - base, entry.offset, msg); + auto& field = + MaybeCreateRepeatedFieldRefAt(msg, table, entry); constexpr auto size = sizeof(uint64_t); const char* ptr2 = ptr; uint32_t next_tag; @@ -2202,8 +2201,8 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedFixed( if (decoded_wiretype != WireFormatLite::WIRETYPE_FIXED32) { PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS); } - auto& field = MaybeCreateRepeatedFieldRefAt( - base, entry.offset, msg); + auto& field = + MaybeCreateRepeatedFieldRefAt(msg, table, entry); constexpr auto size = sizeof(uint32_t); const char* ptr2 = ptr; uint32_t next_tag; @@ -2237,18 +2236,17 @@ PROTOBUF_NOINLINE const char* TcParser::MpPackedFixed(PROTOBUF_TC_PARAM_DECL) { SetHasForRepeated(entry, msg); - void* const base = MaybeGetSplitBase(msg, is_split, table); int size = ReadSize(&ptr); uint16_t rep = type_card & field_layout::kRepMask; Arena* arena = msg->GetArena(); if (rep == field_layout::kRep64Bits) { - auto& field = MaybeCreateRepeatedFieldRefAt( - base, entry.offset, msg); + auto& field = + MaybeCreateRepeatedFieldRefAt(msg, table, entry); ptr = ctx->ReadPackedFixed(ptr, arena, size, &field); } else { ABSL_DCHECK_EQ(rep, static_cast(field_layout::kRep32Bits)); - auto& field = MaybeCreateRepeatedFieldRefAt( - base, entry.offset, msg); + auto& field = + MaybeCreateRepeatedFieldRefAt(msg, table, entry); ptr = ctx->ReadPackedFixed(ptr, arena, size, &field); } @@ -2310,14 +2308,14 @@ PROTOBUF_NOINLINE const char* TcParser::MpVarint(PROTOBUF_TC_PARAM_DECL) { msg); } - void* const base = MaybeGetSplitBase(msg, is_split, table); + void* void_field = MutableMaybeSplit(msg, table, entry); if (rep == field_layout::kRep64Bits) { - RefAt(base, entry.offset) = tmp; + *static_cast(void_field) = tmp; } else if (rep == field_layout::kRep32Bits) { - RefAt(base, entry.offset) = 
static_cast(tmp); + *static_cast(void_field) = static_cast(tmp); } else { ABSL_DCHECK_EQ(rep, static_cast(field_layout::kRep8Bits)); - RefAt(base, entry.offset) = static_cast(tmp); + *static_cast(void_field) = static_cast(tmp); } PROTOBUF_MUSTTAIL return ToTagDispatch(PROTOBUF_TC_PARAM_NO_DATA_PASS); @@ -2336,9 +2334,8 @@ const char* TcParser::MpRepeatedVarintT(PROTOBUF_TC_PARAM_DECL) { const char* ptr2 = ptr; uint32_t next_tag; - void* const base = MaybeGetSplitBase(msg, is_split, table); - auto& field = MaybeCreateRepeatedFieldRefAt( - base, entry.offset, msg); + auto& field = + MaybeCreateRepeatedFieldRefAt(msg, table, entry); Arena* arena = msg->GetArena(); TcParseTableBase::FieldAux aux; @@ -2446,9 +2443,8 @@ const char* TcParser::MpPackedVarintT(PROTOBUF_TC_PARAM_DECL) { const bool is_zigzag = xform_val == field_layout::kTvZigZag; const bool is_validated_enum = xform_val & field_layout::kTvEnum; - void* const base = MaybeGetSplitBase(msg, is_split, table); - auto* field = &MaybeCreateRepeatedFieldRefAt( - base, entry.offset, msg); + auto* field = + &MaybeCreateRepeatedFieldRefAt(msg, table, entry); Arena* arena = msg->GetArena(); if (is_validated_enum) { @@ -2585,10 +2581,10 @@ PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) { } bool is_valid = false; - void* const base = MaybeGetSplitBase(msg, is_split, table); + void* void_field = MutableMaybeSplit(msg, table, entry); switch (rep) { case field_layout::kRepAString: { - auto& field = RefAt(base, entry.offset); + auto& field = *static_cast(void_field); Arena* arena = msg->GetArena(); if (arena) { ptr = ctx->ReadArenaString(ptr, &field, arena); @@ -2605,7 +2601,7 @@ PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) { } case field_layout::kRepMString: { - auto& field = RefAt(base, entry.offset); + auto& field = *static_cast(void_field); ptr = ctx->ReadMicroString(ptr, field, entry.aux_idx, msg->GetArena()); is_valid = MpVerifyUtf8(field.Get(), table, entry, 
xform_val); break; @@ -2615,9 +2611,9 @@ PROTOBUF_NOINLINE const char* TcParser::MpString(PROTOBUF_TC_PARAM_DECL) { case field_layout::kRepCord: { absl::Cord* field; if (is_oneof) { - field = RefAt(msg, entry.offset); + field = *static_cast(void_field); } else { - field = &RefAt(base, entry.offset); + field = static_cast(void_field); } ptr = InlineCordParser(field, ptr, ctx); if (!ptr) break; @@ -2664,12 +2660,11 @@ PROTOBUF_NOINLINE const char* TcParser::MpRepeatedString( const uint16_t rep = type_card & field_layout::kRepMask; const uint16_t xform_val = type_card & field_layout::kTvMask; - void* const base = MaybeGetSplitBase(msg, is_split, table); auto* arena = msg->GetArena(); switch (rep) { case field_layout::kRepSString: { auto& field = MaybeCreateRepeatedPtrFieldRefAt( - base, entry.offset, msg); + msg, table, entry); const char* ptr2 = ptr; uint32_t next_tag; @@ -2778,8 +2773,8 @@ PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) { SyncHasbits(msg, hasbits, table); - void* const base = MaybeGetSplitBase(msg, is_split, table); - MessageLite*& field = RefAt(base, entry.offset); + void* void_field = MutableMaybeSplit(msg, table, entry); + MessageLite*& field = *static_cast(void_field); if (field == nullptr) { field = NewMessage(class_data, msg->GetArena()); } @@ -2814,10 +2809,9 @@ const char* TcParser::MpRepeatedMessageOrGroup(PROTOBUF_TC_PARAM_DECL) { } } - void* const base = MaybeGetSplitBase(msg, is_split, table); RepeatedPtrFieldBase& field = - MaybeCreateRepeatedRefAt( - base, entry.offset, msg); + MaybeCreateRepeatedRefAt(msg, table, + entry); ABSL_DCHECK_EQ(field.GetArena(), msg->GetArena()); const auto aux = *table->field_aux(&entry); // Captured structured bindings are a C++20 feature. @@ -3057,11 +3051,11 @@ PROTOBUF_NOINLINE const char* TcParser::MpMap(PROTOBUF_TC_PARAM_DECL) { // Otherwise, it points into a MapField and we must synchronize with // reflection. 
It is done by calling the MutableMap() virtual function on the // field's base class. - void* const base = MaybeGetSplitBase(msg, is_split, table); + static_assert(!is_split); UntypedMapBase& map = map_info.use_lite - ? RefAt(base, entry.offset) - : *RefAt(base, entry.offset).MutableMap(); + ? RefAt(msg, entry.offset) + : *RefAt(msg, entry.offset).MutableMap(); SetHasForRepeated(entry, msg); diff --git a/src/google/protobuf/message.h b/src/google/protobuf/message.h index 1f454e7efac35..b3cdb10254332 100644 --- a/src/google/protobuf/message.h +++ b/src/google/protobuf/message.h @@ -111,6 +111,7 @@ #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "google/protobuf/arena.h" +#include "google/protobuf/btree_split.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/generated_message_reflection.h" #include "google/protobuf/generated_message_tctable_decl.h" @@ -1399,12 +1400,10 @@ class PROTOBUF_EXPORT Reflection final { const FieldDescriptor* field) const; // Returns the `_split_` pointer. Requires: IsSplit() == true. - inline const void* GetSplitField(const Message* message) const; + inline const internal::BtreeSplit& GetSplitField( + const Message* message) const; // Returns the address of the `_split_` pointer. Requires: IsSplit() == true. - inline void** MutableSplitField(Message* message) const; - - // Allocate the split instance if needed. - void PrepareSplitMessageForWrite(Message* message) const; + inline internal::BtreeSplit* MutableSplitField(Message* message) const; // Shallow-swap fields listed in fields vector of two messages. It is the // caller's responsibility to make sure shallow swap is safe. 
@@ -1786,15 +1785,18 @@ bool Reflection::HasOneofField(const Message& message, static_cast(field->number())); } -const void* Reflection::GetSplitField(const Message* message) const { +const internal::BtreeSplit& Reflection::GetSplitField( + const Message* message) const { ABSL_DCHECK(schema_.IsSplit()); - return *internal::GetConstPointerAtOffset(message, - schema_.SplitOffset()); + return *internal::GetConstPointerAtOffset( + message, schema_.SplitOffset()); } -void** Reflection::MutableSplitField(Message* message) const { +internal::BtreeSplit* Reflection::MutableSplitField(Message* message) const { + ABSL_DCHECK_NE(message, schema_.default_instance()); ABSL_DCHECK(schema_.IsSplit()); - return internal::GetPointerAtOffset(message, schema_.SplitOffset()); + return internal::GetPointerAtOffset( + message, schema_.SplitOffset()); } namespace internal { @@ -1962,11 +1964,12 @@ const Type& Reflection::GetRaw(const Message& message, ABSL_DCHECK(!schema_.InRealOneof(field)) << "Field = " << field->full_name(); - const void* split = GetSplitField(&message); + const auto& split = GetSplitField(&message); + const void* ptr = split.Get(internal::BtreeSplitAddress(field_offset)); if (internal::SplitFieldHasExtraIndirectionStatic(field)) { - return **internal::GetConstPointerAtOffset(split, field_offset); + return **static_cast(ptr); } - return *internal::GetConstPointerAtOffset(split, field_offset); + return *static_cast(ptr); } return internal::GetConstRefAtOffset(message, field_offset); } diff --git a/src/google/protobuf/message_lite.cc b/src/google/protobuf/message_lite.cc index 378e84997d7f4..a53f49a149218 100644 --- a/src/google/protobuf/message_lite.cc +++ b/src/google/protobuf/message_lite.cc @@ -499,7 +499,8 @@ inline uint8_t* SerializeToArrayImpl(const MessageLite& msg, uint8_t* target, target, size, io::CodedOutputStream::IsDefaultSerializationDeterministic()); uint8_t* res = msg._InternalSerialize(target, &out); - ABSL_DCHECK(target + size == res); + 
ABSL_DCHECK(target + size == res) + << "res-target=" << (res - target) << " size=" << size; return res; } } diff --git a/src/google/protobuf/message_lite.h b/src/google/protobuf/message_lite.h index 96d5b03564c1d..6f1496f66d0e3 100644 --- a/src/google/protobuf/message_lite.h +++ b/src/google/protobuf/message_lite.h @@ -306,6 +306,7 @@ struct TcParseTableBase; class WireFormatLite; class WeakFieldMap; class RustMapHelper; +class BtreeSplit; // We compute sizes as size_t but cache them as int. This function converts a // computed size to a cached size. Since we don't proceed with serialization @@ -1367,7 +1368,7 @@ class PROTOBUF_EXPORT MessageLite { friend class internal::WeakFieldMap; friend class internal::WireFormatLite; friend class internal::RustMapHelper; - + friend internal::BtreeSplit; template friend class Arena::InternalHelper; diff --git a/src/google/protobuf/port.h b/src/google/protobuf/port.h index 325821c7d4f78..fcd443d7ac556 100644 --- a/src/google/protobuf/port.h +++ b/src/google/protobuf/port.h @@ -181,6 +181,11 @@ inline void SizedArrayDelete(void* p, size_t size) { #endif } +constexpr size_t RoundUpTo(size_t n, size_t r) { + assert((r & (r - 1)) == 0 && "Must be power of two"); + return (n + (r - 1)) & ~(r - 1); +} + // Tag type used to invoke the constinit constructor overload of classes // such as ArenaStringPtr and MapFieldBase. Such constructors are internal // implementation details of the library. 
@@ -296,6 +301,8 @@ inline constexpr bool ForceEagerlyVerifiedLazyInProtoc() { } inline constexpr bool ForceSplitFieldsInProtoc() { + // DO NOT SUBMIT + return true; #if defined(PROTOBUF_FORCE_SPLIT) return true; #else diff --git a/src/google/protobuf/raw_ptr.h b/src/google/protobuf/raw_ptr.h index 9ea01bfb08151..f682ef8e455b7 100644 --- a/src/google/protobuf/raw_ptr.h +++ b/src/google/protobuf/raw_ptr.h @@ -28,6 +28,8 @@ PROTOBUF_EXPORT ABSL_CACHELINE_ALIGNED extern const char template class RawPtr { public: + using value_type = T; + constexpr RawPtr() : RawPtr(kZeroBuffer) { static_assert(sizeof(T) <= sizeof(kZeroBuffer), ""); static_assert(alignof(T) <= ABSL_CACHELINE_SIZE, ""); diff --git a/src/google/protobuf/reflection_ops_unittest.cc b/src/google/protobuf/reflection_ops_unittest.cc index 0fa4280637e21..8c7b6d07b4203 100644 --- a/src/google/protobuf/reflection_ops_unittest.cc +++ b/src/google/protobuf/reflection_ops_unittest.cc @@ -21,6 +21,7 @@ #include "absl/strings/str_join.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/generated_message_util.h" +#include "google/protobuf/port.h" #include "google/protobuf/test_util.h" #include "google/protobuf/unittest.pb.h" #include "google/protobuf/unittest_import.pb.h" @@ -193,14 +194,16 @@ TEST(ReflectionOpsTest, Clear) { // Check that getting embedded messages returns the objects created during // SetAllFields() rather than default instances. 
- EXPECT_NE(&unittest::TestAllTypes::OptionalGroup::default_instance(), - &message.optionalgroup()); - EXPECT_NE(&unittest::TestAllTypes::NestedMessage::default_instance(), - &message.optional_nested_message()); - EXPECT_NE(&unittest::ForeignMessage::default_instance(), - &message.optional_foreign_message()); - EXPECT_NE(&unittest_import::ImportMessage::default_instance(), - &message.optional_import_message()); + if constexpr (!internal::ForceSplitFieldsInProtoc()) { + EXPECT_NE(&unittest::TestAllTypes::OptionalGroup::default_instance(), + &message.optionalgroup()); + EXPECT_NE(&unittest::TestAllTypes::NestedMessage::default_instance(), + &message.optional_nested_message()); + EXPECT_NE(&unittest::ForeignMessage::default_instance(), + &message.optional_foreign_message()); + EXPECT_NE(&unittest_import::ImportMessage::default_instance(), + &message.optional_import_message()); + } } TEST(ReflectionOpsTest, ClearExtensions) {