Skip to content

Commit f0f2f3d

Browse files
committed
refactor for comments.
1 parent c23f106 commit f0f2f3d

File tree

6 files changed

+97
-77
lines changed

6 files changed

+97
-77
lines changed

Diff for: src/storage/redis_metadata.cc

+5-5
Original file line numberDiff line numberDiff line change
@@ -501,8 +501,8 @@ rocksdb::Status HyperLogLogMetadata::Decode(Slice *input) {
501501

502502
void TDigestMetadata::Encode(std::string *dst) const {
503503
Metadata::Encode(dst);
504-
PutFixed64(dst, compression);
505-
PutFixed64(dst, capacity);
504+
PutFixed32(dst, compression);
505+
PutFixed32(dst, capacity);
506506
PutFixed64(dst, unmerged_nodes);
507507
PutFixed64(dst, merged_nodes);
508508
PutFixed64(dst, total_weight);
@@ -518,12 +518,12 @@ rocksdb::Status TDigestMetadata::Decode(Slice *input) {
518518
return s;
519519
}
520520

521-
if (input->size() < (sizeof(uint64_t) * 8 + sizeof(double) * 2)) {
521+
if (input->size() < (sizeof(uint32_t) * 2 + sizeof(uint64_t) * 6 + sizeof(double) * 2)) {
522522
return rocksdb::Status::InvalidArgument(kErrMetadataTooShort);
523523
}
524524

525-
GetFixed64(input, &compression);
526-
GetFixed64(input, &capacity);
525+
GetFixed32(input, &compression);
526+
GetFixed32(input, &capacity);
527527
GetFixed64(input, &unmerged_nodes);
528528
GetFixed64(input, &merged_nodes);
529529
GetFixed64(input, &total_weight);

Diff for: src/storage/redis_metadata.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -341,18 +341,18 @@ class HyperLogLogMetadata : public Metadata {
341341

342342
class TDigestMetadata : public Metadata {
343343
public:
344-
uint64_t compression;
345-
uint64_t capacity;
344+
uint32_t compression;
345+
uint32_t capacity;
346346
uint64_t unmerged_nodes = 0;
347347
uint64_t merged_nodes = 0;
348348
uint64_t total_weight = 0;
349349
uint64_t merged_weight = 0;
350-
double minimum = std::numeric_limits<double>::infinity();
351-
double maximum = -1 * std::numeric_limits<double>::infinity();
350+
double minimum = std::numeric_limits<double>::max();
351+
double maximum = std::numeric_limits<double>::lowest();
352352
uint64_t total_observations = 0;
353353
uint64_t merge_times = 0;
354354

355-
explicit TDigestMetadata(uint64_t compression, uint64_t capacity, bool generate_version = true)
355+
explicit TDigestMetadata(uint32_t compression, uint32_t capacity, bool generate_version = true)
356356
: Metadata(kRedisTDigest, generate_version), compression(compression), capacity(capacity) {}
357357
explicit TDigestMetadata(bool generate_version = true) : TDigestMetadata(0, 0, generate_version) {}
358358
void Encode(std::string *dst) const override;

Diff for: src/types/redis_tdigest.cc

+20-11
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,12 @@ class DummyCentroids {
9090
};
9191

9292
std::unique_ptr<Iterator> Begin() { return std::make_unique<Iterator>(centroids_.cbegin(), centroids_); }
93-
std::unique_ptr<Iterator> End() { return std::make_unique<Iterator>(centroids_.cend(), centroids_); }
93+
std::unique_ptr<Iterator> End() {
94+
if (centroids_.empty()) {
95+
return std::make_unique<Iterator>(centroids_.cend(), centroids_);
96+
}
97+
return std::make_unique<Iterator>(std::prev(centroids_.cend()), centroids_);
98+
}
9499
double TotalWeight() const { return static_cast<double>(meta_data_.total_weight); }
95100
double Min() const { return meta_data_.minimum; }
96101
double Max() const { return meta_data_.maximum; }
@@ -101,10 +106,15 @@ class DummyCentroids {
101106
const std::vector<Centroid>& centroids_;
102107
};
103108

104-
uint64_t constexpr kMaxElements = 1 * 1024; // 1k doubles
109+
uint32_t constexpr kMaxElements = 1 * 1024; // 1k doubles
110+
uint32_t constexpr kMaxCompression = 1000; // limit the compression to 1k
105111

106112
std::optional<rocksdb::Status> TDigest::Create(engine::Context& ctx, const Slice& digest_name,
107113
const TDigestCreateOptions& options) {
114+
if (options.compression > kMaxCompression) {
115+
return rocksdb::Status::InvalidArgument(fmt::format("compression should be less than {}", kMaxCompression));
116+
}
117+
108118
auto ns_key = AppendNamespacePrefix(digest_name);
109119
auto capacity = options.compression * 6 + 10;
110120
capacity = ((capacity < kMaxElements) ? capacity : kMaxElements);
@@ -207,14 +217,14 @@ rocksdb::Status TDigest::Quantile(engine::Context& ctx, const Slice& digest_name
207217
}
208218

209219
std::vector<Centroid> centroids;
210-
if (auto status = dumpCentroidsAndBuffer(ctx, ns_key, metadata, &centroids); !status.ok()) {
220+
if (auto status = dumpCentroids(ctx, ns_key, metadata, &centroids); !status.ok()) {
211221
return status;
212222
}
213223

214224
auto dump_centroids = DummyCentroids(metadata, centroids);
215225

216226
for (auto q : qs) {
217-
auto status_or_value = TDigestQuantile(dump_centroids, q, Lerp);
227+
auto status_or_value = TDigestQuantile(dump_centroids, q);
218228
if (!status_or_value) {
219229
return rocksdb::Status::InvalidArgument(status_or_value.Msg());
220230
}
@@ -345,8 +355,8 @@ rocksdb::Status TDigest::dumpCentroidsAndBuffer(engine::Context& ctx, const std:
345355
for (uint64_t i = 0; i < metadata.unmerged_nodes; ++i) {
346356
double tmp_value = std::numeric_limits<double>::quiet_NaN();
347357
if (!GetDouble(&buffer_slice, &tmp_value)) {
348-
return rocksdb::Status::Corruption(
349-
fmt::format("metadata has {} records, but get {} failed", metadata.unmerged_nodes, i));
358+
LOG(ERROR) << "metadata has " << metadata.unmerged_nodes << " records, but get " << i << " failed";
359+
return rocksdb::Status::Corruption("corrupted tdigest buffer value");
350360
}
351361
buffer->emplace_back(tmp_value);
352362
}
@@ -378,11 +388,10 @@ rocksdb::Status TDigest::dumpCentroidsAndBuffer(engine::Context& ctx, const std:
378388
return status;
379389
}
380390
centroids->emplace_back(centroid);
381-
}
382-
383-
if (clean_after_dump_batch != nullptr) {
384-
if (auto status = (*clean_after_dump_batch)->DeleteRange(cf_handle_, start_key, guard_key); !status.ok()) {
385-
return status;
391+
if (clean_after_dump_batch != nullptr) {
392+
if (auto status = (*clean_after_dump_batch)->Delete(cf_handle_, iter->key()); !status.ok()) {
393+
return status;
394+
}
386395
}
387396
}
388397

Diff for: src/types/redis_tdigest.h

+22-12
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,14 @@ struct CentroidWithKey {
3838
};
3939

4040
struct TDigestCreateOptions {
41-
uint64_t compression;
41+
uint32_t compression;
4242
};
4343

44-
struct TDigestMergeOptions {};
45-
46-
struct TDigestCDFResult {};
47-
4844
struct TDigestQuantitleResult {
4945
std::vector<double> quantiles;
5046
};
5147

52-
class RedisTDigestTest;
5348
class TDigest : public SubKeyScanner {
54-
friend class RedisTDigestTest;
55-
5649
public:
5750
using Slice = rocksdb::Slice;
5851
explicit TDigest(engine::Storage* storage, const std::string& ns)
@@ -73,10 +66,27 @@ class TDigest : public SubKeyScanner {
7366
rocksdb::Status appendBuffer(engine::Context& ctx, ObserverOrUniquePtr<rocksdb::WriteBatchBase>& batch,
7467
const std::string& ns_key, const std::vector<double>& inputs, TDigestMetadata* metadata);
7568

76-
rocksdb::Status dumpCentroidsAndBuffer(
77-
engine::Context& ctx, const std::string& ns_key, const TDigestMetadata& metadata,
78-
std::vector<Centroid>* centroids, std::vector<double>* buffer = nullptr,
79-
ObserverOrUniquePtr<rocksdb::WriteBatchBase>* clean_after_dump_batch = nullptr);
69+
rocksdb::Status dumpCentroids(engine::Context& ctx, const std::string& ns_key, const TDigestMetadata& metadata,
70+
std::vector<Centroid>* centroids) {
71+
return dumpCentroidsAndBuffer(ctx, ns_key, metadata, centroids, nullptr, nullptr);
72+
}
73+
74+
/**
75+
* @brief Dumps the centroids and buffer of the t-digest.
76+
*
77+
* This function reads the centroids and buffer from persistent storage and removes them from the storage.
78+
* @param ctx The context of the operation.
79+
* @param ns_key The namespace key of the t-digest.
80+
* @param metadata The metadata of the t-digest.
81+
* @param centroids The output vector to store the centroids.
82+
* @param buffer The output vector to store the buffer. If it is nullptr, the buffer will not be read.
83+
* @param clean_after_dump_batch The write batch to store the clean operations. If it is nullptr, the clean operations
84+
* @return rocksdb::Status
85+
*/
86+
rocksdb::Status dumpCentroidsAndBuffer(engine::Context& ctx, const std::string& ns_key,
87+
const TDigestMetadata& metadata, std::vector<Centroid>* centroids,
88+
std::vector<double>* buffer,
89+
ObserverOrUniquePtr<rocksdb::WriteBatchBase>* clean_after_dump_batch);
8090
rocksdb::Status applyNewCentroids(ObserverOrUniquePtr<rocksdb::WriteBatchBase>& batch, const std::string& ns_key,
8191
const TDigestMetadata& metadata, const std::vector<Centroid>& centroids);
8292

Diff for: src/types/tdigest.cc

+33-35
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,9 @@ refer to https://github.com/apache/arrow/blob/27bbd593625122a4a25d9471c8aaf5df54
3030

3131
#include <algorithm>
3232
#include <iterator>
33-
#include <memory>
3433
#include <queue>
3534

36-
#include "status.h"
35+
#include "common/status.h"
3736

3837
namespace {
3938
// scale function K0: linear function, as baseline
@@ -57,26 +56,6 @@ struct ScalerK1 {
5756
};
5857
} // namespace
5958

60-
class TDigest {
61-
public:
62-
explicit TDigest(uint64_t delta);
63-
64-
TDigest(const TDigest&) = delete;
65-
TDigest& operator=(const TDigest&) = delete;
66-
TDigest(TDigest&& rhs) = default;
67-
~TDigest() = default;
68-
69-
void Merge(const std::vector<TDigest>& others);
70-
void Add(const std::vector<double>& items);
71-
void Reset(const CentroidsWithDelta& centroid_list);
72-
void Reset();
73-
CentroidsWithDelta DumpCentroids() const;
74-
75-
private:
76-
class TDigestImpl;
77-
std::unique_ptr<TDigestImpl> impl_;
78-
};
79-
8059
template <typename T = ScalerK1>
8160
class TDigestMerger : private T {
8261
public:
@@ -135,7 +114,7 @@ class TDigestMerger : private T {
135114
std::vector<Centroid>* tdigest_;
136115
};
137116

138-
class TDigest::TDigestImpl {
117+
class TDigestImpl {
139118
public:
140119
using Status = rocksdb::Status;
141120
explicit TDigestImpl(uint32_t delta) : delta_(delta > 10 ? delta : 10), merger_(delta_) {
@@ -372,7 +351,27 @@ class TDigest::TDigestImpl {
372351
int current_;
373352
};
374353

375-
TDigest::TDigest(uint64_t delta) : impl_(std::make_unique<TDigestImpl>(delta)) { Reset({}); }
354+
class TDigest {
355+
public:
356+
explicit TDigest(uint64_t delta);
357+
358+
TDigest(const TDigest&) = delete;
359+
TDigest& operator=(const TDigest&) = delete;
360+
TDigest(TDigest&& rhs) = default;
361+
~TDigest() = default;
362+
363+
void Merge(const std::vector<TDigest>& others);
364+
void Add(const std::vector<double>& items);
365+
void Reset(const CentroidsWithDelta& centroid_list);
366+
void Reset();
367+
CentroidsWithDelta DumpCentroids() const;
368+
369+
private:
370+
// class TDigestImpl;
371+
TDigestImpl impl_;
372+
};
373+
374+
TDigest::TDigest(uint64_t delta) : impl_(TDigestImpl(delta)) { Reset({}); }
376375

377376
void TDigest::Merge(const std::vector<TDigest>& others) {
378377
if (others.empty()) {
@@ -382,30 +381,29 @@ void TDigest::Merge(const std::vector<TDigest>& others) {
382381
std::vector<const TDigestImpl*> impls;
383382
impls.reserve(others.size());
384383

385-
std::transform(others.cbegin(), others.cend(), std::back_inserter(impls),
386-
[](const TDigest& i) { return i.impl_.get(); });
384+
std::transform(others.cbegin(), others.cend(), std::back_inserter(impls), [](const TDigest& i) { return &i.impl_; });
387385

388-
impl_->Merge(impls);
386+
impl_.Merge(impls);
389387
}
390388

391389
void TDigest::Reset(const CentroidsWithDelta& centroids_list) {
392-
impl_->Reset(centroids_list.centroids, centroids_list.min, centroids_list.max, centroids_list.total_weight);
390+
impl_.Reset(centroids_list.centroids, centroids_list.min, centroids_list.max, centroids_list.total_weight);
393391
}
394392

395-
void TDigest::Reset() { impl_->Reset(); }
393+
void TDigest::Reset() { impl_.Reset(); }
396394

397395
CentroidsWithDelta TDigest::DumpCentroids() const {
398-
auto centroids = impl_->Centroids();
396+
auto centroids = impl_.Centroids();
399397
return {
400398
.centroids = std::move(centroids),
401-
.delta = impl_->Delta(),
402-
.min = impl_->Min(),
403-
.max = impl_->Max(),
404-
.total_weight = impl_->TotalWeight(),
399+
.delta = impl_.Delta(),
400+
.min = impl_.Min(),
401+
.max = impl_.Max(),
402+
.total_weight = impl_.TotalWeight(),
405403
};
406404
}
407405

408-
void TDigest::Add(const std::vector<double>& items) { impl_->MergeInput(items); }
406+
void TDigest::Add(const std::vector<double>& items) { impl_.MergeInput(items); }
409407

410408
StatusOr<CentroidsWithDelta> TDigestMerge(const std::vector<CentroidsWithDelta>& centroids_list) {
411409
if (centroids_list.empty()) {

Diff for: src/types/tdigest.h

+12-9
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
#include <vector>
2626

27-
#include "status.h"
27+
#include "common/status.h"
2828

2929
struct Centroid {
3030
double mean;
@@ -38,8 +38,6 @@ struct Centroid {
3838

3939
std::string ToString() const { return fmt::format("centroid<mean: {}, weight: {}>", mean, weight); }
4040

41-
Centroid(const Centroid& centroid) = default;
42-
4341
explicit Centroid() = default;
4442
explicit Centroid(double mean, double weight) : mean(mean), weight(weight) {}
4543
};
@@ -81,10 +79,10 @@ class TDSample {
8179
// https://github.com/apache/arrow/blob/27bbd593625122a4a25d9471c8aaf5df54a6dcf9/cpp/src/arrow/util/tdigest.cc#L38
8280
static inline double Lerp(double a, double b, double t) { return a + t * (b - a); }
8381

84-
template <typename TD, typename Lerp>
85-
inline StatusOr<double> TDigestQuantile(TD&& td, double q, Lerp lerp) {
82+
template <typename TD>
83+
inline StatusOr<double> TDigestQuantile(TD&& td, double q) {
8684
if (q < 0 || q > 1 || td.Size() == 0) {
87-
return NAN;
85+
return Status{Status::InvalidArgument, "invalid quantile or empty tdigest"};
8886
}
8987

9088
const double index = q * td.TotalWeight();
@@ -104,6 +102,11 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q, Lerp lerp) {
104102
}
105103
}
106104

105+
// since index is in (1, total_weight - 1), iter should be valid
106+
if (!iter->Valid()) {
107+
return Status{Status::InvalidArgument, "invalid iterator during decoding tdigest centroid"};
108+
}
109+
107110
auto centroid = GET_OR_RET(iter->GetCentroid());
108111

109112
// deviation of index from the centroid center
@@ -122,15 +125,15 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q, Lerp lerp) {
122125
// index larger than center of last bin
123126
auto c = GET_OR_RET(ci_left->GetCentroid());
124127
DCHECK_GE(c.weight, 2);
125-
return lerp(c.mean, td.Max(), diff / (c.weight / 2));
128+
return Lerp(c.mean, td.Max(), diff / (c.weight / 2));
126129
}
127130
ci_right->Next();
128131
} else {
129132
if (ci_left == td.Begin()) {
130133
// index smaller than center of first bin
131134
auto c = GET_OR_RET(ci_left->GetCentroid());
132135
DCHECK_GE(c.weight, 2);
133-
return lerp(td.Min(), c.mean, index / (c.weight / 2));
136+
return Lerp(td.Min(), c.mean, index / (c.weight / 2));
134137
}
135138
ci_left->Prev();
136139
auto lc = GET_OR_RET(ci_left->GetCentroid());
@@ -143,5 +146,5 @@ inline StatusOr<double> TDigestQuantile(TD&& td, double q, Lerp lerp) {
143146

144147
// interpolate from adjacent centroids
145148
diff /= (lc.weight / 2 + rc.weight / 2);
146-
return lerp(lc.mean, rc.mean, diff);
149+
return Lerp(lc.mean, rc.mean, diff);
147150
}

0 commit comments

Comments
 (0)