Skip to content

Commit 90414cc

Browse files
committed
Better estimation for ColumnLowCardinality::Reserve and ColumnString::Reserve
ColumnLowCardinality assumes that not all items are unique, hence the dictionary column can be reserved with a smaller capacity; ColumnString now allows setting an average value size estimate in the constructor or on an existing instance. If the estimate is close to the real average value size, then memory pre-allocations are close to optimum.
1 parent 0f8b396 commit 90414cc

32 files changed

+442
-33
lines changed

clickhouse/columns/array.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ size_t ColumnArray::Size() const {
110110
return offsets_->Size();
111111
}
112112

113+
size_t ColumnArray::MemoryUsage() const {
114+
return offsets_->MemoryUsage() + data_->MemoryUsage();
115+
}
116+
113117
void ColumnArray::Swap(Column& other) {
114118
auto & col = dynamic_cast<ColumnArray &>(other);
115119
data_.swap(col.data_);

clickhouse/columns/array.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ class ColumnArray : public Column {
7171
/// Returns count of rows in the column.
7272
size_t Size() const override;
7373

74+
size_t MemoryUsage() const override;
75+
7476
/// Makes slice of the current column.
7577
ColumnRef Slice(size_t, size_t) const override;
7678
ColumnRef CloneEmpty() const override;

clickhouse/columns/column.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ class Column : public std::enable_shared_from_this<Column> {
9090

9191
virtual void Swap(Column&) = 0;
9292

93+
/// Estimated RAM usage by the column in bytes.
94+
virtual size_t MemoryUsage() const = 0;
95+
9396
/// Get a view on raw item data if it is supported by column, will throw an exception if index is out of range.
9497
/// Please note that view is invalidated once column items are added or deleted, column is loaded from stream or destroyed.
9598
virtual ItemView GetItem(size_t) const {

clickhouse/columns/date.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ size_t ColumnDate::Size() const {
6767
return data_->Size();
6868
}
6969

70+
size_t ColumnDate::MemoryUsage() const {
71+
return data_->MemoryUsage();
72+
}
73+
7074
ColumnRef ColumnDate::Slice(size_t begin, size_t len) const {
7175
auto col = data_->Slice(begin, len)->As<ColumnUInt16>();
7276
auto result = std::make_shared<ColumnDate>();
@@ -154,6 +158,10 @@ size_t ColumnDate32::Size() const {
154158
return data_->Size();
155159
}
156160

161+
size_t ColumnDate32::MemoryUsage() const {
162+
return data_->MemoryUsage();
163+
}
164+
157165
ColumnRef ColumnDate32::Slice(size_t begin, size_t len) const {
158166
auto col = data_->Slice(begin, len)->As<ColumnInt32>();
159167
auto result = std::make_shared<ColumnDate32>();
@@ -244,6 +252,10 @@ size_t ColumnDateTime::Size() const {
244252
return data_->Size();
245253
}
246254

255+
size_t ColumnDateTime::MemoryUsage() const {
256+
return data_->MemoryUsage();
257+
}
258+
247259
void ColumnDateTime::Clear() {
248260
data_->Clear();
249261
}
@@ -330,6 +342,10 @@ size_t ColumnDateTime64::Size() const {
330342
return data_->Size();
331343
}
332344

345+
size_t ColumnDateTime64::MemoryUsage() const {
346+
return data_->MemoryUsage();
347+
}
348+
333349
ItemView ColumnDateTime64::GetItem(size_t index) const {
334350
return ItemView(Type::DateTime64, data_->GetItem(index));
335351
}

clickhouse/columns/date.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class ColumnDate : public Column {
5151

5252
/// Returns count of rows in the column.
5353
size_t Size() const override;
54+
size_t MemoryUsage() const override;
5455

5556
/// Makes slice of the current column.
5657
ColumnRef Slice(size_t begin, size_t len) const override;
@@ -109,6 +110,7 @@ class ColumnDate32 : public Column {
109110

110111
/// Returns count of rows in the column.
111112
size_t Size() const override;
113+
size_t MemoryUsage() const override;
112114

113115
/// Makes slice of the current column.
114116
ColumnRef Slice(size_t begin, size_t len) const override;
@@ -170,6 +172,7 @@ class ColumnDateTime : public Column {
170172

171173
/// Returns count of rows in the column.
172174
size_t Size() const override;
175+
size_t MemoryUsage() const override;
173176

174177
/// Makes slice of the current column.
175178
ColumnRef Slice(size_t begin, size_t len) const override;
@@ -223,6 +226,7 @@ class ColumnDateTime64 : public Column {
223226

224227
/// Returns count of rows in the column.
225228
size_t Size() const override;
229+
size_t MemoryUsage() const override;
226230

227231
/// Makes slice of the current column.
228232
ColumnRef Slice(size_t begin, size_t len) const override;

clickhouse/columns/decimal.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,10 @@ size_t ColumnDecimal::Size() const {
217217
return data_->Size();
218218
}
219219

220+
size_t ColumnDecimal::MemoryUsage() const {
221+
return data_->MemoryUsage();
222+
}
223+
220224
ColumnRef ColumnDecimal::Slice(size_t begin, size_t len) const {
221225
// couldn't use std::make_shared since this c-tor is private
222226
return ColumnRef{new ColumnDecimal(type_, data_->Slice(begin, len))};

clickhouse/columns/decimal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class ColumnDecimal : public Column {
2828
void SaveBody(OutputStream* output) override;
2929
void Clear() override;
3030
size_t Size() const override;
31+
size_t MemoryUsage() const override;
3132
ColumnRef Slice(size_t begin, size_t len) const override;
3233
ColumnRef CloneEmpty() const override;
3334
void Swap(Column& other) override;

clickhouse/columns/enum.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ size_t ColumnEnum<T>::Size() const {
9696
return data_.size();
9797
}
9898

99+
template <typename T>
100+
size_t ColumnEnum<T>::MemoryUsage() const {
101+
return data_.capacity() * sizeof(*data_.begin());
102+
}
103+
99104
template <typename T>
100105
ColumnRef ColumnEnum<T>::Slice(size_t begin, size_t len) const {
101106
return std::make_shared<ColumnEnum<T>>(type_, SliceVector(data_, begin, len));

clickhouse/columns/enum.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class ColumnEnum : public Column {
4747

4848
/// Returns count of rows in the column.
4949
size_t Size() const override;
50+
size_t MemoryUsage() const override;
5051

5152
/// Makes slice of the current column.
5253
ColumnRef Slice(size_t begin, size_t len) const override;

clickhouse/columns/geo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,17 @@ void ColumnGeo<NestedColumnType, type_code>::SaveBody(OutputStream* output) {
7676
data_->SaveBody(output);
7777
}
7878

79+
7980
template <typename NestedColumnType, Type::Code type_code>
8081
size_t ColumnGeo<NestedColumnType, type_code>::Size() const {
8182
return data_->Size();
8283
}
8384

85+
template <typename NestedColumnType, Type::Code type_code>
86+
size_t ColumnGeo<NestedColumnType, type_code>::MemoryUsage() const {
87+
return data_->MemoryUsage();
88+
}
89+
8490
template <typename NestedColumnType, Type::Code type_code>
8591
ColumnRef ColumnGeo<NestedColumnType, type_code>::Slice(size_t begin, size_t len) const {
8692
return std::make_shared<ColumnGeo>(data_->Slice(begin, len));

0 commit comments

Comments
 (0)