Skip to content

Commit da39c8f

Browse files
cyfdecyf authored and artpaul committed
Optimize type name parsing (ClickHouse#42)
* Add TypeAst cache.
* Add Type::Code in TypeAst.
* Use -O2 optimization to build.
* Add benchmark: SELECT from system.numbers
1 parent 8bc31b0 commit da39c8f

File tree

7 files changed

+143
-27
lines changed

7 files changed

+143
-27
lines changed

CMakeLists.txt

+8-2
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,17 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.0.2)
33
INCLUDE (cmake/cpp11.cmake)
44
INCLUDE (cmake/subdirs.cmake)
55

6+
OPTION(BUILD_BENCHMARK "Build benchmark" OFF)
7+
68
PROJECT (CLICKHOUSE-CLIENT)
79

810
USE_CXX11()
911

1012
IF (UNIX)
1113
IF (APPLE)
12-
SET (CMAKE_CXX_FLAGS "-Wall -Wextra -Werror")
14+
SET (CMAKE_CXX_FLAGS "-O2 -Wall -Wextra -Werror")
1315
ELSE ()
14-
SET (CMAKE_CXX_FLAGS "-pthread -Wall -Wextra -Werror")
16+
SET (CMAKE_CXX_FLAGS "-O2 -pthread -Wall -Wextra -Werror")
1517
ENDIF ()
1618
SET (CMAKE_EXE_LINKER_FLAGS, "-lpthread")
1719
ENDIF ()
@@ -27,3 +29,7 @@ PROJECT (CLICKHOUSE-CLIENT)
2729
tests/simple
2830
ut
2931
)
32+
33+
IF (BUILD_BENCHMARK)
34+
SUBDIRS(bench)
35+
ENDIF (BUILD_BENCHMARK)

bench/CMakeLists.txt

+8
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
ADD_EXECUTABLE (bench
2+
bench.cpp
3+
)
4+
5+
TARGET_LINK_LIBRARIES (bench
6+
clickhouse-cpp-lib
7+
benchmark
8+
)

bench/bench.cpp

+34
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
#include <benchmark/benchmark.h>
2+
3+
#include <clickhouse/client.h>
4+
5+
namespace clickhouse {
6+
7+
Client g_client(ClientOptions()
8+
.SetHost("localhost")
9+
.SetPingBeforeQuery(false));
10+
11+
static void SelectNumber(benchmark::State& state) {
12+
while (state.KeepRunning()) {
13+
g_client.Select("SELECT number, number, number FROM system.numbers LIMIT 1000",
14+
[](const Block& block) { block.GetRowCount(); }
15+
);
16+
}
17+
}
18+
BENCHMARK(SelectNumber);
19+
20+
static void SelectNumberMoreColumns(benchmark::State& state) {
21+
// Mainly test performance on type name parsing.
22+
while (state.KeepRunning()) {
23+
g_client.Select("SELECT "
24+
"number, number, number, number, number, number, number, number, number, number "
25+
"FROM system.numbers LIMIT 100",
26+
[](const Block& block) { block.GetRowCount(); }
27+
);
28+
}
29+
}
30+
BENCHMARK(SelectNumberMoreColumns);
31+
32+
}
33+
34+
BENCHMARK_MAIN();

clickhouse/columns/factory.cpp

+24-21
Original file line number | Diff line number | Diff line change
@@ -15,43 +15,46 @@ namespace clickhouse {
1515
namespace {
1616

1717
static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
18-
if (ast.name == "UInt8")
18+
switch (ast.code) {
19+
case Type::UInt8:
1920
return std::make_shared<ColumnUInt8>();
20-
if (ast.name == "UInt16")
21+
case Type::UInt16:
2122
return std::make_shared<ColumnUInt16>();
22-
if (ast.name == "UInt32")
23+
case Type::UInt32:
2324
return std::make_shared<ColumnUInt32>();
24-
if (ast.name == "UInt64")
25+
case Type::UInt64:
2526
return std::make_shared<ColumnUInt64>();
2627

27-
if (ast.name == "Int8")
28+
case Type::Int8:
2829
return std::make_shared<ColumnInt8>();
29-
if (ast.name == "Int16")
30+
case Type::Int16:
3031
return std::make_shared<ColumnInt16>();
31-
if (ast.name == "Int32")
32+
case Type::Int32:
3233
return std::make_shared<ColumnInt32>();
33-
if (ast.name == "Int64")
34+
case Type::Int64:
3435
return std::make_shared<ColumnInt64>();
3536

36-
if (ast.name == "UUID")
37+
case Type::UUID:
3738
return std::make_shared<ColumnUUID>();
3839

39-
if (ast.name == "Float32")
40+
case Type::Float32:
4041
return std::make_shared<ColumnFloat32>();
41-
if (ast.name == "Float64")
42+
case Type::Float64:
4243
return std::make_shared<ColumnFloat64>();
4344

44-
if (ast.name == "String")
45+
case Type::String:
4546
return std::make_shared<ColumnString>();
46-
if (ast.name == "FixedString")
47+
case Type::FixedString:
4748
return std::make_shared<ColumnFixedString>(ast.elements.front().value);
4849

49-
if (ast.name == "DateTime")
50+
case Type::DateTime:
5051
return std::make_shared<ColumnDateTime>();
51-
if (ast.name == "Date")
52+
case Type::Date:
5253
return std::make_shared<ColumnDate>();
5354

54-
return nullptr;
55+
default:
56+
return nullptr;
57+
}
5558
}
5659

5760
static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
@@ -92,7 +95,7 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
9295

9396
for (const auto& elem : ast.elements) {
9497
enum_items.push_back(
95-
Type::EnumItem{elem.name.to_string(), (int16_t)elem.value});
98+
Type::EnumItem{elem.name, (int16_t)elem.value});
9699
}
97100

98101
if (ast.name == "Enum8") {
@@ -117,11 +120,11 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
117120

118121
} // namespace
119122

120-
ColumnRef CreateColumnByType(const std::string& type_name) {
121-
TypeAst ast;
122123

123-
if (TypeParser(type_name).Parse(&ast)) {
124-
return CreateColumnFromAst(ast);
124+
ColumnRef CreateColumnByType(const std::string& type_name) {
125+
auto ast = ParseTypeName(type_name);
126+
if (ast != nullptr) {
127+
return CreateColumnFromAst(*ast);
125128
}
126129

127130
return nullptr;

clickhouse/types/type_parser.cpp

+56-3
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,42 @@
11
#include "type_parser.h"
22
#include "../base/string_utils.h"
33

4-
#include <iostream>
4+
#include <unordered_map>
55

66
namespace clickhouse {
77

8+
static std::unordered_map<std::string, Type::Code> g_type_code = {
9+
{ "Int8", Type::Int8 },
10+
{ "Int16", Type::Int16 },
11+
{ "Int32", Type::Int32 },
12+
{ "Int64", Type::Int64 },
13+
{ "UInt8", Type::UInt8 },
14+
{ "UInt16", Type::UInt16 },
15+
{ "UInt32", Type::UInt32 },
16+
{ "UInt64", Type::UInt64 },
17+
{ "Float32", Type::Float32 },
18+
{ "Float64", Type::Float64 },
19+
{ "String", Type::String },
20+
{ "FixedString", Type::FixedString },
21+
{ "DateTime", Type::DateTime },
22+
{ "Date", Type::Date },
23+
{ "Array", Type::Array },
24+
{ "Nullable", Type::Nullable },
25+
{ "Tuple", Type::Tuple },
26+
{ "Enum8", Type::Enum8 },
27+
{ "Enum16", Type::Enum16 },
28+
{ "UUID", Type::UUID },
29+
};
30+
31+
static Type::Code GetTypeCode(const StringView& name) {
32+
std::string n = name.to_string();
33+
auto it = g_type_code.find(n);
34+
if (it != g_type_code.end()) {
35+
return it->second;
36+
}
37+
return Type::Void;
38+
}
39+
840
static TypeAst::Meta GetTypeMeta(const StringView& name) {
941
if (name == "Array") {
1042
return TypeAst::Array;
@@ -49,11 +81,12 @@ bool TypeParser::Parse(TypeAst* type) {
4981
switch (token.type) {
5082
case Token::Name:
5183
type_->meta = GetTypeMeta(token.value);
52-
type_->name = token.value;
84+
type_->name = token.value.to_string();
85+
type_->code = GetTypeCode(token.value.to_string());
5386
break;
5487
case Token::Number:
5588
type_->meta = TypeAst::Number;
56-
type_->value = FromString<int>(token.value);
89+
type_->value = std::stol(token.value.to_string());
5790
break;
5891
case Token::LPar:
5992
type_->elements.emplace_back(TypeAst());
@@ -130,4 +163,24 @@ TypeParser::Token TypeParser::NextToken() {
130163
return Token{Token::EOS, StringView()};
131164
}
132165

166+
167+
const TypeAst* ParseTypeName(const std::string& type_name) {
168+
// Cache for type_name.
169+
// Usually we won't have too many type names in the cache, so do not try to
170+
// limit cache size.
171+
static std::unordered_map<std::string, TypeAst> ast_cache;
172+
173+
auto it = ast_cache.find(type_name);
174+
if (it != ast_cache.end()) {
175+
return &it->second;
176+
}
177+
178+
auto& ast = ast_cache[type_name];
179+
if (TypeParser(type_name).Parse(&ast)) {
180+
return &ast;
181+
}
182+
ast_cache.erase(type_name);
183+
return nullptr;
184+
}
185+
133186
}

clickhouse/types/type_parser.h

+7-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "../base/string_view.h"
4+
#include "types.h"
45

56
#include <list>
67
#include <stack>
@@ -21,8 +22,10 @@ struct TypeAst {
2122

2223
/// Type's category.
2324
Meta meta;
25+
Type::Code code;
2426
/// Type's name.
25-
StringView name;
27+
/// Need to cache TypeAst, so can't use StringView for name.
28+
std::string name;
2629
/// Value associated with the node,
2730
/// used for fixed-width types and enum values.
2831
int64_t value = 0;
@@ -66,4 +69,7 @@ class TypeParser {
6669
std::stack<TypeAst*> open_elements_;
6770
};
6871

72+
73+
const TypeAst* ParseTypeName(const std::string& type_name);
74+
6975
}

ut/type_parser_ut.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ TEST(TypeParserCase, ParseTerminals) {
99

1010
ASSERT_EQ(ast.meta, TypeAst::Terminal);
1111
ASSERT_EQ(ast.name, "UInt8");
12+
ASSERT_EQ(ast.code, Type::UInt8);
1213
}
1314

1415
TEST(TypeParserCase, ParseFixedString) {
@@ -17,6 +18,7 @@ TEST(TypeParserCase, ParseFixedString) {
1718

1819
ASSERT_EQ(ast.meta, TypeAst::Terminal);
1920
ASSERT_EQ(ast.name, "FixedString");
21+
ASSERT_EQ(ast.code, Type::FixedString);
2022
ASSERT_EQ(ast.elements.front().value, 24U);
2123
}
2224

@@ -26,6 +28,7 @@ TEST(TypeParserCase, ParseArray) {
2628

2729
ASSERT_EQ(ast.meta, TypeAst::Array);
2830
ASSERT_EQ(ast.name, "Array");
31+
ASSERT_EQ(ast.code, Type::Array);
2932
ASSERT_EQ(ast.elements.front().meta, TypeAst::Terminal);
3033
ASSERT_EQ(ast.elements.front().name, "Int32");
3134
}
@@ -36,6 +39,7 @@ TEST(TypeParserCase, ParseNullable) {
3639

3740
ASSERT_EQ(ast.meta, TypeAst::Nullable);
3841
ASSERT_EQ(ast.name, "Nullable");
42+
ASSERT_EQ(ast.code, Type::Nullable);
3943
ASSERT_EQ(ast.elements.front().meta, TypeAst::Terminal);
4044
ASSERT_EQ(ast.elements.front().name, "Date");
4145
}
@@ -47,6 +51,7 @@ TEST(TypeParserCase, ParseEnum) {
4751
.Parse(&ast);
4852
ASSERT_EQ(ast.meta, TypeAst::Enum);
4953
ASSERT_EQ(ast.name, "Enum8");
54+
ASSERT_EQ(ast.code, Type::Enum8);
5055
ASSERT_EQ(ast.elements.size(), 4u);
5156

5257
std::vector<std::string> names = {"COLOR_red_10_T", "COLOR_green_20_T", "COLOR_blue_30_T", "COLOR_black_30_T"};
@@ -55,6 +60,7 @@ TEST(TypeParserCase, ParseEnum) {
5560
auto element = ast.elements.begin();
5661
for (size_t i = 0; i < 4; ++i) {
5762
ASSERT_EQ(element->name, names[i]);
63+
ASSERT_EQ(element->code, Type::Void);
5864
ASSERT_EQ(element->value, values[i]);
5965
++element;
6066
}

0 commit comments

Comments
 (0)