Skip to content

Commit 2b37a4d

Browse files
authored
feat: impl Create/Drop Index for Ngram Index (#17789)
* feat: impl `Create/Drop` Index for Ngram Index Signed-off-by: Kould <[email protected]> * chore: codefmt Signed-off-by: Kould <[email protected]> --------- Signed-off-by: Kould <[email protected]>
1 parent 80b91b6 commit 2b37a4d

File tree

72 files changed

+1284
-68
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

72 files changed

+1284
-68
lines changed

Cargo.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ databend-enterprise-fail-safe = { path = "src/query/ee_features/fail_safe" }
194194
databend-enterprise-hilbert-clustering = { path = "src/query/ee_features/hilbert_clustering" }
195195
databend-enterprise-inverted-index = { path = "src/query/ee_features/inverted_index" }
196196
databend-enterprise-meta = { path = "src/meta/ee" }
197+
databend-enterprise-ngram-index = { path = "src/query/ee_features/ngram_index" }
197198
databend-enterprise-query = { path = "src/query/ee" }
198199
databend-enterprise-resources-management = { path = "src/query/ee_features/resources_management" }
199200
databend-enterprise-storage-encryption = { path = "src/query/ee_features/storage_encryption" }

src/common/license/src/license.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ pub enum Feature {
7575
SystemManagement,
7676
#[serde(alias = "hilbert_clustering", alias = "HILBERT_CLUSTERING")]
7777
HilbertClustering,
78+
#[serde(alias = "ngram_index", alias = "NGRAM_INDEX")]
79+
NgramIndex,
7880
#[serde(other)]
7981
Unknown,
8082
}
@@ -122,6 +124,7 @@ impl fmt::Display for Feature {
122124
Feature::AmendTable => write!(f, "amend_table"),
123125
Feature::SystemManagement => write!(f, "system_management"),
124126
Feature::HilbertClustering => write!(f, "hilbert_clustering"),
127+
Feature::NgramIndex => write!(f, "ngram_index"),
125128
Feature::Unknown => write!(f, "unknown"),
126129
}
127130
}
@@ -169,7 +172,8 @@ impl Feature {
169172
| (Feature::VirtualColumn, Feature::VirtualColumn)
170173
| (Feature::AttacheTable, Feature::AttacheTable)
171174
| (Feature::StorageEncryption, Feature::StorageEncryption)
172-
| (Feature::HilbertClustering, Feature::HilbertClustering) => Ok(true),
175+
| (Feature::HilbertClustering, Feature::HilbertClustering)
176+
| (Feature::NgramIndex, Feature::NgramIndex) => Ok(true),
173177
(_, _) => Ok(false),
174178
}
175179
}
@@ -338,6 +342,11 @@ mod tests {
338342
serde_json::from_str::<Feature>("\"hilbert_clustering\"").unwrap()
339343
);
340344

345+
assert_eq!(
346+
Feature::NgramIndex,
347+
serde_json::from_str::<Feature>("\"NgramIndex\"").unwrap()
348+
);
349+
341350
assert_eq!(
342351
Feature::Unknown,
343352
serde_json::from_str::<Feature>("\"ssss\"").unwrap()
@@ -371,11 +380,12 @@ mod tests {
371380
}),
372381
Feature::AmendTable,
373382
Feature::HilbertClustering,
383+
Feature::NgramIndex,
374384
]),
375385
};
376386

377387
assert_eq!(
378-
"LicenseInfo{ type: enterprise, org: databend, tenants: [databend_tenant,foo], features: [aggregate_index,amend_table,attach_table,compute_quota(threads_num: 1, memory_usage: 1),computed_column,data_mask,hilbert_clustering,inverted_index,license_info,storage_encryption,storage_quota(storage_usage: 1),stream,vacuum,virtual_column] }",
388+
"LicenseInfo{ type: enterprise, org: databend, tenants: [databend_tenant,foo], features: [aggregate_index,amend_table,attach_table,compute_quota(threads_num: 1, memory_usage: 1),computed_column,data_mask,hilbert_clustering,inverted_index,license_info,ngram_index,storage_encryption,storage_quota(storage_usage: 1),stream,vacuum,virtual_column] }",
379389
license_info.to_string()
380390
);
381391
}

src/meta/api/src/schema_api_impl.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2590,6 +2590,7 @@ impl<KV: kvapi::KVApi<Error = MetaError> + ?Sized> SchemaApi for KV {
25902590
let version = old_version.unwrap_or(Uuid::new_v4().simple().to_string());
25912591

25922592
let index = TableIndex {
2593+
index_type: req.index_type.clone(),
25932594
name: req.name.clone(),
25942595
column_ids: req.column_ids.clone(),
25952596
sync_creation: req.sync_creation,

src/meta/api/src/schema_api_test_suite.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ use databend_common_meta_app::schema::TableIdHistoryIdent;
117117
use databend_common_meta_app::schema::TableIdList;
118118
use databend_common_meta_app::schema::TableIdToName;
119119
use databend_common_meta_app::schema::TableIdent;
120+
use databend_common_meta_app::schema::TableIndexType;
120121
use databend_common_meta_app::schema::TableInfo;
121122
use databend_common_meta_app::schema::TableMeta;
122123
use databend_common_meta_app::schema::TableNameIdent;
@@ -6171,6 +6172,7 @@ impl SchemaApiTestSuite {
61716172
info!("--- create table index 1");
61726173
let req = CreateTableIndexReq {
61736174
create_option: CreateOption::Create,
6175+
index_type: TableIndexType::Inverted,
61746176
tenant: tenant.clone(),
61756177
table_id,
61766178
name: index_name_1.clone(),
@@ -6196,6 +6198,7 @@ impl SchemaApiTestSuite {
61966198
column_ids: index_column_ids_1.clone(),
61976199
sync_creation: true,
61986200
options: BTreeMap::new(),
6201+
index_type: TableIndexType::Inverted,
61996202
};
62006203
let res = mt.create_table_index(req).await;
62016204
assert!(res.is_err());
@@ -6209,6 +6212,7 @@ impl SchemaApiTestSuite {
62096212
column_ids: index_column_ids_2.clone(),
62106213
sync_creation: true,
62116214
options: BTreeMap::new(),
6215+
index_type: TableIndexType::Inverted,
62126216
};
62136217
let res = mt.create_table_index(req).await;
62146218
assert!(res.is_ok());
@@ -6230,6 +6234,7 @@ impl SchemaApiTestSuite {
62306234
column_ids: index_column_ids_1.clone(),
62316235
sync_creation: true,
62326236
options: BTreeMap::new(),
6237+
index_type: TableIndexType::Inverted,
62336238
};
62346239

62356240
let res = mt.create_table_index(req).await;
@@ -6250,6 +6255,7 @@ impl SchemaApiTestSuite {
62506255
column_ids: index_column_ids_1.clone(),
62516256
sync_creation: true,
62526257
options: BTreeMap::new(),
6258+
index_type: TableIndexType::Inverted,
62536259
};
62546260

62556261
let res = mt.create_table_index(req).await;
@@ -6266,6 +6272,7 @@ impl SchemaApiTestSuite {
62666272
column_ids: index_column_ids_3.clone(),
62676273
sync_creation: true,
62686274
options: BTreeMap::new(),
6275+
index_type: TableIndexType::Inverted,
62696276
};
62706277
let res = mt.create_table_index(req).await;
62716278
assert!(res.is_err());
@@ -6291,6 +6298,7 @@ impl SchemaApiTestSuite {
62916298
{
62926299
info!("--- drop table index");
62936300
let req = DropTableIndexReq {
6301+
index_type: TableIndexType::Inverted,
62946302
tenant: tenant.clone(),
62956303
if_exists: false,
62966304
table_id,
@@ -6302,6 +6310,7 @@ impl SchemaApiTestSuite {
63026310
assert!(res.is_ok());
63036311

63046312
let req = DropTableIndexReq {
6313+
index_type: TableIndexType::Inverted,
63056314
tenant: tenant.clone(),
63066315
if_exists: false,
63076316
table_id,
@@ -6311,6 +6320,7 @@ impl SchemaApiTestSuite {
63116320
assert!(res.is_err());
63126321

63136322
let req = DropTableIndexReq {
6323+
index_type: TableIndexType::Inverted,
63146324
tenant: tenant.clone(),
63156325
if_exists: true,
63166326
table_id,
@@ -6369,6 +6379,7 @@ impl SchemaApiTestSuite {
63696379
column_ids: index_column_ids_1.clone(),
63706380
sync_creation: true,
63716381
options: BTreeMap::new(),
6382+
index_type: TableIndexType::Inverted,
63726383
};
63736384
index2_drop_start_time = Utc::now();
63746385
let res = mt.create_table_index(req).await;

src/meta/app/src/schema/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ pub use table::TableIdList;
120120
pub use table::TableIdToName;
121121
pub use table::TableIdent;
122122
pub use table::TableIndex;
123+
pub use table::TableIndexType;
123124
pub use table::TableInfo;
124125
pub use table::TableMeta;
125126
pub use table::TableNameIdent;

src/meta/app/src/schema/table.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,17 @@ pub struct TableMeta {
283283
pub indexes: BTreeMap<String, TableIndex>,
284284
}
285285

286+
#[derive(
287+
serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq, num_derive::FromPrimitive,
288+
)]
289+
pub enum TableIndexType {
290+
Inverted = 0,
291+
Ngram = 1,
292+
}
293+
286294
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)]
287295
pub struct TableIndex {
296+
pub index_type: TableIndexType,
288297
pub name: String,
289298
pub column_ids: Vec<u32>,
290299
// if true, index will create after data written to databend,
@@ -471,6 +480,19 @@ impl Display for TableInfo {
471480
}
472481
}
473482

483+
impl Display for TableIndexType {
484+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
485+
match self {
486+
TableIndexType::Inverted => {
487+
write!(f, "INVERTED")
488+
}
489+
TableIndexType::Ngram => {
490+
write!(f, "NGRAM")
491+
}
492+
}
493+
}
494+
}
495+
474496
/// Save table name id list history.
475497
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default, Eq, PartialEq)]
476498
pub struct TableIdList {
@@ -846,6 +868,7 @@ pub struct UpdateTableMetaReply {}
846868
#[derive(Clone, Debug, PartialEq, Eq)]
847869
pub struct CreateTableIndexReq {
848870
pub create_option: CreateOption,
871+
pub index_type: TableIndexType,
849872
pub tenant: Tenant,
850873
pub table_id: u64,
851874
pub name: String,
@@ -872,6 +895,7 @@ impl Display for CreateTableIndexReq {
872895

873896
#[derive(Clone, Debug, PartialEq, Eq)]
874897
pub struct DropTableIndexReq {
898+
pub index_type: TableIndexType,
875899
pub tenant: Tenant,
876900
pub if_exists: bool,
877901
pub table_id: u64,

src/meta/proto-conv/src/table_from_to_protobuf_impl.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use databend_common_meta_app::storage::StorageParams;
2828
use databend_common_meta_app::tenant::Tenant;
2929
use databend_common_meta_app_types::non_empty::NonEmptyString;
3030
use databend_common_protos::pb;
31+
use num::FromPrimitive;
3132

3233
use crate::reader_check_msg;
3334
use crate::FromToProto;
@@ -355,6 +356,8 @@ impl FromToProto for mt::TableIndex {
355356
reader_check_msg(p.ver, p.min_reader_ver)?;
356357

357358
let v = Self {
359+
index_type: FromPrimitive::from_i32(p.index_type)
360+
.ok_or_else(|| Incompatible::new(format!("invalid IndexType: {}", p.index_type)))?,
358361
name: p.name,
359362
column_ids: p.column_ids,
360363
sync_creation: p.sync_creation,
@@ -373,6 +376,7 @@ impl FromToProto for mt::TableIndex {
373376
sync_creation: self.sync_creation,
374377
version: self.version.clone(),
375378
options: self.options.clone(),
379+
index_type: self.index_type.clone() as i32,
376380
};
377381
Ok(p)
378382
}

src/meta/proto-conv/src/util.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
154154
(122, "2025-03-11: Add: table_meta and virtual_data_schema"),
155155
(123, "2025-03-27: Add: add compression in user.proto/ParquetFileFormatParam"),
156156
(124, "2025-04-01: Add: add headers in udf.proto/UDFServer"),
157+
(125, "2025-04-16: Add: add index_type in table.proto/TableIndex"),
157158
// Dear developer:
158159
// If you're gonna add a new metadata version, you'll have to add a test for it.
159160
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)

src/meta/proto-conv/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,4 @@ mod v121_avro_format_params;
119119
mod v122_virtual_schema;
120120
mod v123_parquet_format_params;
121121
mod v124_udf_server_headers;
122+
mod v125_table_index;

0 commit comments

Comments
 (0)