From 2e3d6dd2a3aa21045488d124aa49f5d49b713d61 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Tue, 11 Mar 2025 11:30:34 +0100 Subject: [PATCH 1/3] Add failing unit test --- .../src/index_api/rest_handler.rs | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index fbdc8a733dd..da4536b6271 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -1034,6 +1034,29 @@ mod tests { .default_search_fields, ["severity_text", "body"] ); + // test with index_uri at the root of a bucket + { + let resp = warp::test::request() + .path("/indexes") + .method("POST") + .json(&true) + .body(r#"{"version": "0.7", "index_id": "hdfs-logs2", "index_uri": "s3://my-bucket", "doc_mapping": {"field_mappings":[{"name": "timestamp", "type": "i64", "fast": true, "indexed": true}]},"search_settings":{"default_search_fields":["body"]}}"#) + .reply(&index_management_handler) + .await; + let body = std::str::from_utf8(resp.body()).unwrap(); + assert_eq!(resp.status(), 200, "{body}",); + } + { + let resp = warp::test::request() + .path("/indexes/hdfs-logs2") + .method("PUT") + .json(&true) + .body(r#"{"version": "0.7", "index_id": "hdfs-logs2", "index_uri": "s3://my-bucket", "doc_mapping": {"field_mappings":[{"name": "timestamp", "type": "i64", "fast": true, "indexed": true}]},"search_settings":{"default_search_fields":["severity_text", "body"]}}"#) + .reply(&index_management_handler) + .await; + let body = std::str::from_utf8(resp.body()).unwrap(); + assert_eq!(resp.status(), 200, "{body}",); + } } #[tokio::test] From 71fb39040712874d4c348ae00e85fad4e4f435e1 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Tue, 11 Mar 2025 11:31:54 +0100 Subject: [PATCH 2/3] Fix index update endpoint panic when index_uri is the bucket root --- quickwit/quickwit-common/src/uri.rs | 39 ++++++++++++++----- .../src/index_config/serialize.rs | 26 +++++++++---- .../src/index_api/index_resource.rs | 14 +++++-- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/quickwit/quickwit-common/src/uri.rs b/quickwit/quickwit-common/src/uri.rs index 759af328af9..e149981b9af 100644 --- a/quickwit/quickwit-common/src/uri.rs +++ b/quickwit/quickwit-common/src/uri.rs @@ -169,23 +169,30 @@ impl Uri { } /// Returns the parent URI. + /// + /// When the URI points to an object store, this method makes sure that the + /// result targets to a well defined bucket, e.g `s3://` returns `None`. + /// /// Does not apply to PostgreSQL URIs. pub fn parent(&self) -> Option { + self.parent_unchecked().filter(|parent| { + let path = parent.path(); + match self.protocol() { + Protocol::S3 => path.components().count() >= 1, + Protocol::Azure => path.components().count() >= 2, + Protocol::Google => path.components().count() >= 1, + _ => true, + } + }) + } + + /// Same as [Self::parent()] but without the additional check on object stores. + pub fn parent_unchecked(&self) -> Option { if self.protocol().is_database() { return None; } let path = self.path(); let protocol = self.protocol(); - - if protocol == Protocol::S3 && path.components().count() < 2 { - return None; - } - if protocol == Protocol::Azure && path.components().count() < 3 { - return None; - } - if protocol == Protocol::Google && path.components().count() < 2 { - return None; - } let parent_path = path.parent()?; Some(Self { @@ -597,11 +604,23 @@ mod tests { "ram:///foo" ); assert!(Uri::for_test("s3://bucket").parent().is_none()); + assert_eq!( + Uri::for_test("s3://bucket").parent_unchecked().unwrap(), + "s3://" + ); assert!(Uri::for_test("s3://bucket/").parent().is_none()); + assert_eq!( + Uri::for_test("s3://bucket/").parent_unchecked().unwrap(), + "s3://" + ); assert_eq!( Uri::for_test("s3://bucket/foo").parent().unwrap(), "s3://bucket" ); + assert_eq!( + Uri::for_test("s3://bucket/foo").parent_unchecked().unwrap(), + "s3://bucket" + ); assert_eq!( Uri::for_test("s3://bucket/foo/").parent().unwrap(), "s3://bucket" diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs index cfbe53026ed..652caf849fe 100644 --- a/quickwit/quickwit-config/src/index_config/serialize.rs +++ b/quickwit/quickwit-config/src/index_config/serialize.rs @@ -73,12 +73,9 @@ pub fn load_index_config_from_user_config( pub fn load_index_config_update( config_format: ConfigFormat, index_config_bytes: &[u8], + current_index_parent_dir: &Uri, current_index_config: &IndexConfig, ) -> anyhow::Result { - let current_index_parent_dir = ¤t_index_config - .index_uri - .parent() - .expect("index URI should have a parent"); let mut new_index_config = load_index_config_from_user_config( config_format, index_config_bytes, @@ -355,10 +352,11 @@ mod test { index_id: hdfs-logs doc_mapping: {} "#; + let default_root = Uri::for_test("s3://mybucket"); let original_config: IndexConfig = load_index_config_from_user_config( ConfigFormat::Yaml, original_config_yaml.as_bytes(), - &Uri::for_test("s3://mybucket"), + &default_root, ) .unwrap(); { @@ -371,6 +369,7 @@ mod test { let updated_config = load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .unwrap(); @@ -387,6 +386,7 @@ mod test { let updated_config = load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .unwrap(); @@ -403,6 +403,7 @@ mod test { let load_error = load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .unwrap_err(); @@ -432,10 +433,11 @@ mod test { period: 90 days schedule: daily "#; + let default_root = Uri::for_test("s3://mybucket"); let original_config: IndexConfig = load_index_config_from_user_config( ConfigFormat::Yaml, original_config_yaml.as_bytes(), - &Uri::for_test("s3://mybucket"), + &default_root, ) .unwrap(); @@ -452,6 +454,7 @@ mod test { let updated_config = load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .unwrap(); @@ -473,10 +476,11 @@ mod test { index_id: hdfs-logs doc_mapping: {} "#; + let default_root = Uri::for_test("s3://mybucket"); let original_config: IndexConfig = load_index_config_from_user_config( ConfigFormat::Yaml, original_config_yaml.as_bytes(), - &Uri::for_test("s3://mybucket"), + &default_root, ) .unwrap(); @@ -493,6 +497,7 @@ mod test { let updated_config = load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .unwrap(); @@ -513,10 +518,11 @@ mod test { type: datetime fast: true "#; + let default_root = Uri::for_test("s3://mybucket"); let original_config: IndexConfig = load_index_config_from_user_config( ConfigFormat::Yaml, original_config_yaml.as_bytes(), - &Uri::for_test("s3://mybucket"), + &default_root, ) .unwrap(); @@ -539,6 +545,7 @@ mod test { load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .expect_err("mapping changed but uid fixed should error"); @@ -556,6 +563,7 @@ mod test { load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .expect_err("timestamp field removed should error"); @@ -575,6 +583,7 @@ mod test { load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .expect_err("field required for timestamp is absent"); @@ -595,6 +604,7 @@ mod test { load_index_config_update( ConfigFormat::Yaml, updated_config_yaml.as_bytes(), + &default_root, &original_config, ) .expect_err("field required for default search is absent"); diff --git a/quickwit/quickwit-serve/src/index_api/index_resource.rs b/quickwit/quickwit-serve/src/index_api/index_resource.rs index 847eb342816..1db62e59ccb 100644 --- a/quickwit/quickwit-serve/src/index_api/index_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/index_resource.rs @@ -336,9 +336,17 @@ pub async fn update_index( let index_uid = current_index_metadata.index_uid.clone(); let current_index_config = current_index_metadata.into_index_config(); - let new_index_config = - load_index_config_update(config_format, &index_config_bytes, ¤t_index_config) - .map_err(IndexServiceError::InvalidConfig)?; + let current_index_parent_dir = ¤t_index_config + .index_uri + .parent_unchecked() + .ok_or_else(|| IndexServiceError::Internal("index URI should have a parent".to_string()))?; + let new_index_config = load_index_config_update( + config_format, + &index_config_bytes, + current_index_parent_dir, + ¤t_index_config, + ) + .map_err(IndexServiceError::InvalidConfig)?; let update_request = UpdateIndexRequest::try_from_updates( index_uid, From f4790ca0e00b490cde6079dfcb7665fa81c34b69 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Thu, 10 Apr 2025 10:30:27 +0200 Subject: [PATCH 3/3] Simplify update endpoint to use same default URI as create --- docs/reference/rest-api.md | 2 +- quickwit/quickwit-common/src/uri.rs | 39 +++++-------------- .../src/index_config/serialize.rs | 7 ++-- .../src/index_api/index_resource.rs | 9 ++--- .../src/index_api/rest_handler.rs | 7 +++- 5 files changed, 23 insertions(+), 41 deletions(-) diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md index c464f19cf64..1ad5a2ee40d 100644 --- a/docs/reference/rest-api.md +++ b/docs/reference/rest-api.md @@ -348,7 +348,7 @@ Updating the doc mapping doesn't reindex existing data. Queries and results are |---------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------| | `version` | `String` | Config format version, use the same as your Quickwit version. | _required_ | | `index_id` | `String` | Index ID, must be the same index as in the request URI. | _required_ | -| `index_uri` | `String` | Defines where the index files are stored. Cannot be updated. | `{current_index_uri}` | +| `index_uri` | `String` | Defines where the index files are stored. Cannot be updated. | `{default_index_root_uri}/{index_id}` | | `doc_mapping` | `DocMapping` | Doc mapping object as specified in the [index config docs](../configuration/index-config.md#doc-mapping). | _required_ | | `indexing_settings` | `IndexingSettings` | Indexing settings object as specified in the [index config docs](../configuration/index-config.md#indexing-settings). | | | `search_settings` | `SearchSettings` | Search settings object as specified in the [index config docs](../configuration/index-config.md#search-settings). | | diff --git a/quickwit/quickwit-common/src/uri.rs b/quickwit/quickwit-common/src/uri.rs index e149981b9af..759af328af9 100644 --- a/quickwit/quickwit-common/src/uri.rs +++ b/quickwit/quickwit-common/src/uri.rs @@ -169,30 +169,23 @@ impl Uri { } /// Returns the parent URI. - /// - /// When the URI points to an object store, this method makes sure that the - /// result targets to a well defined bucket, e.g `s3://` returns `None`. - /// /// Does not apply to PostgreSQL URIs. pub fn parent(&self) -> Option { - self.parent_unchecked().filter(|parent| { - let path = parent.path(); - match self.protocol() { - Protocol::S3 => path.components().count() >= 1, - Protocol::Azure => path.components().count() >= 2, - Protocol::Google => path.components().count() >= 1, - _ => true, - } - }) - } - - /// Same as [Self::parent()] but without the additional check on object stores. - pub fn parent_unchecked(&self) -> Option { if self.protocol().is_database() { return None; } let path = self.path(); let protocol = self.protocol(); + + if protocol == Protocol::S3 && path.components().count() < 2 { + return None; + } + if protocol == Protocol::Azure && path.components().count() < 3 { + return None; + } + if protocol == Protocol::Google && path.components().count() < 2 { + return None; + } let parent_path = path.parent()?; Some(Self { @@ -604,23 +597,11 @@ mod tests { "ram:///foo" ); assert!(Uri::for_test("s3://bucket").parent().is_none()); - assert_eq!( - Uri::for_test("s3://bucket").parent_unchecked().unwrap(), - "s3://" - ); assert!(Uri::for_test("s3://bucket/").parent().is_none()); - assert_eq!( - Uri::for_test("s3://bucket/").parent_unchecked().unwrap(), - "s3://" - ); assert_eq!( Uri::for_test("s3://bucket/foo").parent().unwrap(), "s3://bucket" ); - assert_eq!( - Uri::for_test("s3://bucket/foo").parent_unchecked().unwrap(), - "s3://bucket" - ); assert_eq!( Uri::for_test("s3://bucket/foo/").parent().unwrap(), "s3://bucket" diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs index 652caf849fe..4e40ae0edf8 100644 --- a/quickwit/quickwit-config/src/index_config/serialize.rs +++ b/quickwit/quickwit-config/src/index_config/serialize.rs @@ -68,18 +68,17 @@ pub fn load_index_config_from_user_config( /// /// Ensures that the new configuration is valid in itself and compared to the /// current index config. If the new configuration omits some fields, the -/// default values will be used, not those of the current index config. The only -/// exception is the index_uri because it cannot be updated. +/// default values will be used, not those of the current index config. pub fn load_index_config_update( config_format: ConfigFormat, index_config_bytes: &[u8], - current_index_parent_dir: &Uri, + default_index_root_uri: &Uri, current_index_config: &IndexConfig, ) -> anyhow::Result { let mut new_index_config = load_index_config_from_user_config( config_format, index_config_bytes, - current_index_parent_dir, + default_index_root_uri, )?; ensure!( current_index_config.index_id == new_index_config.index_id, diff --git a/quickwit/quickwit-serve/src/index_api/index_resource.rs b/quickwit/quickwit-serve/src/index_api/index_resource.rs index 1db62e59ccb..04f0cb3c8e4 100644 --- a/quickwit/quickwit-serve/src/index_api/index_resource.rs +++ b/quickwit/quickwit-serve/src/index_api/index_resource.rs @@ -286,6 +286,7 @@ pub async fn create_index( pub fn update_index_handler( metastore: MetastoreServiceClient, + node_config: Arc, ) -> impl Filter + Clone { warp::path!("indexes" / String) .and(warp::put()) @@ -293,6 +294,7 @@ pub fn update_index_handler( .and(warp::body::content_length_limit(1024 * 1024)) .and(warp::filters::body::bytes()) .and(with_arg(metastore)) + .and(with_arg(node_config)) .then(update_index) .map(log_failure("failed to update index")) .and(extract_format_from_qs()) @@ -325,6 +327,7 @@ pub async fn update_index( config_format: ConfigFormat, index_config_bytes: Bytes, metastore: MetastoreServiceClient, + node_config: Arc, ) -> Result { info!(index_id = %target_index_id, "update-index"); @@ -336,14 +339,10 @@ pub async fn update_index( let index_uid = current_index_metadata.index_uid.clone(); let current_index_config = current_index_metadata.into_index_config(); - let current_index_parent_dir = ¤t_index_config - .index_uri - .parent_unchecked() - .ok_or_else(|| IndexServiceError::Internal("index URI should have a parent".to_string()))?; let new_index_config = load_index_config_update( config_format, &index_config_bytes, - current_index_parent_dir, + &node_config.default_index_root_uri, ¤t_index_config, ) .map_err(IndexServiceError::InvalidConfig)?; diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index da4536b6271..d9ea2ccfd4f 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -89,8 +89,11 @@ pub fn index_management_handlers( // Indexes handlers. get_index_metadata_handler(index_service.metastore()) .or(list_indexes_metadata_handler(index_service.metastore())) - .or(create_index_handler(index_service.clone(), node_config)) - .or(update_index_handler(index_service.metastore())) + .or(create_index_handler( + index_service.clone(), + node_config.clone(), + )) + .or(update_index_handler(index_service.metastore(), node_config)) .or(clear_index_handler(index_service.clone())) .or(delete_index_handler(index_service.clone())) .boxed()