Skip to content

Commit d344aa1

Browse files
authored
Document doc mapping update bug (#5739)
* Create test showing the problem * Add documentation and ignore failing test * Fix incomplete issue links
1 parent 74e82ab commit d344aa1

File tree

3 files changed

+143
-1
lines changed

3 files changed

+143
-1
lines changed

docs/reference/rest-api.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,12 @@ Updates the configurations of an index. This endpoint follows PUT semantics, whi
334334
- The indexing settings update is automatically picked up by the indexer nodes once the control plane emits a new indexing plan.
335335
- The doc mapping update is automatically picked up by the indexer nodes once the control plane emits a new indexing plan.
336336

337+
:::warning
338+
339+
If you use the ingest or ES bulk API (V2), the old doc mapping will still be used to validate new documents that end up being persisted on existing shards (see [#5738](https://github.com/quickwit-oss/quickwit/issues/5738)).
340+
341+
:::
342+
337343
Updating the doc mapping doesn't reindex existing data. Queries and results are mapped on a best-effort basis when querying older splits. For more details, check out [the reference](updating-mapper.md).
338344

339345
#### PUT payload

docs/reference/updating-mapper.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ Quickwit allows updating the mapping it uses to add more fields to an existing i
66

77
When you update a doc mapping for an index, Quickwit will restart indexing pipelines to take the changes into account. As both this operation and the document ingestion are asynchronous, there is no strict happens-before relationship between ingestion and update. This means a document ingested just before the update may be indexed according to the newer doc mapper, and a document ingested just after the update may be indexed with the older doc mapper.
88

9+
:::warning
10+
11+
If you use the ingest or ES bulk API (V2), the old doc mapping will still be used to validate new documents that end up being persisted on existing shards (see [#5738](https://github.com/quickwit-oss/quickwit/issues/5738)).
12+
13+
:::
14+
915
## Querying
1016

1117
Quickwit always validates queries against the most recent mapping.

quickwit/quickwit-integration-tests/src/tests/update_tests/doc_mapping_tests.rs

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::fmt::Write;
1516
use std::time::Duration;
1617

1718
use quickwit_config::service::QuickwitService;
19+
use quickwit_rest_client::models::IngestSource;
20+
use quickwit_rest_client::rest_client::CommitType;
1821
use serde_json::{json, Value};
1922

2023
use super::assert_hits_unordered;
@@ -30,7 +33,6 @@ async fn validate_search_across_doc_mapping_updates(
3033
ingest_after_update: &[Value],
3134
query_and_expect: &[(&str, Result<&[Value], ()>)],
3235
) {
33-
quickwit_common::setup_logging_for_tests();
3436
let sandbox = ClusterSandboxBuilder::build_and_start_standalone().await;
3537

3638
{
@@ -579,3 +581,131 @@ async fn test_update_doc_mapping_add_field_on_strict() {
579581
)
580582
.await;
581583
}
584+
585+
/// Integration test: documents ingested after a doc mapping update should be
/// validated against the updated mapping.
///
/// Scenario: create an index whose `body` field is `u64`, ingest 20
/// non-negative values, update the mapping so that `body` is `i64`, then
/// ingest 20 negative values and expect none of them to be rejected.
#[tokio::test]
586+
#[ignore]
587+
// TODO(#5738)
// The test is ignored because it currently fails; see issue #5738.
588+
async fn test_update_doc_validation() {
589+
quickwit_common::setup_logging_for_tests();
590+
let index_id = "update-doc-validation";
591+
// Single node running every service listed below.
let sandbox = ClusterSandboxBuilder::default()
592+
.add_node([
593+
QuickwitService::Searcher,
594+
QuickwitService::Metastore,
595+
QuickwitService::Indexer,
596+
QuickwitService::ControlPlane,
597+
QuickwitService::Janitor,
598+
])
599+
.build_and_start()
600+
.await;
601+
602+
{
603+
// Wait for indexer to fully start.
604+
// The starting time is a bit long for a cluster.
605+
tokio::time::sleep(Duration::from_secs(3)).await;
606+
let indexing_service_counters = sandbox
607+
.rest_client(QuickwitService::Indexer)
608+
.node_stats()
609+
.indexing()
610+
.await
611+
.unwrap();
612+
// This test has not created any index yet, so no pipeline is expected.
assert_eq!(indexing_service_counters.num_running_pipelines, 0);
613+
}
614+
615+
// Create index
// `body` starts out as an unsigned integer (`u64`).
616+
sandbox
617+
.rest_client(QuickwitService::Indexer)
618+
.indexes()
619+
.create(
620+
json!({
621+
"version": "0.8",
622+
"index_id": index_id,
623+
"doc_mapping": {
624+
"field_mappings": [
625+
{"name": "body", "type": "u64"}
626+
]
627+
},
628+
"indexing_settings": {
629+
"commit_timeout_secs": 1
630+
},
631+
})
632+
.to_string(),
633+
quickwit_config::ConfigFormat::Json,
634+
// NOTE(review): presumably the `overwrite` flag — confirm against the REST client API.
false,
635+
)
636+
.await
637+
.unwrap();
638+
639+
assert!(sandbox
640+
.rest_client(QuickwitService::Indexer)
641+
.node_health()
642+
.is_live()
643+
.await
644+
.unwrap());
645+
646+
// Wait until indexing pipelines are started.
647+
sandbox.wait_for_indexing_pipelines(1).await.unwrap();
648+
649+
// Newline-delimited JSON, one `{"body": <id>}` document per line, for ids 0 through 19.
let unsigned_payload = (0..20).fold(String::new(), |mut buffer, id| {
650+
writeln!(&mut buffer, "{{\"body\": {id}}}").unwrap();
651+
buffer
652+
});
653+
654+
let unsigned_response = sandbox
655+
.rest_client(QuickwitService::Indexer)
656+
.ingest(
657+
index_id,
658+
IngestSource::Str(unsigned_payload.clone()),
659+
None,
660+
None,
661+
CommitType::Auto,
662+
)
663+
.await
664+
.unwrap();
665+
666+
// Every non-negative value must be accepted under the initial `u64` mapping.
assert_eq!(unsigned_response.num_rejected_docs.unwrap(), 0);
667+
668+
// Update the index so that `body` becomes a signed integer (`i64`).
sandbox
669+
.rest_client(QuickwitService::Searcher)
670+
.indexes()
671+
.update(
672+
index_id,
673+
json!({
674+
"version": "0.8",
675+
"index_id": index_id,
676+
"doc_mapping": {
677+
"field_mappings": [
678+
{"name": "body", "type": "i64"}
679+
]
680+
},
681+
"indexing_settings": {
682+
"commit_timeout_secs": 1,
683+
},
684+
})
685+
.to_string(),
686+
quickwit_config::ConfigFormat::Json,
687+
)
688+
.await
689+
.unwrap();
690+
691+
// Same document shape as above, but with negative ids -20 through -1.
let signed_payload = (-20..0).fold(String::new(), |mut buffer, id| {
692+
writeln!(&mut buffer, "{{\"body\": {id}}}").unwrap();
693+
buffer
694+
});
695+
696+
let signed_response = sandbox
697+
.rest_client(QuickwitService::Indexer)
698+
.ingest(
699+
index_id,
700+
IngestSource::Str(signed_payload.clone()),
701+
None,
702+
None,
703+
CommitType::Auto,
704+
)
705+
.await
706+
.unwrap();
707+
708+
// Negative values should be accepted once `body` is `i64`.
// NOTE(review): presumably this is the assertion that fails until #5738 is resolved — confirm.
assert_eq!(signed_response.num_rejected_docs.unwrap(), 0);
709+
710+
sandbox.shutdown().await.unwrap();
711+
}

0 commit comments

Comments
 (0)