Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

keep knowledge of ongoing merges across merge pipelines #5633

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ binggan = { version = "0.14" }
bytes = { version = "1", features = ["serde"] }
bytesize = { version = "1.3.0", features = ["serde"] }
bytestring = "1.3.0"
census = "0.4.2"
chitchat = { git = "https://github.com/quickwit-oss/chitchat.git", rev = "54cbc70" }
chrono = { version = "0.4", default-features = false, features = [
"clock",
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ anyhow = { workspace = true }
async-speed-limit = { workspace = true }
async-trait = { workspace = true }
bytesize = { workspace = true }
census = { workspace = true }
coarsetime = { workspace = true }
dyn-clone = { workspace = true }
env_logger = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub mod temp_dir;
pub mod test_utils;
pub mod thread_pool;
pub mod tower;
pub mod tracker;
pub mod type_map;
pub mod uri;

Expand Down
208 changes: 208 additions & 0 deletions quickwit/quickwit-common/src/tracker.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Copyright 2021-Present Datadog, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::ops::Deref;
use std::sync::mpsc::{channel, Receiver, Sender};
use std::sync::{Arc, Mutex};

use census::{Inventory, TrackedObject as InventoredObject};

/// A resource tracker.
///
/// This is used to track whether an object is alive (still in use), or if it is dead (no longer
/// used, but not yet acknowledged). It keeps no trace of objects that were alive but have since
/// been acknowledged.
#[derive(Clone)]
pub struct Tracker<T: Clone> {
    // census inventory listing every currently-alive tracked object.
    inner_inventory: Inventory<T>,
    // Receiving end for values of objects dropped without being acknowledged.
    // Wrapped in Arc so every clone of the tracker drains the same queue; the
    // Arc strong count also serves as the clone count in `safe_to_recreate`.
    unacknowledged_drop_receiver: Arc<Mutex<Receiver<T>>>,
    // Sender cloned into each TrackedObject so its Drop impl can report an
    // unacknowledged death back to this tracker.
    return_channel: Sender<T>,
}

/// A single tracked object.
///
/// While alive, the object is listed in its tracker's inventory. If it is dropped without
/// [`TrackedObject::acknowledge`] being called first, its value is reported back to the
/// tracker as "dead".
#[derive(Debug)]
pub struct TrackedObject<T: Clone> {
    // Some while the object is alive; taken (left as None) by acknowledge()
    // and by the Drop impl.
    inner: Option<InventoredObject<T>>,
    // Used on drop to send the value back to the owning tracker when the
    // object dies unacknowledged.
    return_channel: Sender<T>,
}

impl<T: Clone> TrackedObject<T> {
    /// Acknowledges the object: it is removed from tracking without ever
    /// being reported as dead.
    pub fn acknowledge(mut self) {
        // Taking `inner` before the drop prevents the Drop impl from
        // sending a death notification.
        let _ = self.inner.take();
    }

    /// Creates an untracked object, mostly useful for tests.
    pub fn untracked(value: T) -> Self {
        let throwaway_tracker = Tracker::new();
        throwaway_tracker.track(value)
    }

    /// Creates an object which is tracked only as long as it is alive,
    /// but not once it is dead.
    /// The object is tracked through the provided census inventory.
    pub fn track_alive_in(value: T, inventory: &Inventory<T>) -> Self {
        // The receiving half is dropped immediately: deaths are simply
        // never reported for such objects.
        let (dead_letter_sender, _) = channel();
        TrackedObject {
            inner: Some(inventory.track(value)),
            return_channel: dead_letter_sender,
        }
    }
}

impl<T: Clone> AsRef<T> for TrackedObject<T> {
fn as_ref(&self) -> &T {
self
}
}

impl<T: Clone> Deref for TrackedObject<T> {
    type Target = T;

    fn deref(&self) -> &T {
        // `inner` is only None inside acknowledge()/drop(), neither of which
        // can coexist with a live borrow of the object.
        self.inner
            .as_deref()
            .expect("inner should only be None during drop")
    }
}

impl<T: Clone> Drop for TrackedObject<T> {
    fn drop(&mut self) {
        // An object still holding its inner handle at drop time was never
        // acknowledged: report its value back to the tracker as "dead".
        if let Some(tracked) = self.inner.take() {
            let dead_value = tracked.as_ref().clone();
            // If the send fails, every receiver is gone and nobody cares
            // about the notification; dropping the value is fine.
            let _ = self.return_channel.send(dead_value);
        }
    }
}

impl<T: Clone> Default for Tracker<T> {
fn default() -> Self {
Self::new()
}
}

impl<T: Clone> Tracker<T> {
    /// Creates a new, empty tracker.
    pub fn new() -> Self {
        let (return_channel, unacknowledged_drop_receiver) = channel();
        Tracker {
            inner_inventory: Inventory::new(),
            unacknowledged_drop_receiver: Arc::new(Mutex::new(unacknowledged_drop_receiver)),
            return_channel,
        }
    }

    /// Returns whether it is safe to recreate this tracker.
    ///
    /// A tracker is considered safe to recreate when it is the only instance
    /// left and it contains no alive object (it may still contain dead ones).
    ///
    /// Once this returns true, it stays that way until [Tracker::track] or
    /// [Tracker::clone] is called.
    pub fn safe_to_recreate(&self) -> bool {
        // The Arc strong count is exactly the number of Tracker clones alive.
        let is_sole_instance = Arc::strong_count(&self.unacknowledged_drop_receiver) == 1;
        is_sole_instance && self.inner_inventory.len() == 0
    }

    /// Lists the objects currently considered alive.
    pub fn list_ongoing(&self) -> Vec<InventoredObject<T>> {
        self.inner_inventory.list()
    }

    /// Takes away the list of objects considered dead (dropped without being
    /// acknowledged). Each dead object is returned at most once, across all
    /// clones of this tracker.
    pub fn take_dead(&self) -> Vec<T> {
        let receiver = self.unacknowledged_drop_receiver.lock().unwrap();
        // try_iter() yields queued values until the channel would block.
        receiver.try_iter().collect()
    }

    /// Tracks a new object.
    pub fn track(&self, value: T) -> TrackedObject<T> {
        TrackedObject {
            inner: Some(self.inner_inventory.track(value)),
            return_channel: self.return_channel.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{InventoredObject, Tracker};

    /// Asserts that the tracked objects in `got` wrap exactly the values in
    /// `expected`, in the same order.
    #[track_caller]
    fn assert_tracked_eq<T: PartialEq + std::fmt::Debug>(
        got: Vec<InventoredObject<T>>,
        expected: Vec<T>,
    ) {
        assert_eq!(
            got.len(),
            expected.len(),
            // fixed typo: "lenght" -> "length"
            "expected vec of same length, {} != {}",
            got.len(),
            expected.len()
        );
        for (got_item, expected_item) in got.into_iter().zip(expected) {
            assert_eq!(*got_item, expected_item);
        }
    }

    #[test]
    fn test_single_tracker() {
        let tracker = Tracker::<u32>::new();

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker.take_dead().is_empty());
        assert!(tracker.safe_to_recreate());

        {
            let tracked_1 = tracker.track(1);
            assert_tracked_eq(tracker.list_ongoing(), vec![1]);
            assert!(tracker.take_dead().is_empty());
            assert!(!tracker.safe_to_recreate());
            std::mem::drop(tracked_1); // done for clarity and silence unused var warn
        }

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker.safe_to_recreate());
        assert_eq!(tracker.take_dead(), vec![1]);
        assert!(tracker.safe_to_recreate());
    }

    #[test]
    fn test_two_tracker() {
        let tracker = Tracker::<u32>::new();
        let tracker2 = tracker.clone();

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker.take_dead().is_empty());
        // two clones exist, so recreating is not safe
        assert!(!tracker.safe_to_recreate());

        {
            let tracked_1 = tracker.track(1);
            assert_tracked_eq(tracker.list_ongoing(), vec![1]);
            assert_tracked_eq(tracker2.list_ongoing(), vec![1]);
            assert!(tracker.take_dead().is_empty());
            assert!(tracker2.take_dead().is_empty());
            assert!(!tracker.safe_to_recreate());
            std::mem::drop(tracked_1); // done for clarity and silence unused var warn
        }

        assert!(tracker.list_ongoing().is_empty());
        assert!(tracker2.list_ongoing().is_empty());
        assert_eq!(tracker2.take_dead(), vec![1]);
        // we took away the dead from tracker2, so they don't show up in tracker
        assert!(tracker.take_dead().is_empty());
    }
}
126 changes: 124 additions & 2 deletions quickwit/quickwit-indexing/failpoints/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
//! Below we test panics at different steps in the indexing pipeline.

use std::path::Path;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Barrier, Mutex};
use std::time::Duration;

Expand All @@ -42,15 +43,17 @@ use quickwit_common::split_file;
use quickwit_common::temp_dir::TempDirectory;
use quickwit_indexing::actors::MergeExecutor;
use quickwit_indexing::merge_policy::{MergeOperation, MergeTask};
use quickwit_indexing::models::MergeScratch;
use quickwit_indexing::models::{
DetachIndexingPipeline, DetachMergePipeline, MergeScratch, SpawnPipeline,
};
use quickwit_indexing::{get_tantivy_directory_from_split_bundle, TestSandbox};
use quickwit_metastore::{
ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata,
SplitState,
};
use quickwit_proto::indexing::MergePipelineId;
use quickwit_proto::metastore::{ListSplitsRequest, MetastoreService};
use quickwit_proto::types::{IndexUid, NodeId};
use quickwit_proto::types::{IndexUid, NodeId, PipelineUid};
use serde_json::Value as JsonValue;
use tantivy::Directory;

Expand Down Expand Up @@ -346,3 +349,122 @@ async fn test_merge_executor_controlled_directory_kill_switch() -> anyhow::Resul

Ok(())
}

// Verifies that restarting a merge pipeline does not re-run a merge that a
// previous incarnation of the pipeline already started: knowledge of ongoing
// merges must be kept across merge pipelines.
#[tokio::test]
async fn test_no_duplicate_merge_on_pipeline_restart() -> anyhow::Result<()> {
    quickwit_common::setup_logging_for_tests();
    let doc_mapper_yaml = r#"
field_mappings:
  - name: body
    type: text
  - name: ts
    type: datetime
    fast: true
timestamp_field: ts
"#;
    // merge_factor 4 with 4 splits of 500 docs each: exactly one merge
    // operation is expected to be triggered.
    let indexing_setting_yaml = r#"
split_num_docs_target: 2500
merge_policy:
  type: "limit_merge"
  max_merge_ops: 1
  merge_factor: 4
  max_merge_factor: 4
  max_finalize_merge_operations: 1
"#;
    let search_fields = ["body"];
    let index_id = "test-index-merge-duplication";
    let mut test_index_builder = TestSandbox::create(
        index_id,
        doc_mapper_yaml,
        indexing_setting_yaml,
        &search_fields,
    )
    .await?;

    // State machine encoded in a single counter (+1 on failpoint entry,
    // +10 on failpoint exit):
    // 0: start
    // 1: 1st merge reached the failpoint
    // 11: 1st merge failed
    // 12: 2nd merge reached the failpoint
    // 22: 2nd merge failed (we don't care about this state)
    let state = Arc::new(AtomicU32::new(0));
    let state_clone = state.clone();

    // Every merge attempt bumps the counter, lingers long enough for the
    // test to observe the intermediate state, then panics to kill the
    // merge pipeline.
    fail::cfg_callback("before-merge-split", move || {
        use std::sync::atomic::Ordering;
        state_clone.fetch_add(1, Ordering::Relaxed);
        std::thread::sleep(std::time::Duration::from_millis(300));
        state_clone.fetch_add(10, Ordering::Relaxed);
        panic!("kill merge pipeline");
    })
    .unwrap();

    // NOTE(review): the key "body " has a trailing space and does not match
    // the "body" field mapping above — confirm whether this is intentional.
    let batch: Vec<JsonValue> =
        std::iter::repeat_with(|| serde_json::json!({"body ": TEST_TEXT, "ts": 1631072713 }))
            .take(500)
            .collect();
    // this sometimes fails because the ingest api isn't aware of the index yet?!
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    for _ in 0..4 {
        test_index_builder
            .add_documents_through_api(batch.clone())
            .await?;
    }

    let (indexing_pipeline, merge_pipeline) = test_index_builder
        .take_indexing_and_merge_pipeline()
        .await?;

    // stop the pipeline
    indexing_pipeline.kill().await;
    merge_pipeline
        .mailbox()
        .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline)
        .await?;

    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
    // respawn an indexing pipeline (and its merge pipeline) for the same index
    let pipeline_id = test_index_builder
        .indexing_service()
        .ask_for_res(SpawnPipeline {
            index_id: index_id.to_string(),
            source_config: quickwit_config::SourceConfig::ingest_api_default(),
            pipeline_uid: PipelineUid::for_test(1u128),
        })
        .await?;

    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
    // we shouldn't have had a 2nd split run yet (the 1st one hasn't panicked just yet)
    assert_eq!(state.load(Ordering::Relaxed), 1);
    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
    assert_eq!(state.load(Ordering::Relaxed), 11);

    let merge_pipeline_id = pipeline_id.merge_pipeline_id();
    let indexing_pipeline = test_index_builder
        .indexing_service()
        .ask_for_res(DetachIndexingPipeline { pipeline_id })
        .await?;
    let merge_pipeline = test_index_builder
        .indexing_service()
        .ask_for_res(DetachMergePipeline {
            pipeline_id: merge_pipeline_id,
        })
        .await?;

    indexing_pipeline.kill().await;
    merge_pipeline
        .mailbox()
        .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline)
        .await?;

    // stopping the merge pipeline makes it recheck for possible dead merges
    // (alternatively, it does that sooner when rebuilding the known split list)
    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
    // timing-wise, we can't have reached 22, but it would be logically correct to get that state
    assert_eq!(state.load(Ordering::Relaxed), 12);

    let universe = test_index_builder.universe();
    universe.kill();
    fail::cfg("before-merge-split", "off").unwrap();
    universe.quit().await;

    Ok(())
}
Loading