|
1 | 1 | use crate::actors::{ |
2 | 2 | backends::{BackendManagerActor, RequestBackends, StateInterest}, |
3 | | - meta::{MetaStoreActor, ObjectMetas}, |
| 3 | + meta::{MetaStoreActor, ScanMeta}, |
4 | 4 | zstor::{Rebuild, ZstorActor}, |
5 | 5 | }; |
6 | 6 | use actix::prelude::*; |
7 | | -use log::{error, warn}; |
| 7 | +use log::{debug, error, warn}; |
8 | 8 | use std::time::Duration; |
| 9 | +use std::time::{SystemTime, UNIX_EPOCH}; |
9 | 10 |
|
10 | 11 | /// Amount of time, in seconds, between starting a new sweep of the backend objects (currently 10 minutes). |
11 | 12 | const OBJECT_SWEEP_INTERVAL_SECONDS: u64 = 60 * 10; |
@@ -65,52 +66,80 @@ impl Handler<SweepObjects> for RepairActor { |
65 | 66 | let zstor = self.zstor.clone(); |
66 | 67 |
|
67 | 68 | Box::pin(async move { |
68 | | - let obj_metas = match meta.send(ObjectMetas).await { |
69 | | - Err(e) => { |
70 | | - error!("Could not request object metas from metastore: {}", e); |
71 | | - return; |
72 | | - } |
73 | | - Ok(om) => match om { |
74 | | - Err(e) => { |
75 | | - error!("Could not get object metas from metastore: {}", e); |
76 | | - return; |
77 | | - } |
78 | | - Ok(om) => om, |
79 | | - }, |
80 | | - }; |
| 69 | + let start_time = SystemTime::now() |
| 70 | + .duration_since(UNIX_EPOCH) |
| 71 | + .unwrap() |
| 72 | + .as_secs(); |
81 | 73 |
|
82 | | - for (key, metadata) in obj_metas.into_iter() { |
83 | | - let backend_requests = metadata |
84 | | - .shards() |
85 | | - .iter() |
86 | | - .map(|shard_info| shard_info.zdb()) |
87 | | - .cloned() |
88 | | - .collect::<Vec<_>>(); |
89 | | - let backends = match backend_manager |
90 | | - .send(RequestBackends { |
91 | | - backend_requests, |
92 | | - interest: StateInterest::Readable, |
| 74 | + // start scanning from the beginning (cursor == None) and let the metastore choose the backend_idx |
| 75 | + let mut cursor = None; |
| 76 | + let mut backend_idx = None; |
| 77 | + loop { |
| 78 | + // scan keys from the metastore |
| 79 | + let (idx, new_cursor, metas) = match meta |
| 80 | + .send(ScanMeta { |
| 81 | + cursor: cursor.clone(), |
| 82 | + backend_idx, |
| 83 | + max_timestamp: Some(start_time), |
93 | 84 | }) |
94 | 85 | .await |
95 | 86 | { |
96 | 87 | Err(e) => { |
97 | | - error!("Failed to request backends: {}", e); |
| 88 | + error!("Could not request meta keys from metastore: {}", e); |
98 | 89 | return; |
99 | 90 | } |
100 | | - Ok(backends) => backends, |
| 91 | + Ok(result) => match result { |
| 92 | + Err(e) => { |
| 93 | + error!("Could not get meta keys from metastore: {}", e); |
| 94 | + return; |
| 95 | + } |
| 96 | + Ok(res) => res, |
| 97 | + }, |
101 | 98 | }; |
102 | | - let must_rebuild = backends.into_iter().any(|b| !matches!(b, Ok(Some(_)))); |
103 | | - if must_rebuild { |
104 | | - if let Err(e) = zstor |
105 | | - .send(Rebuild { |
106 | | - file: None, |
107 | | - key: Some(key), |
| 99 | + |
| 100 | + // iterate over the keys and check if the backends are healthy |
| 101 | + // if not, rebuild the object |
| 102 | + for (key, metadata) in metas.into_iter() { |
| 103 | + let backend_requests = metadata |
| 104 | + .shards() |
| 105 | + .iter() |
| 106 | + .map(|shard_info| shard_info.zdb()) |
| 107 | + .cloned() |
| 108 | + .collect::<Vec<_>>(); |
| 109 | + let backends = match backend_manager |
| 110 | + .send(RequestBackends { |
| 111 | + backend_requests, |
| 112 | + interest: StateInterest::Readable, |
108 | 113 | }) |
109 | 114 | .await |
110 | 115 | { |
111 | | - warn!("Failed to rebuild data: {}", e); |
| 116 | + Err(e) => { |
| 117 | + error!("Failed to request backends: {}", e); |
| 118 | + return; |
| 119 | + } |
| 120 | + Ok(backends) => backends, |
| 121 | + }; |
| 122 | + let must_rebuild = backends.into_iter().any(|b| !matches!(b, Ok(Some(_)))); |
| 123 | + if must_rebuild { |
| 124 | + if let Err(e) = zstor |
| 125 | + .send(Rebuild { |
| 126 | + file: None, |
| 127 | + key: Some(key), |
| 128 | + }) |
| 129 | + .await |
| 130 | + { |
| 131 | + warn!("Failed to rebuild data: {}", e); |
| 132 | + } |
112 | 133 | } |
113 | 134 | } |
| 135 | + |
| 136 | + if new_cursor.is_none() { |
| 137 | + debug!("there is no more old data to rebuild"); |
| 138 | + break; |
| 139 | + } |
| 140 | + |
| 141 | + cursor = new_cursor; |
| 142 | + backend_idx = Some(idx); |
114 | 143 | } |
115 | 144 | }) |
116 | 145 | } |
|
0 commit comments