Skip to content

Commit e497fb1

Browse files
authored
fix: Update view version log timestamp for historical accuracy (#1218)
## What changes are included in this PR? This change updates the behavior when setting a view version as current. When we set a previously existing view version as current, we now update its log timestamp to the current time rather than reusing the original timestamp of the ViewVersion. Lets assume the following changes: 1. ViewVersion 1 is added and set active 2. View Version 2 is added and set active 3. View Version 1 is set active. This resulted in the following `version_log`: ```json "version-log": [ { "version-id": 1, "timestamp-ms": 1744716416168 }, { "version-id": 2, "timestamp-ms": 1744716449754 }, { "version-id": 1, "timestamp-ms": 1744716416168 } ], ``` Note that the last and first entries have the same timestamp, namely the time stamp of the initial ViewVersion creation. I believe this is undesired in a history, where we are interested when a certain change became active. It makes sense to use the exact timestamp of the ViewVersion if it was added in the same set of changes, but re-enabling a previously used view version (maybe years ago) should not add a history for this past timestamp. Java behaviors the same way as rust currently does. @Fokko @nastra if we agree that we should change the behavior, we should also touch Java. ## Are these changes tested? Yes
1 parent 31b0c0a commit e497fb1

File tree

2 files changed

+70
-5
lines changed

2 files changed

+70
-5
lines changed

crates/iceberg/src/spec/view_metadata.rs

+6
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,12 @@ impl ViewVersionLog {
229229
pub fn timestamp(&self) -> Result<DateTime<Utc>> {
230230
timestamp_ms_to_utc(self.timestamp_ms)
231231
}
232+
233+
/// Update the timestamp of this version log.
234+
pub(crate) fn set_timestamp_ms(&mut self, timestamp_ms: i64) -> &mut Self {
235+
self.timestamp_ms = timestamp_ms;
236+
self
237+
}
232238
}
233239

234240
pub(super) mod _serde {

crates/iceberg/src/spec/view_metadata_builder.rs

+64-5
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,20 @@ impl ViewMetadataBuilder {
219219
});
220220
}
221221

222-
self.history_entry = Some(version.log());
222+
// Use the timestamp of the snapshot if it was added in this set of changes,
223+
// otherwise use a current timestamp for the log. The view version was added
224+
// by a past transaction.
225+
let version_added_in_this_changes = self
226+
.changes
227+
.iter()
228+
.any(|update| matches!(update, ViewUpdate::AddViewVersion { view_version } if view_version.version_id() == version_id));
229+
230+
let mut log = version.log();
231+
if !version_added_in_this_changes {
232+
log.set_timestamp_ms(Utc::now().timestamp_millis());
233+
}
234+
235+
self.history_entry = Some(log);
223236

224237
Ok(self)
225238
}
@@ -257,9 +270,8 @@ impl ViewMetadataBuilder {
257270
// in this case. I prefer to add changes as the state of the builder is
258271
// potentially mutated (`last_added_version_id`), thus we should record the change.
259272
if self.last_added_version_id != Some(version_id) {
260-
self.changes.push(ViewUpdate::AddViewVersion {
261-
view_version: view_version.with_version_id(version_id),
262-
});
273+
self.changes
274+
.push(ViewUpdate::AddViewVersion { view_version });
263275
self.last_added_version_id = Some(version_id);
264276
}
265277
return Ok(version_id);
@@ -293,7 +305,6 @@ impl ViewMetadataBuilder {
293305

294306
require_unique_dialects(&view_version)?;
295307

296-
// ToDo Discuss: This check is not present in Java.
297308
// The `TableMetadataBuilder` uses these checks in multiple places - also in Java.
298309
// If we think delayed requests are a problem, I think we should also add it here.
299310
if let Some(last) = self.metadata.version_log.last() {
@@ -852,6 +863,54 @@ mod test {
852863
);
853864
}
854865

866+
#[test]
867+
fn test_use_previously_added_version() {
868+
let v2 = new_view_version(2, 1, "select 1 as count");
869+
let v3 = new_view_version(3, 1, "select count(1) as count from t2");
870+
let schema = Schema::builder().build().unwrap();
871+
872+
let log_v2 = ViewVersionLog::new(2, v2.timestamp_ms());
873+
let log_v3 = ViewVersionLog::new(3, v3.timestamp_ms());
874+
875+
let metadata_v2 = builder_without_changes()
876+
.set_current_version(v2.clone(), schema.clone())
877+
.unwrap()
878+
.build()
879+
.unwrap()
880+
.metadata;
881+
882+
// Log should use the exact timestamp of v1
883+
assert_eq!(metadata_v2.version_log.last().unwrap(), &log_v2);
884+
885+
// Add second version, should use exact timestamp of v2
886+
let metadata_v3 = metadata_v2
887+
.into_builder()
888+
.set_current_version(v3.clone(), schema)
889+
.unwrap()
890+
.build()
891+
.unwrap()
892+
.metadata;
893+
894+
assert_eq!(metadata_v3.version_log[1..], vec![
895+
log_v2.clone(),
896+
log_v3.clone()
897+
]);
898+
899+
// Re-use Version 1, add a new log entry with a new timestamp
900+
let metadata_v4 = metadata_v3
901+
.into_builder()
902+
.set_current_version_id(2)
903+
.unwrap()
904+
.build()
905+
.unwrap()
906+
.metadata;
907+
908+
// Last entry should be equal to v2 but with an updated timestamp
909+
let entry = metadata_v4.version_log.last().unwrap();
910+
assert_eq!(entry.version_id(), 2);
911+
assert!(entry.timestamp_ms() > v2.timestamp_ms());
912+
}
913+
855914
#[test]
856915
fn test_assign_uuid() {
857916
let builder = builder_without_changes();

0 commit comments

Comments
 (0)