Skip to content

Commit 1009542

Browse files
committed
change!: make hashing fallible to prepare for collision detection
This seems better than killing the process as Git does, since we’re in a library context and it would be bad if you could perform denial‐of‐service attacks on a server by sending it hash collisions. (Although there are probably cheaper ways to mount a denial‐of‐service attack.) It does mean a lot of churn across the tree, but the underlying breaking change is to a low‐level API, and the change is usually just an adjustment to variants of an existing error type, so I expect that most downstream users will require little to no adaption for this change.
1 parent 2d82ec0 commit 1009542

File tree

36 files changed

+171
-101
lines changed

36 files changed

+171
-101
lines changed

gix-commitgraph/src/file/verify.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ pub mod checksum {
4545
#[derive(thiserror::Error, Debug)]
4646
#[allow(missing_docs)]
4747
pub enum Error {
48+
#[error("failed to hash commit graph file")]
49+
Hasher(#[from] gix_hash::hasher::Error),
4850
#[error(transparent)]
4951
Verify(#[from] gix_hash::verify::Error),
5052
}
@@ -153,7 +155,7 @@ impl File {
153155
let data_len_without_trailer = self.data.len() - self.hash_len;
154156
let mut hasher = gix_hash::hasher(self.object_hash());
155157
hasher.update(&self.data[..data_len_without_trailer]);
156-
let actual = hasher.digest();
158+
let actual = hasher.try_finalize()?;
157159
actual.verify(self.checksum())?;
158160
Ok(actual)
159161
}

gix-diff/tests/diff/blob/pipeline.rs

+14-14
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ pub(crate) mod convert_to_diffable {
7272

7373
let mut db = ObjectDb::default();
7474
let b_content = "b-content";
75-
let id = db.insert(b_content);
75+
let id = db.insert(b_content)?;
7676

7777
let out = filter.convert_to_diffable(
7878
&id,
@@ -129,7 +129,7 @@ pub(crate) mod convert_to_diffable {
129129
assert_eq!(buf.len(), 0, "it should avoid querying that data in the first place");
130130

131131
let mut db = ObjectDb::default();
132-
let id = db.insert(large_content);
132+
let id = db.insert(large_content)?;
133133
let out = filter.convert_to_diffable(
134134
&id,
135135
EntryKind::Blob,
@@ -211,7 +211,7 @@ pub(crate) mod convert_to_diffable {
211211
drop(tmp);
212212

213213
let mut db = ObjectDb::default();
214-
let id = db.insert(large_content);
214+
let id = db.insert(large_content)?;
215215

216216
let out = filter.convert_to_diffable(
217217
&id,
@@ -397,7 +397,7 @@ pub(crate) mod convert_to_diffable {
397397

398398
let mut db = ObjectDb::default();
399399
let b_content = "b-content\n";
400-
let id = db.insert(b_content);
400+
let id = db.insert(b_content)?;
401401

402402
let out = filter.convert_to_diffable(
403403
&id,
@@ -416,7 +416,7 @@ pub(crate) mod convert_to_diffable {
416416

417417
let mut db = ObjectDb::default();
418418
let b_content = "b\n";
419-
let id = db.insert(b_content);
419+
let id = db.insert(b_content)?;
420420
let out = filter.convert_to_diffable(
421421
&id,
422422
EntryKind::Blob,
@@ -490,7 +490,7 @@ pub(crate) mod convert_to_diffable {
490490

491491
let mut db = ObjectDb::default();
492492
let b_content = "b-co\0ntent\n";
493-
let id = db.insert(b_content);
493+
let id = db.insert(b_content)?;
494494

495495
let out = filter.convert_to_diffable(
496496
&id,
@@ -509,7 +509,7 @@ pub(crate) mod convert_to_diffable {
509509

510510
let platform = attributes.at_entry("c", None, &gix_object::find::Never)?;
511511

512-
let id = db.insert("b");
512+
let id = db.insert("b")?;
513513
let out = filter.convert_to_diffable(
514514
&id,
515515
EntryKind::Blob,
@@ -638,7 +638,7 @@ pub(crate) mod convert_to_diffable {
638638
assert_eq!(out.data, Some(pipeline::Data::Buffer { is_derived: false }));
639639
assert_eq!(buf.as_bstr(), "a\n", "unconditionally use git according to mode");
640640

641-
let id = db.insert("a\n");
641+
let id = db.insert("a\n")?;
642642
for mode in worktree_modes {
643643
let out = filter.convert_to_diffable(
644644
&id,
@@ -714,7 +714,7 @@ pub(crate) mod convert_to_diffable {
714714
assert_eq!(buf.len(), 0, "always cleared");
715715

716716
buf.push(1);
717-
let id = db.insert("link-target");
717+
let id = db.insert("link-target")?;
718718
let out = filter.convert_to_diffable(
719719
&id,
720720
EntryKind::Link,
@@ -761,7 +761,7 @@ pub(crate) mod convert_to_diffable {
761761
assert_eq!(buf.len(), 0, "it's always cleared before any potential use");
762762
}
763763

764-
let id = db.insert("b\n");
764+
let id = db.insert("b\n")?;
765765
for mode in all_modes {
766766
buf.push(1);
767767
let out = filter.convert_to_diffable(
@@ -814,7 +814,7 @@ pub(crate) mod convert_to_diffable {
814814
);
815815
}
816816

817-
let id = db.insert("c\n");
817+
let id = db.insert("c\n")?;
818818
for mode in worktree_modes {
819819
let out = filter.convert_to_diffable(
820820
&id,
@@ -863,7 +863,7 @@ pub(crate) mod convert_to_diffable {
863863
assert_eq!(buf.len(), 0);
864864
}
865865

866-
let id = db.insert("unset\n");
866+
let id = db.insert("unset\n")?;
867867
for mode in all_modes {
868868
let out = filter.convert_to_diffable(
869869
&id,
@@ -890,7 +890,7 @@ pub(crate) mod convert_to_diffable {
890890
}
891891

892892
let platform = attributes.at_entry("d", None, &gix_object::find::Never)?;
893-
let id = db.insert("d-in-db");
893+
let id = db.insert("d-in-db")?;
894894
for mode in worktree_modes {
895895
let out = filter.convert_to_diffable(
896896
&null,
@@ -959,7 +959,7 @@ pub(crate) mod convert_to_diffable {
959959
"no text filter, so git conversion was applied for worktree source"
960960
);
961961

962-
let id = db.insert("e-in-db");
962+
let id = db.insert("e-in-db")?;
963963
let out = filter.convert_to_diffable(
964964
&id,
965965
EntryKind::Blob,

gix-diff/tests/diff/blob/platform.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ fn resources_of_worktree_and_odb_and_check_link() -> crate::Result {
3030

3131
let mut db = ObjectDb::default();
3232
let a_content = "a-content";
33-
let id = db.insert(a_content);
33+
let id = db.insert(a_content)?;
3434
platform.set_resource(
3535
id,
3636
EntryKind::BlobExecutable,
@@ -194,7 +194,7 @@ fn diff_binary() -> crate::Result {
194194

195195
let mut db = ObjectDb::default();
196196
let a_content = "b";
197-
let id = db.insert(a_content);
197+
let id = db.insert(a_content)?;
198198
platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::NewOrDestination, &db)?;
199199

200200
let out = platform.prepare_diff()?;
@@ -235,7 +235,7 @@ fn diff_performed_despite_external_command() -> crate::Result {
235235

236236
let mut db = ObjectDb::default();
237237
let a_content = "b";
238-
let id = db.insert(a_content);
238+
let id = db.insert(a_content)?;
239239
platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::NewOrDestination, &db)?;
240240

241241
let out = platform.prepare_diff()?;
@@ -277,7 +277,7 @@ fn diff_skipped_due_to_external_command_and_enabled_option() -> crate::Result {
277277

278278
let mut db = ObjectDb::default();
279279
let a_content = "b";
280-
let id = db.insert(a_content);
280+
let id = db.insert(a_content)?;
281281
platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::NewOrDestination, &db)?;
282282

283283
let out = platform.prepare_diff()?;

gix-diff/tests/diff/main.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ mod util {
5151

5252
impl ObjectDb {
5353
/// Insert `data` and return its hash. That can be used to find it again.
54-
pub fn insert(&mut self, data: &str) -> gix_hash::ObjectId {
55-
let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes());
54+
pub fn insert(&mut self, data: &str) -> Result<gix_hash::ObjectId, Error> {
55+
let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes())?;
5656
self.data_by_id.insert(id, data.into());
57-
id
57+
Ok(id)
5858
}
5959
}
6060
}

gix-diff/tests/diff/rewrites/tracker.rs

+11-11
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ fn copy_by_similarity_reports_limit_if_encountered() -> crate::Result {
9292
(Change::addition(), "a-cpy-2", "a"),
9393
(Change::modification(), "d", "ab"),
9494
],
95-
);
95+
)?;
9696

9797
let mut calls = 0;
9898
let out = util::assert_emit_with_objects(
@@ -145,7 +145,7 @@ fn copy_by_id() -> crate::Result {
145145
(Change::addition(), "a-cpy-2", "a"),
146146
(Change::modification(), "d", "a"),
147147
],
148-
);
148+
)?;
149149

150150
let mut calls = 0;
151151
let out = util::assert_emit_with_objects(
@@ -218,7 +218,7 @@ fn copy_by_id_search_in_all_sources() -> crate::Result {
218218
(Change::addition(), "a-cpy-1", "a"),
219219
(Change::addition(), "a-cpy-2", "a"),
220220
],
221-
);
221+
)?;
222222

223223
let mut calls = 0;
224224
let content_id = hex_to_id("2e65efe2a145dda7ee51d1741299f848e5bf752e");
@@ -299,7 +299,7 @@ fn copy_by_50_percent_similarity() -> crate::Result {
299299
(Change::addition(), "a-cpy-2", "a\nc"),
300300
(Change::modification(), "d", "a"),
301301
],
302-
);
302+
)?;
303303

304304
let mut calls = 0;
305305
let out = util::assert_emit_with_objects(
@@ -377,7 +377,7 @@ fn copy_by_id_in_additions_only() -> crate::Result {
377377
(Change::modification(), "a", "a"),
378378
(Change::modification(), "a-cpy-1", "a"),
379379
],
380-
);
380+
)?;
381381

382382
let mut calls = 0;
383383
let out = util::assert_emit_with_objects(
@@ -429,7 +429,7 @@ fn rename_by_similarity_reports_limit_if_encountered() -> crate::Result {
429429
(Change::addition(), "b", "firt\nsecond\n"),
430430
(Change::addition(), "c", "second\nunrelated\n"),
431431
],
432-
);
432+
)?;
433433

434434
let mut calls = 0;
435435
let out = util::assert_emit_with_objects(
@@ -475,7 +475,7 @@ fn rename_by_50_percent_similarity() -> crate::Result {
475475
(Change::addition(), "b", "firt\nsecond\n"),
476476
(Change::addition(), "c", "second\nunrelated\n"),
477477
],
478-
);
478+
)?;
479479

480480
let mut calls = 0;
481481
let out = util::assert_emit_with_objects(
@@ -596,7 +596,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
596596
(Change::deletion(), "a", "first\nsecond\n"),
597597
(Change::addition(), "b", "firt\nsecond\n"),
598598
],
599-
);
599+
)?;
600600

601601
let mut calls = 0;
602602
let out = util::assert_emit_with_objects(
@@ -803,16 +803,16 @@ mod util {
803803
pub fn add_retained_blobs<'a>(
804804
tracker: &mut rewrites::Tracker<Change>,
805805
blobs: impl IntoIterator<Item = (Change, &'a str, &'a str)>,
806-
) -> ObjectDb {
806+
) -> crate::Result<ObjectDb> {
807807
let mut db = ObjectDb::default();
808808
for (mut change, location, data) in blobs {
809-
change.id = db.insert(data);
809+
change.id = db.insert(data)?;
810810
assert!(
811811
tracker.try_push_change(change, location.into()).is_none(),
812812
"input changes must be tracked"
813813
);
814814
}
815-
db
815+
Ok(db)
816816
}
817817

818818
pub fn assert_emit(

gix-filter/src/ident.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ pub mod apply {
4848
pub enum Error {
4949
#[error("Could not allocate buffer")]
5050
OutOfMemory(#[from] std::collections::TryReserveError),
51+
#[error("Could not hash blob")]
52+
Hasher(#[from] gix_hash::hasher::Error),
5153
}
5254
}
5355

@@ -66,7 +68,7 @@ pub fn apply(src: &[u8], object_hash: gix_hash::Kind, buf: &mut Vec<u8>) -> Resu
6668
while let Some(pos) = src[ofs..].find(b"$Id$") {
6769
let id = match id {
6870
None => {
69-
let new_id = gix_object::compute_hash(object_hash, gix_object::Kind::Blob, src);
71+
let new_id = gix_object::compute_hash(object_hash, gix_object::Kind::Blob, src)?;
7072
id = new_id.into();
7173
clear_and_set_capacity(buf, src.len() + HASH_LEN)?; // pre-allocate for one ID
7274
new_id

gix-hash/src/hasher/io.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ use crate::{hasher, Hasher};
66
pub enum Error {
77
#[error(transparent)]
88
Io(#[from] std::io::Error),
9+
#[error("Failed to hash data")]
10+
Hasher(#[from] hasher::Error),
911
}
1012

1113
/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
@@ -74,7 +76,7 @@ pub fn bytes_with_hasher(
7476
}
7577
}
7678

77-
let id = hasher.digest();
79+
let id = hasher.try_finalize()?;
7880
progress.show_throughput(start);
7981
Ok(id)
8082
}

gix-hash/src/hasher/mod.rs

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
/// The error returned by [`Hasher::try_finalize()`].
2+
#[derive(Debug, thiserror::Error)]
3+
#[allow(missing_docs)]
4+
pub enum Error {}
5+
16
/// A implementation of the Sha1 hash, which can be used once.
27
#[derive(Default, Clone)]
38
pub struct Hasher(gix_features::hash::Hasher);
@@ -8,8 +13,8 @@ impl Hasher {
813
self.0.update(bytes);
914
}
1015
/// Finalize the hash and produce an object ID.
11-
pub fn digest(self) -> crate::ObjectId {
12-
self.0.digest().into()
16+
pub fn try_finalize(self) -> Result<crate::ObjectId, Error> {
17+
Ok(self.0.digest().into())
1318
}
1419
}
1520

gix-hash/tests/object_id/mod.rs

+19-7
Original file line numberDiff line numberDiff line change
@@ -47,20 +47,26 @@ mod sha1 {
4747

4848
use gix_hash::{hasher, Kind, ObjectId};
4949

50-
fn hash_contents(s: &[u8]) -> ObjectId {
50+
fn hash_contents(s: &[u8]) -> Result<ObjectId, hasher::Error> {
5151
let mut hasher = hasher(Kind::Sha1);
5252
hasher.update(s);
53-
hasher.digest()
53+
hasher.try_finalize()
5454
}
5555

5656
#[test]
5757
fn empty_blob() {
58-
assert_eq!(ObjectId::empty_blob(Kind::Sha1), hash_contents(b"blob 0\0"));
58+
assert_eq!(
59+
ObjectId::empty_blob(Kind::Sha1),
60+
hash_contents(b"blob 0\0").expect("empty blob to not collide"),
61+
);
5962
}
6063

6164
#[test]
6265
fn empty_tree() {
63-
assert_eq!(ObjectId::empty_tree(Kind::Sha1), hash_contents(b"tree 0\0"));
66+
assert_eq!(
67+
ObjectId::empty_tree(Kind::Sha1),
68+
hash_contents(b"tree 0\0").expect("empty tree to not collide"),
69+
);
6470
}
6571

6672
/// Check the test vectors from RFC 3174.
@@ -83,7 +89,7 @@ mod sha1 {
8389
];
8490
for (input, output) in fixtures {
8591
assert_eq!(
86-
hash_contents(input),
92+
hash_contents(input).expect("RFC inputs to not collide"),
8793
ObjectId::from_str(&output.to_lowercase().replace(' ', "")).expect("RFC digests to be valid"),
8894
);
8995
}
@@ -103,7 +109,13 @@ mod sha1 {
103109
// BUG: These should be detected as a collision attack.
104110
let expected =
105111
ObjectId::from_str("8ac60ba76f1999a1ab70223f225aefdc78d4ddc0").expect("Shambles digest to be valid");
106-
assert_eq!(hash_contents(message_a), expected);
107-
assert_eq!(hash_contents(message_b), expected);
112+
assert_eq!(
113+
hash_contents(message_a).expect("collision attacks to not be detected"),
114+
expected,
115+
);
116+
assert_eq!(
117+
hash_contents(message_b).expect("collision attacks to not be detected"),
118+
expected,
119+
);
108120
}
109121
}

0 commit comments

Comments
 (0)