Skip to content

Commit befba3b

Browse files
committed
disable LRU cache if we have to get statistics
This slows down the algorithm by 20% or more, but at least shows correct stats. This issue will go away once we can stream packs.
1 parent 4b204b8 commit befba3b

File tree

4 files changed

+49
-10
lines changed

4 files changed

+49
-10
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ The CLI uses various crates, please see _'Development Status'_ for details.
6868
* [ ] encode
6969
* [ ] create new packs
7070
* [x] verify pack with statistics
71+
* [ ] pack streaming (i.e. indexing + resolution)
72+
* [ ] use pack streaming for verification for performance and correctness
7173
* [ ] API documentation with examples
7274
* **git-transport**
7375
* [ ] via ssh

git-odb/src/pack/index/verify.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::pack::DecodeEntryResult;
1+
use crate::pack::{cache, DecodeEntryResult};
22
use crate::{pack, pack::index};
33
use git_features::progress::{self, Progress};
44
use git_object::SHA1_SIZE;
@@ -88,16 +88,18 @@ impl index::File {
8888
/// If `pack` is provided, it is expected (and validated to be) the pack belonging to this index.
8989
/// It will be used to validate internal integrity of the pack before checking each objects integrity
9090
/// is indeed as advertised via its SHA1 as stored in this index, as well as the CRC32 hash.
91-
pub fn verify_checksum_of_index<P>(
91+
pub fn verify_checksum_of_index<P, C>(
9292
&self,
9393
pack: Option<&pack::File>,
9494
progress: Option<P>,
95+
make_cache: impl Fn() -> C + Send + Sync,
9596
) -> Result<(git_object::Id, Option<PackFileChecksumResult>), ChecksumError>
9697
where
9798
P: Progress,
9899
<P as Progress>::SubProgress: Send,
100+
C: cache::DecodeEntry,
99101
{
100-
use crate::pack::{cache, ResolvedBase};
102+
use crate::pack::ResolvedBase;
101103
use git_features::parallel::{self, in_parallel_if};
102104

103105
let mut root = progress::DoOrDiscard::from(progress);
@@ -224,7 +226,7 @@ impl index::File {
224226
.init(Some(self.num_objects()), Some("objects"));
225227
let state_per_thread = |index| {
226228
(
227-
cache::DecodeEntryLRU::default(),
229+
make_cache(),
228230
Vec::with_capacity(2048),
229231
reduce_progress
230232
.lock()

git-odb/tests/pack/index.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ mod method {
103103

104104
use common_macros::b_tree_map;
105105
use git_features::progress::Discard;
106+
use git_odb::pack::cache::DecodeEntryNoop;
106107

107108
#[test]
108109
fn pack_lookup() {
@@ -113,14 +114,19 @@ fn pack_lookup() {
113114
index::PackFileChecksumResult {
114115
average: DecodeEntryResult {
115116
kind: git_object::Kind::Tree,
116-
num_deltas: 0,
117+
num_deltas: 1,
117118
decompressed_size: 3456,
118119
compressed_size: 1725,
119120
object_size: 9621,
120121
},
121122
objects_per_chain_length: b_tree_map! {
122123
0 => 18,
123-
1 => 12
124+
1 => 4,
125+
2 => 3,
126+
3 => 1,
127+
4 => 2,
128+
5 => 1,
129+
6 => 1,
124130
},
125131
total_compressed_entries_size: 51753,
126132
total_decompressed_entries_size: 103701,
@@ -162,7 +168,8 @@ fn pack_lookup() {
162168
},
163169
objects_per_chain_length: b_tree_map! {
164170
0 => 30,
165-
1 => 12
171+
1 => 6,
172+
2 => 6,
166173
},
167174
total_compressed_entries_size: 3604,
168175
total_decompressed_entries_size: 4997,
@@ -177,7 +184,7 @@ fn pack_lookup() {
177184
assert_eq!(pack.kind(), pack::Kind::V2);
178185
assert_eq!(pack.num_objects(), idx.num_objects());
179186
assert_eq!(
180-
idx.verify_checksum_of_index(Some(&pack), Discard.into())
187+
idx.verify_checksum_of_index(Some(&pack), Discard.into(), || DecodeEntryNoop)
181188
.unwrap(),
182189
(idx.checksum_of_index(), Some(stats.to_owned()))
183190
);
@@ -221,7 +228,8 @@ fn iter() {
221228
assert_eq!(idx.version(), *version);
222229
assert_eq!(idx.num_objects(), *num_objects);
223230
assert_eq!(
224-
idx.verify_checksum_of_index(None, Discard.into()).unwrap(),
231+
idx.verify_checksum_of_index(None, Discard.into(), || DecodeEntryNoop)
232+
.unwrap(),
225233
(idx.checksum_of_index(), None)
226234
);
227235
assert_eq!(idx.checksum_of_index(), hex_to_id(index_checksum));

gitoxide-core/src/lib.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use anyhow::{anyhow, Context, Result};
22
use bytesize::ByteSize;
33
use git_features::progress::Progress;
4+
use git_object::Kind;
45
use git_odb::pack::{self, index};
56
use std::{io, path::Path};
67

@@ -38,7 +39,33 @@ where
3839
Err(e)
3940
})
4041
.ok();
41-
idx.verify_checksum_of_index(pack.as_ref(), progress)?
42+
enum EitherCache {
43+
Left(pack::cache::DecodeEntryNoop),
44+
Right(pack::cache::DecodeEntryLRU),
45+
};
46+
impl pack::cache::DecodeEntry for EitherCache {
47+
fn put(&mut self, offset: u64, data: &[u8], kind: Kind, compressed_size: usize) {
48+
match self {
49+
EitherCache::Left(v) => v.put(offset, data, kind, compressed_size),
50+
EitherCache::Right(v) => v.put(offset, data, kind, compressed_size),
51+
}
52+
}
53+
54+
fn get(&mut self, offset: u64, out: &mut Vec<u8>) -> Option<(Kind, usize)> {
55+
match self {
56+
EitherCache::Left(v) => v.get(offset, out),
57+
EitherCache::Right(v) => v.get(offset, out),
58+
}
59+
}
60+
}
61+
idx.verify_checksum_of_index(pack.as_ref(), progress, || -> EitherCache {
62+
if output_statistics {
63+
// turn off acceleration as we need to see entire chains all the time
64+
EitherCache::Left(pack::cache::DecodeEntryNoop)
65+
} else {
66+
EitherCache::Right(pack::cache::DecodeEntryLRU::default())
67+
}
68+
})?
4269
}
4370
ext => {
4471
return Err(anyhow!(

0 commit comments

Comments
 (0)