Skip to content

Commit 030d8ec

Browse files
committed
pack-objects: refactor path-walk delta phase
Previously, the --path-walk option to 'git pack-objects' would compute deltas inline with the path-walk logic. This would make the progress indicator look like it is taking a long time to enumerate objects, and then very quickly computed deltas. Instead of computing deltas on each region of objects organized by tree, store a list of regions corresponding to these groups. These can later be pulled from the list for delta compression before doing the "global" delta search. This presents a new progress indicator that can be used in tests to verify that this stage is happening. The current implementation is not integrated with threads, but could be done in a future update. Since we do not attempt to sort objects by size until after exploring all trees, we can remove the previous change to t5530 due to a different error message appearing first. Signed-off-by: Derrick Stolee <[email protected]>
1 parent 0f3040b commit 030d8ec

File tree

4 files changed

+74
-33
lines changed

4 files changed

+74
-33
lines changed

builtin/pack-objects.c

+56-25
Original file line numberDiff line numberDiff line change
@@ -3204,6 +3204,50 @@ static int should_attempt_deltas(struct object_entry *entry)
32043204
return 1;
32053205
}
32063206

3207+
static void find_deltas_for_region(struct object_entry *list UNUSED,
3208+
struct packing_region *region,
3209+
unsigned int *processed)
3210+
{
3211+
struct object_entry **delta_list;
3212+
uint32_t delta_list_nr = 0;
3213+
3214+
ALLOC_ARRAY(delta_list, region->nr);
3215+
for (uint32_t i = 0; i < region->nr; i++) {
3216+
struct object_entry *entry = to_pack.objects + region->start + i;
3217+
if (should_attempt_deltas(entry))
3218+
delta_list[delta_list_nr++] = entry;
3219+
}
3220+
3221+
QSORT(delta_list, delta_list_nr, type_size_sort);
3222+
find_deltas(delta_list, &delta_list_nr, window, depth, processed);
3223+
free(delta_list);
3224+
}
3225+
3226+
static void find_deltas_by_region(struct object_entry *list,
3227+
struct packing_region *regions,
3228+
uint32_t start, uint32_t nr)
3229+
{
3230+
unsigned int processed = 0;
3231+
uint32_t progress_nr;
3232+
3233+
if (!nr)
3234+
return;
3235+
3236+
progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
3237+
3238+
if (progress)
3239+
progress_state = start_progress(_("Compressing objects by path"),
3240+
progress_nr);
3241+
3242+
while (nr--)
3243+
find_deltas_for_region(list,
3244+
&regions[start++],
3245+
&processed);
3246+
3247+
display_progress(progress_state, progress_nr);
3248+
stop_progress(&progress_state);
3249+
}
3250+
32073251
static void prepare_pack(int window, int depth)
32083252
{
32093253
struct object_entry **delta_list;
@@ -3228,6 +3272,10 @@ static void prepare_pack(int window, int depth)
32283272
if (!to_pack.nr_objects || !window || !depth)
32293273
return;
32303274

3275+
if (path_walk)
3276+
find_deltas_by_region(to_pack.objects, to_pack.regions,
3277+
0, to_pack.nr_regions);
3278+
32313279
ALLOC_ARRAY(delta_list, to_pack.nr_objects);
32323280
nr_deltas = n = 0;
32333281

@@ -4165,10 +4213,8 @@ static int add_objects_by_path(const char *path,
41654213
enum object_type type,
41664214
void *data)
41674215
{
4168-
struct object_entry **delta_list;
41694216
size_t oe_start = to_pack.nr_objects;
41704217
size_t oe_end;
4171-
unsigned int sub_list_size;
41724218
unsigned int *processed = data;
41734219

41744220
/*
@@ -4201,32 +4247,17 @@ static int add_objects_by_path(const char *path,
42014247
if (oe_end == oe_start || !window)
42024248
return 0;
42034249

4204-
sub_list_size = 0;
4205-
ALLOC_ARRAY(delta_list, oe_end - oe_start);
4250+
ALLOC_GROW(to_pack.regions,
4251+
to_pack.nr_regions + 1,
4252+
to_pack.nr_regions_alloc);
42064253

4207-
for (size_t i = 0; i < oe_end - oe_start; i++) {
4208-
struct object_entry *entry = to_pack.objects + oe_start + i;
4254+
to_pack.regions[to_pack.nr_regions].start = oe_start;
4255+
to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
4256+
to_pack.nr_regions++;
42094257

4210-
if (!should_attempt_deltas(entry))
4211-
continue;
4258+
*processed += oids->nr;
4259+
display_progress(progress_state, *processed);
42124260

4213-
delta_list[sub_list_size++] = entry;
4214-
}
4215-
4216-
/*
4217-
* Find delta bases among this list of objects that all match the same
4218-
* path. This causes the delta compression to be interleaved in the
4219-
* object walk, which can lead to confusing progress indicators. This is
4220-
* also incompatible with threaded delta calculations. In the future,
4221-
* consider creating a list of regions in the full to_pack.objects array
4222-
* that could be picked up by the threaded delta computation.
4223-
*/
4224-
if (sub_list_size && window) {
4225-
QSORT(delta_list, sub_list_size, type_size_sort);
4226-
find_deltas(delta_list, &sub_list_size, window, depth, processed);
4227-
}
4228-
4229-
free(delta_list);
42304261
return 0;
42314262
}
42324263

pack-objects.h

+12
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,23 @@ struct object_entry {
118118
unsigned ext_base:1; /* delta_idx points outside packlist */
119119
};
120120

121+
/**
122+
* A packing region is a section of the packing_data.objects array
123+
* as given by a starting index and a number of elements.
124+
*/
125+
struct packing_region {
126+
uint32_t start;
127+
uint32_t nr;
128+
};
129+
121130
struct packing_data {
122131
struct repository *repo;
123132
struct object_entry *objects;
124133
uint32_t nr_objects, nr_alloc;
125134

135+
struct packing_region *regions;
136+
uint32_t nr_regions, nr_regions_alloc;
137+
126138
int32_t *index;
127139
uint32_t index_size;
128140

t/t5300-pack-object.sh

+6-2
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,9 @@ done
677677
# Basic "repack everything" test
678678
test_expect_success '--path-walk pack everything' '
679679
git -C server rev-parse HEAD >in &&
680-
git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
680+
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
681+
--stdout --revs --path-walk --progress <in >out.pack 2>err &&
682+
grep "Compressing objects by path" err &&
681683
git -C server index-pack --stdin <out.pack
682684
'
683685

@@ -687,7 +689,9 @@ test_expect_success '--path-walk thin pack' '
687689
$(git -C server rev-parse HEAD)
688690
^$(git -C server rev-parse HEAD~2)
689691
EOF
690-
git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
692+
GIT_PROGRESS_DELAY=0 git -C server pack-objects \
693+
--thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
694+
grep "Compressing objects by path" err &&
691695
git -C server index-pack --fix-thin --stdin <out.pack
692696
'
693697

t/t5530-upload-pack-error.sh

-6
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
3535
hexsz=$(test_oid hexsz) &&
3636
printf "%04xwant %s\n00000009done\n0000" \
3737
$(($hexsz + 10)) $head >input &&
38-
39-
# The current implementation of path-walk causes a different
40-
# error message. This will be changed by a future refactoring.
41-
GIT_TEST_PACK_PATH_WALK=0 &&
42-
export GIT_TEST_PACK_PATH_WALK &&
43-
4438
test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
4539
test_grep "unable to read" output.err &&
4640
test_grep "pack-objects died" output.err

0 commit comments

Comments
 (0)