Skip to content

Commit b08a9c2

Browse files
dschoKevin Willford
authored and
Kevin Willford
committed
Merge 'gvfs/midx-verify'
This is part 2 of 3 in hardening the MIDX feature and preparing to re-do the MIDX OID fanout change from an earlier attempt. This PR simply creates a new mode for the MIDX builtin: `git midx --verify`. This will check the MIDX file for incorrect values in several ways, including an incorrect offset value (which was the real bug before). I anticipate adding this check to the OS Repo Tests to give us an extra layer of validation before merging changes into GVFS. (That step will auto-delete the MIDX if the verify fails.)
2 parents aefefc5 + 03b0500 commit b08a9c2

File tree

7 files changed

+386
-23
lines changed

7 files changed

+386
-23
lines changed

Documentation/git-midx.txt

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,28 +24,34 @@ OPTIONS
2424

2525
--clear::
2626
If specified, delete the midx file specified by midx-head, and
27-
midx-head. (Cannot be combined with --write or --read.)
27+
midx-head. (Cannot be combined with `--write`, `--read`, or
28+
`--verify`.)
29+
30+
--verify::
31+
If specified, check the midx file specified by midx-head for
32+
corruption or invalid data. (Cannot be combined with `--write`,
33+
`--read`, or `--clear`.)
2834

2935
--read::
3036
If specified, read a midx file specified by the midx-head file
3137
and output basic details about the midx file. (Cannot be combined
32-
with --write or --clear.)
38+
with `--write`, `--clear`, or `--verify`.)
3339

3440
--midx-id <oid>::
35-
If specified with --read, use the given oid to read midx-[oid].midx
41+
If specified with `--read`, use the given oid to read midx-[oid].midx
3642
instead of using midx-head.
3743

3844
--write::
3945
If specified, write a new midx file to the pack directory using
4046
the packfiles present. Outputs the hash of the result midx file.
41-
(Cannot be combined with --read or --clear.)
47+
(Cannot be combined with `--read`, `--clear`, or `--verify`.)
4248

4349
--update-head::
44-
If specified with --write, update the midx-head file to point to
50+
If specified with `--write`, update the midx-head file to point to
4551
the written midx file.
4652

4753
--delete-expired::
48-
If specified with --write and --update-head, delete the midx file
54+
If specified with `--write` and `--update-head`, delete the midx file
4955
previously pointed to by midx-head (if changed).
5056

5157
EXAMPLES

builtin/midx.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ static char const * const builtin_midx_usage[] ={
1414
N_("git midx --write [--pack-dir <packdir>] [--update-head] [--delete-expired]"),
1515
N_("git midx --read [--midx-id=<oid>]"),
1616
N_("git midx --clear [--pack-dir <packdir>]"),
17+
N_("git midx --verify [--pack-dir <packdir>]"),
1718
NULL
1819
};
1920

@@ -25,6 +26,7 @@ static struct opts_midx {
2526
int read;
2627
const char *midx_id;
2728
int clear;
29+
int verify;
2830
int has_existing;
2931
struct object_id old_midx_oid;
3032
} opts;
@@ -307,6 +309,8 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
307309
N_("read midx file")),
308310
OPT_BOOL('c', "clear", &opts.clear,
309311
N_("clear midx file and midx-head")),
312+
OPT_BOOL(0, "verify", &opts.verify,
313+
N_("verify the contents of a midx file")),
310314
{ OPTION_STRING, 'M', "midx-id", &opts.midx_id,
311315
N_("oid"),
312316
N_("An OID for a specific midx file in the pack-dir."),
@@ -325,7 +329,7 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
325329
builtin_midx_options,
326330
builtin_midx_usage, 0);
327331

328-
if (opts.write + opts.read + opts.clear > 1)
332+
if (opts.write + opts.read + opts.clear + opts.verify > 1)
329333
usage_with_options(builtin_midx_usage, builtin_midx_options);
330334

331335
if (!opts.pack_dir) {
@@ -343,6 +347,8 @@ int cmd_midx(int argc, const char **argv, const char *prefix)
343347
return midx_read();
344348
if (opts.clear)
345349
return midx_clear();
350+
if (opts.verify)
351+
return midx_verify(opts.pack_dir, opts.midx_id);
346352

347353
return 0;
348354
}

midx.c

Lines changed: 163 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "pack.h"
44
#include "packfile.h"
55
#include "midx.h"
6+
#include "object-store.h"
67

78
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
89
#define MIDX_CHUNKID_PACKLOOKUP 0x504c4f4f /* "PLOO" */
@@ -134,17 +135,19 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char
134135

135136
hdr = midx_map;
136137
if (ntohl(hdr->midx_signature) != MIDX_SIGNATURE) {
138+
uint32_t signature = ntohl(hdr->midx_signature);
137139
munmap(midx_map, midx_size);
138140
close(fd);
139141
die("midx signature %X does not match signature %X",
140-
ntohl(hdr->midx_signature), MIDX_SIGNATURE);
142+
signature, MIDX_SIGNATURE);
141143
}
142144

143145
if (ntohl(hdr->midx_version) != MIDX_VERSION) {
146+
uint32_t version = ntohl(hdr->midx_version);
144147
munmap(midx_map, midx_size);
145148
close(fd);
146149
die("midx version %X does not match version %X",
147-
ntohl(hdr->midx_version), MIDX_VERSION);
150+
version, MIDX_VERSION);
148151
}
149152

150153
/* Time to fill a midx struct */
@@ -193,16 +196,19 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char
193196
midx->chunk_large_offsets = data + chunk_offset;
194197
break;
195198

196-
case 0:
197-
break;
198-
199199
default:
200-
munmap(midx_map, midx_size);
201-
close(fd);
202-
die("Unrecognized MIDX chunk id: %08x", chunk_id);
200+
/* We allow optional MIDX chunks, so ignore unrecognized chunk ids */
201+
break;
203202
}
204203
}
205204

205+
if (!midx->chunk_oid_fanout)
206+
die("midx missing OID Fanout chunk");
207+
if (!midx->chunk_pack_lookup)
208+
die("midx missing Packfile Name Lookup chunk");
209+
if (!midx->chunk_pack_names)
210+
die("midx missing Packfile Name chunk");
211+
206212
midx->num_objects = ntohl(*((uint32_t*)(midx->chunk_oid_fanout + 255 * 4)));
207213
midx->num_packs = ntohl(midx->hdr->num_packs);
208214

@@ -215,6 +221,10 @@ static struct midxed_git *load_midxed_git_one(const char *midx_file, const char
215221

216222
for (i = 0; i < midx->num_packs; i++) {
217223
uint32_t name_offset = ntohl(*(uint32_t*)(midx->chunk_pack_lookup + 4 * i));
224+
225+
if (midx->chunk_pack_names + name_offset >= midx->data + midx->data_len)
226+
die("invalid packfile name lookup");
227+
218228
midx->pack_names[i] = (const char*)(midx->chunk_pack_names + name_offset);
219229
}
220230
}
@@ -862,7 +872,8 @@ const char *write_midx_file(const char *pack_dir,
862872
break;
863873

864874
default:
865-
die("unrecognized MIDX chunk id: %08x", chunk_ids[chunk]);
875+
BUG("midx tried to write an invalid chunk ID %08X", chunk_ids[chunk]);
876+
break;
866877
}
867878
}
868879

@@ -928,3 +939,146 @@ void close_all_midx(void)
928939

929940
midxed_git = 0;
930941
}
942+
943+
static int verify_midx_error = 0;
944+
945+
static void midx_report(const char *fmt, ...)
946+
{
947+
va_list ap;
948+
struct strbuf sb = STRBUF_INIT;
949+
verify_midx_error = 1;
950+
951+
va_start(ap, fmt);
952+
strbuf_vaddf(&sb, fmt, ap);
953+
954+
fprintf(stderr, "%s\n", sb.buf);
955+
strbuf_release(&sb);
956+
va_end(ap);
957+
}
958+
959+
int midx_verify(const char *pack_dir, const char *midx_id)
960+
{
961+
uint32_t i, cur_fanout_pos = 0;
962+
struct midxed_git *m;
963+
const char *midx_head_path;
964+
struct object_id cur_oid, prev_oid, checksum;
965+
struct hashfile *f;
966+
int devnull, checksum_fail = 0;
967+
968+
if (midx_id) {
969+
size_t sz;
970+
struct strbuf sb = STRBUF_INIT;
971+
strbuf_addf(&sb, "%s/midx-%s.midx", pack_dir, midx_id);
972+
midx_head_path = strbuf_detach(&sb, &sz);
973+
} else {
974+
midx_head_path = get_midx_head_filename_dir(pack_dir);
975+
}
976+
977+
m = load_midxed_git_one(midx_head_path, pack_dir);
978+
979+
if (!m) {
980+
midx_report("failed to find specified midx file");
981+
goto cleanup;
982+
}
983+
984+
985+
devnull = open("/dev/null", O_WRONLY);
986+
f = hashfd(devnull, NULL);
987+
hashwrite(f, m->data, m->data_len - m->hdr->hash_len);
988+
finalize_hashfile(f, checksum.hash, CSUM_CLOSE);
989+
if (hashcmp(checksum.hash, m->data + m->data_len - m->hdr->hash_len)) {
990+
midx_report(_("the midx file has incorrect checksum and is likely corrupt"));
991+
verify_midx_error = 0;
992+
checksum_fail = 1;
993+
}
994+
995+
if (m->hdr->hash_version != MIDX_OID_VERSION)
996+
midx_report("invalid hash version");
997+
if (m->hdr->hash_len != MIDX_OID_LEN)
998+
midx_report("invalid hash length");
999+
1000+
if (verify_midx_error)
1001+
goto cleanup;
1002+
1003+
if (!m->chunk_oid_lookup)
1004+
midx_report("missing OID Lookup chunk");
1005+
if (!m->chunk_object_offsets)
1006+
midx_report("missing Object Offset chunk");
1007+
1008+
if (verify_midx_error)
1009+
goto cleanup;
1010+
1011+
for (i = 0; i < m->num_packs; i++) {
1012+
if (prepare_midx_pack(m, i)) {
1013+
midx_report("failed to prepare pack %s",
1014+
m->pack_names[i]);
1015+
continue;
1016+
}
1017+
1018+
if (!m->packs[i]->index_data &&
1019+
open_pack_index(m->packs[i]))
1020+
midx_report("failed to open index for pack %s",
1021+
m->pack_names[i]);
1022+
}
1023+
1024+
if (verify_midx_error)
1025+
goto cleanup;
1026+
1027+
for (i = 0; i < m->num_objects; i++) {
1028+
struct pack_midx_details details;
1029+
uint32_t index_pos, pack_id;
1030+
struct packed_git *p;
1031+
off_t pack_offset;
1032+
1033+
hashcpy(cur_oid.hash, m->chunk_oid_lookup + m->hdr->hash_len * i);
1034+
1035+
while (cur_oid.hash[0] > cur_fanout_pos) {
1036+
uint32_t fanout_value = get_be32(m->chunk_oid_fanout + cur_fanout_pos * sizeof(uint32_t));
1037+
if (i != fanout_value)
1038+
midx_report("midx has incorrect fanout value: fanout[%d] = %u != %u",
1039+
cur_fanout_pos, fanout_value, i);
1040+
1041+
cur_fanout_pos++;
1042+
}
1043+
1044+
if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
1045+
midx_report("midx has incorrect OID order: %s then %s",
1046+
oid_to_hex(&prev_oid),
1047+
oid_to_hex(&cur_oid));
1048+
1049+
oidcpy(&prev_oid, &cur_oid);
1050+
1051+
if (!nth_midxed_object_details(m, i, &details)) {
1052+
midx_report("nth_midxed_object_details failed with n=%d", i);
1053+
continue;
1054+
}
1055+
1056+
pack_id = details.pack_int_id;
1057+
if (pack_id >= m->num_packs) {
1058+
midx_report("pack-int-id for object n=%d is invalid: %u",
1059+
pack_id);
1060+
continue;
1061+
}
1062+
1063+
p = m->packs[pack_id];
1064+
1065+
if (!find_pack_entry_pos(cur_oid.hash, p, &index_pos)) {
1066+
midx_report("midx contains object not present in packfile: %s",
1067+
oid_to_hex(&cur_oid));
1068+
continue;
1069+
}
1070+
1071+
pack_offset = nth_packed_object_offset(p, index_pos);
1072+
if (details.offset != pack_offset)
1073+
midx_report("midx has incorrect offset for %s : %"PRIx64" != %"PRIx64,
1074+
oid_to_hex(&cur_oid),
1075+
details.offset,
1076+
pack_offset);
1077+
}
1078+
1079+
cleanup:
1080+
if (m)
1081+
close_midx(m);
1082+
free(m);
1083+
return verify_midx_error | checksum_fail;
1084+
}

midx.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,4 +132,6 @@ extern const char *write_midx_file(const char *pack_dir,
132132
extern int close_midx(struct midxed_git *m);
133133
extern void close_all_midx(void);
134134

135+
int midx_verify(const char *pack_dir, const char *midx_id);
136+
135137
#endif

packfile.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,20 +1872,27 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
18721872
}
18731873
}
18741874

1875-
off_t find_pack_entry_one(const unsigned char *sha1,
1876-
struct packed_git *p)
1875+
int find_pack_entry_pos(const unsigned char *sha1,
1876+
struct packed_git *p,
1877+
uint32_t *result)
18771878
{
18781879
const unsigned char *index = p->index_data;
18791880
struct object_id oid;
1880-
uint32_t result;
18811881

18821882
if (!index) {
18831883
if (open_pack_index(p))
18841884
return 0;
18851885
}
18861886

18871887
hashcpy(oid.hash, sha1);
1888-
if (bsearch_pack(&oid, p, &result))
1888+
return bsearch_pack(&oid, p, result);
1889+
}
1890+
1891+
off_t find_pack_entry_one(const unsigned char *sha1,
1892+
struct packed_git *p)
1893+
{
1894+
uint32_t result;
1895+
if (find_pack_entry_pos(sha1, p, &result))
18891896
return nth_packed_object_offset(p, result);
18901897
return 0;
18911898
}

packfile.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ extern const struct object_id *nth_packed_object_oid(struct object_id *, struct
123123
*/
124124
extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t n);
125125

126+
int find_pack_entry_pos(const unsigned char *sha1,
127+
struct packed_git *p,
128+
uint32_t *result);
129+
126130
/*
127131
* If the object named sha1 is present in the specified packfile,
128132
* return its offset within the packfile; otherwise, return 0.

0 commit comments

Comments
 (0)