aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--builtin/fsck.c46
-rw-r--r--cache.h13
-rw-r--r--sha1_file.c180
-rwxr-xr-xt/t1450-fsck.sh86
4 files changed, 284 insertions, 41 deletions
diff --git a/builtin/fsck.c b/builtin/fsck.c
index f01b81eeb..4b91ee95e 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -362,18 +362,6 @@ static int fsck_obj(struct object *obj)
return 0;
}
-static int fsck_sha1(const unsigned char *sha1)
-{
- struct object *obj = parse_object(sha1);
- if (!obj) {
- errors_found |= ERROR_OBJECT;
- return error("%s: object corrupt or missing",
- sha1_to_hex(sha1));
- }
- obj->flags |= HAS_OBJ;
- return fsck_obj(obj);
-}
-
static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
unsigned long size, void *buffer, int *eaten)
{
@@ -488,9 +476,41 @@ static void get_default_heads(void)
}
}
+static struct object *parse_loose_object(const unsigned char *sha1,
+ const char *path)
+{
+ struct object *obj;
+ void *contents;
+ enum object_type type;
+ unsigned long size;
+ int eaten;
+
+ if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
+ return NULL;
+
+ if (!contents && type != OBJ_BLOB)
+ die("BUG: read_loose_object streamed a non-blob");
+
+ obj = parse_object_buffer(sha1, type, size, contents, &eaten);
+
+ if (!eaten)
+ free(contents);
+ return obj;
+}
+
static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
{
- if (fsck_sha1(sha1))
+ struct object *obj = parse_loose_object(sha1, path);
+
+ if (!obj) {
+ errors_found |= ERROR_OBJECT;
+ error("%s: object corrupt or missing: %s",
+ sha1_to_hex(sha1), path);
+ return 0; /* keep checking other objects */
+ }
+
+ obj->flags = HAS_OBJ;
+ if (fsck_obj(obj))
errors_found |= ERROR_OBJECT;
return 0;
}
diff --git a/cache.h b/cache.h
index 00a029af3..5cac28788 100644
--- a/cache.h
+++ b/cache.h
@@ -1143,6 +1143,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
extern int has_sha1_pack(const unsigned char *sha1);
/*
+ * Open the loose object at path, check its sha1, and return the contents,
+ * type, and size. If the object is a blob, then "contents" may return NULL,
+ * to allow streaming of large blobs.
+ *
+ * Returns 0 on success, negative on error (details may be written to stderr).
+ */
+int read_loose_object(const char *path,
+ const unsigned char *expected_sha1,
+ enum object_type *type,
+ unsigned long *size,
+ void **contents);
+
+/*
* Return true iff we have an object named sha1, whether local or in
* an alternate object database, and whether packed or loose. This
* function does not respect replace references.
diff --git a/sha1_file.c b/sha1_file.c
index b5e827ac9..c40ef7111 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1630,39 +1630,54 @@ int git_open_cloexec(const char *name, int flags)
return fd;
}
-static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
+/*
+ * Find "sha1" as a loose object in the local repository or in an alternate.
+ * Returns 0 on success, negative on failure.
+ *
+ * The "path" out-parameter will give the path of the object we found (if any).
+ * Note that it may point to static storage and is only valid until another
+ * call to sha1_file_name(), etc.
+ */
+static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
+ const char **path)
{
struct alternate_object_database *alt;
- if (!lstat(sha1_file_name(sha1), st))
+ *path = sha1_file_name(sha1);
+ if (!lstat(*path, st))
return 0;
prepare_alt_odb();
errno = ENOENT;
for (alt = alt_odb_list; alt; alt = alt->next) {
- const char *path = alt_sha1_path(alt, sha1);
- if (!lstat(path, st))
+ *path = alt_sha1_path(alt, sha1);
+ if (!lstat(*path, st))
return 0;
}
return -1;
}
-static int open_sha1_file(const unsigned char *sha1)
+/*
+ * Like stat_sha1_file(), but actually open the object and return the
+ * descriptor. See the caveats on the "path" parameter above.
+ */
+static int open_sha1_file(const unsigned char *sha1, const char **path)
{
int fd;
struct alternate_object_database *alt;
int most_interesting_errno;
- fd = git_open(sha1_file_name(sha1));
+ *path = sha1_file_name(sha1);
+ fd = git_open(*path);
if (fd >= 0)
return fd;
most_interesting_errno = errno;
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
- const char *path = alt_sha1_path(alt, sha1);
- fd = git_open(path);
+ *path = alt_sha1_path(alt, sha1);
+ fd = git_open(*path);
if (fd >= 0)
return fd;
if (most_interesting_errno == ENOENT)
@@ -1672,12 +1687,21 @@ static int open_sha1_file(const unsigned char *sha1)
return -1;
}
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+/*
+ * Map the loose object at "path" if it is not NULL, or the path found by
+ * searching for a loose object named "sha1".
+ */
+static void *map_sha1_file_1(const char *path,
+ const unsigned char *sha1,
+ unsigned long *size)
{
void *map;
int fd;
- fd = open_sha1_file(sha1);
+ if (path)
+ fd = git_open(path);
+ else
+ fd = open_sha1_file(sha1, &path);
map = NULL;
if (fd >= 0) {
struct stat st;
@@ -1686,7 +1710,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
*size = xsize_t(st.st_size);
if (!*size) {
/* mmap() is forbidden on empty files */
- error("object file %s is empty", sha1_file_name(sha1));
+ error("object file %s is empty", path);
return NULL;
}
map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
@@ -1696,6 +1720,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
return map;
}
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+{
+ return map_sha1_file_1(NULL, sha1, size);
+}
+
unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
@@ -2806,8 +2835,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
* object even exists.
*/
if (!oi->typep && !oi->typename && !oi->sizep) {
+ const char *path;
struct stat st;
- if (stat_sha1_file(sha1, &st) < 0)
+ if (stat_sha1_file(sha1, &st, &path) < 0)
return -1;
if (oi->disk_sizep)
*oi->disk_sizep = st.st_size;
@@ -3003,6 +3033,8 @@ void *read_sha1_file_extended(const unsigned char *sha1,
{
void *data;
const struct packed_git *p;
+ const char *path;
+ struct stat st;
const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
errno = 0;
@@ -3018,12 +3050,9 @@ void *read_sha1_file_extended(const unsigned char *sha1,
die("replacement %s not found for %s",
sha1_to_hex(repl), sha1_to_hex(sha1));
- if (has_loose_object(repl)) {
- const char *path = sha1_file_name(sha1);
-
+ if (!stat_sha1_file(repl, &st, &path))
die("loose object %s (stored in %s) is corrupt",
sha1_to_hex(repl), path);
- }
if ((p = has_packed_and_bad(repl)) != NULL)
die("packed object %s (stored in %s) is corrupt",
@@ -3793,3 +3822,122 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
}
return r ? r : pack_errors;
}
+
+static int check_stream_sha1(git_zstream *stream,
+ const char *hdr,
+ unsigned long size,
+ const char *path,
+ const unsigned char *expected_sha1)
+{
+ git_SHA_CTX c;
+ unsigned char real_sha1[GIT_SHA1_RAWSZ];
+ unsigned char buf[4096];
+ unsigned long total_read;
+ int status = Z_OK;
+
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, hdr, stream->total_out);
+
+ /*
+ * We already read some bytes into hdr, but the ones up to the NUL
+ * do not count against the object's content size.
+ */
+ total_read = stream->total_out - strlen(hdr) - 1;
+
+ /*
+ * This size comparison must be "<=" to read the final zlib packets;
+ * see the comment in unpack_sha1_rest for details.
+ */
+ while (total_read <= size &&
+ (status == Z_OK || status == Z_BUF_ERROR)) {
+ stream->next_out = buf;
+ stream->avail_out = sizeof(buf);
+ if (size - total_read < stream->avail_out)
+ stream->avail_out = size - total_read;
+ status = git_inflate(stream, Z_FINISH);
+ git_SHA1_Update(&c, buf, stream->next_out - buf);
+ total_read += stream->next_out - buf;
+ }
+ git_inflate_end(stream);
+
+ if (status != Z_STREAM_END) {
+ error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
+ return -1;
+ }
+ if (stream->avail_in) {
+ error("garbage at end of loose object '%s'",
+ sha1_to_hex(expected_sha1));
+ return -1;
+ }
+
+ git_SHA1_Final(real_sha1, &c);
+ if (hashcmp(expected_sha1, real_sha1)) {
+ error("sha1 mismatch for %s (expected %s)", path,
+ sha1_to_hex(expected_sha1));
+ return -1;
+ }
+
+ return 0;
+}
+
+int read_loose_object(const char *path,
+ const unsigned char *expected_sha1,
+ enum object_type *type,
+ unsigned long *size,
+ void **contents)
+{
+ int ret = -1;
+ int fd = -1;
+ void *map = NULL;
+ unsigned long mapsize;
+ git_zstream stream;
+ char hdr[32];
+
+ *contents = NULL;
+
+ map = map_sha1_file_1(path, NULL, &mapsize);
+ if (!map) {
+ error_errno("unable to mmap %s", path);
+ goto out;
+ }
+
+ if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
+ error("unable to unpack header of %s", path);
+ goto out;
+ }
+
+ *type = parse_sha1_header(hdr, size);
+ if (*type < 0) {
+ error("unable to parse header of %s", path);
+ git_inflate_end(&stream);
+ goto out;
+ }
+
+ if (*type == OBJ_BLOB) {
+ if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
+ goto out;
+ } else {
+ *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
+ if (!*contents) {
+ error("unable to unpack contents of %s", path);
+ git_inflate_end(&stream);
+ goto out;
+ }
+ if (check_sha1_signature(expected_sha1, *contents,
+ *size, typename(*type))) {
+ error("sha1 mismatch for %s (expected %s)", path,
+ sha1_to_hex(expected_sha1));
+ free(*contents);
+ goto out;
+ }
+ }
+
+ ret = 0; /* everything checks out */
+
+out:
+ if (map)
+ munmap(map, mapsize);
+ if (fd >= 0)
+ close(fd);
+ return ret;
+}
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index ee7d4736d..8975b4d1b 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -43,13 +43,13 @@ test_expect_success 'HEAD is part of refs, valid objects appear valid' '
test_expect_success 'setup: helpers for corruption tests' '
sha1_file() {
- echo "$*" | sed "s#..#.git/objects/&/#"
+ remainder=${1#??} &&
+ firsttwo=${1%$remainder} &&
+ echo ".git/objects/$firsttwo/$remainder"
} &&
remove_object() {
- file=$(sha1_file "$*") &&
- test -e "$file" &&
- rm -f "$file"
+ rm "$(sha1_file "$1")"
}
'
@@ -535,13 +535,6 @@ test_expect_success 'fsck --connectivity-only' '
)
'
-remove_loose_object () {
- sha1="$(git rev-parse "$1")" &&
- remainder=${sha1#??} &&
- firsttwo=${sha1%$remainder} &&
- rm .git/objects/$firsttwo/$remainder
-}
-
test_expect_success 'fsck --name-objects' '
rm -rf name-objects &&
git init name-objects &&
@@ -550,11 +543,80 @@ test_expect_success 'fsck --name-objects' '
test_commit julius caesar.t &&
test_commit augustus &&
test_commit caesar &&
- remove_loose_object $(git rev-parse julius:caesar.t) &&
+ remove_object $(git rev-parse julius:caesar.t) &&
test_must_fail git fsck --name-objects >out &&
tree=$(git rev-parse --verify julius:) &&
grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out
)
'
+test_expect_success 'alternate objects are correctly blamed' '
+ test_when_finished "rm -rf alt.git .git/objects/info/alternates" &&
+ git init --bare alt.git &&
+ echo "../../alt.git/objects" >.git/objects/info/alternates &&
+ mkdir alt.git/objects/12 &&
+ >alt.git/objects/12/34567890123456789012345678901234567890 &&
+ test_must_fail git fsck >out 2>&1 &&
+ grep alt.git out
+'
+
+test_expect_success 'fsck errors in packed objects' '
+ git cat-file commit HEAD >basis &&
+ sed "s/</one/" basis >one &&
+ sed "s/</foo/" basis >two &&
+ one=$(git hash-object -t commit -w one) &&
+ two=$(git hash-object -t commit -w two) &&
+ pack=$(
+ {
+ echo $one &&
+ echo $two
+ } | git pack-objects .git/objects/pack/pack
+ ) &&
+ test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&
+ remove_object $one &&
+ remove_object $two &&
+ test_must_fail git fsck 2>out &&
+ grep "error in commit $one.* - bad name" out &&
+ grep "error in commit $two.* - bad name" out &&
+ ! grep corrupt out
+'
+
+test_expect_success 'fsck finds problems in duplicate loose objects' '
+ rm -rf broken-duplicate &&
+ git init broken-duplicate &&
+ (
+ cd broken-duplicate &&
+ test_commit duplicate &&
+ # no "-d" here, so we end up with duplicates
+ git repack &&
+ # now corrupt the loose copy
+ file=$(sha1_file "$(git rev-parse HEAD)") &&
+ rm "$file" &&
+ echo broken >"$file" &&
+ test_must_fail git fsck
+ )
+'
+
+test_expect_success 'fsck detects trailing loose garbage (commit)' '
+ git cat-file commit HEAD >basis &&
+ echo bump-commit-sha1 >>basis &&
+ commit=$(git hash-object -w -t commit basis) &&
+ file=$(sha1_file $commit) &&
+ test_when_finished "remove_object $commit" &&
+ chmod +w "$file" &&
+ echo garbage >>"$file" &&
+ test_must_fail git fsck 2>out &&
+ test_i18ngrep "garbage.*$commit" out
+'
+
+test_expect_success 'fsck detects trailing loose garbage (blob)' '
+ blob=$(echo trailing | git hash-object -w --stdin) &&
+ file=$(sha1_file $blob) &&
+ test_when_finished "remove_object $blob" &&
+ chmod +w "$file" &&
+ echo garbage >>"$file" &&
+ test_must_fail git fsck 2>out &&
+ test_i18ngrep "garbage.*$blob" out
+'
+
test_done