From 1af1c2b63db6a413fbeb9b08cd55dcb735d7597d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:08 -0700 Subject: read-cache/write-cache: optionally return cache checksum SHA1. read_cache_1() and write_cache_1() takes an extra parameter *sha1 that returns the checksum of the index file when non-NULL. Signed-off-by: Junio C Hamano --- cache.h | 5 ++++- read-cache.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/cache.h b/cache.h index 69801b02d..8c9947ef4 100644 --- a/cache.h +++ b/cache.h @@ -138,8 +138,11 @@ extern const char *prefix_filename(const char *prefix, int len, const char *path #define alloc_nr(x) (((x)+16)*3/2) /* Initialize and use the cache information */ +extern int read_cache_1(unsigned char *); +extern int write_cache_1(int, struct cache_entry **, int, unsigned char *); extern int read_cache(void); -extern int write_cache(int newfd, struct cache_entry **cache, int entries); +extern int write_cache(int, struct cache_entry **, int); + extern int cache_name_pos(const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ diff --git a/read-cache.c b/read-cache.c index f97f92d90..50e094e05 100644 --- a/read-cache.c +++ b/read-cache.c @@ -496,10 +496,12 @@ int add_cache_entry(struct cache_entry *ce, int option) return 0; } -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned char *sha1) { SHA_CTX c; - unsigned char sha1[20]; + unsigned char sha1_buf[20]; + if (!sha1) + sha1 = sha1_buf; if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error("bad signature"); @@ -513,7 +515,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) return 0; } -int read_cache(void) +int read_cache_1(unsigned char *cache_sha1) { int fd, i; struct stat st; @@ -547,7 +549,7 @@ int read_cache(void) die("index file mmap failed (%s)", strerror(errno)); hdr = map; - if (verify_hdr(hdr, size) < 0) + if (verify_hdr(hdr, size, cache_sha1) < 0) goto unmap; active_nr = ntohl(hdr->hdr_entries); @@ -595,7 +597,7 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len) return 0; } -static int ce_flush(SHA_CTX *context, int fd) +static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1) { unsigned int left = write_buffer_len; @@ -612,7 +614,8 @@ static int ce_flush(SHA_CTX *context, int fd) } /* Append the SHA1 signature at the end */ - SHA1_Final(write_buffer + left, context); + SHA1_Final(sha1, context); + memcpy(write_buffer + left, sha1, 20); left += 20; if (write(fd, write_buffer, left) != left) return -1; @@ -663,11 +666,14 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) } } -int write_cache(int newfd, struct cache_entry **cache, int entries) +int write_cache_1(int newfd, struct cache_entry **cache, int entries, + unsigned char *cache_sha1) { SHA_CTX c; struct cache_header hdr; int i, removed; + int status; + unsigned char sha1[20]; for (i = removed = 0; i < entries; i++) if (!cache[i]->ce_mode) @@ -691,5 +697,18 @@ int write_cache(int newfd, struct cache_entry **cache, int entries) if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } - return ce_flush(&c, newfd); + status = ce_flush(&c, newfd, sha1); + if (cache_sha1) + memcpy(cache_sha1, sha1, 20); + return status; +} + +int read_cache(void) +{ + return read_cache_1(NULL); +} + +int write_cache(int newfd, struct cache_entry **cache, int entries) +{ + return write_cache_1(newfd, cache, entries, NULL); } -- cgit v1.2.1 From 749864627c2d3c33bbc56d7ba0b5542af698cc40 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:20 -0700 Subject: Add cache-tree. The cache_tree data structure is to cache tree object names that would result from the current index file. The idea is to have an optional file to record each tree object name that corresponds to a directory path in the cache when we run write_cache(), and read it back when we run read_cache(). During various index manupulations, we selectively invalidate the parts so that the next write-tree can bypass regenerating tree objects for unchanged parts of the directory hierarchy. We could perhaps make the cache-tree data an optional part of the index file, but that would involve the index format updates, so unless we need it for performance reasons, the current plan is to use a separate file, $GIT_DIR/index.aux to store this information and link it with the index file with the checksum that is already used for index file integrity check. Signed-off-by: Junio C Hamano --- Makefile | 2 +- cache-tree.c | 519 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cache-tree.h | 29 ++++ 3 files changed, 549 insertions(+), 1 deletion(-) create mode 100644 cache-tree.c create mode 100644 cache-tree.h diff --git a/Makefile b/Makefile index d9a3a82fe..518c3c176 100644 --- a/Makefile +++ b/Makefile @@ -204,7 +204,7 @@ DIFF_OBJS = \ diffcore-delta.o log-tree.o LIB_OBJS = \ - blob.o commit.o connect.o csum-file.o \ + blob.o commit.o connect.o csum-file.o cache-tree.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ diff --git a/cache-tree.c b/cache-tree.c new file mode 100644 index 000000000..f6d1dd1d7 --- /dev/null +++ b/cache-tree.c @@ -0,0 +1,519 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +#define DEBUG 0 + +struct cache_tree *cache_tree(void) +{ + struct cache_tree *it = xcalloc(1, sizeof(struct cache_tree)); + it->entry_count = -1; + return it; +} + +void cache_tree_free(struct cache_tree *it) +{ + int i; + + if (!it) + return; + for (i = 0; i < it->subtree_nr; i++) + cache_tree_free(it->down[i]->cache_tree); + free(it->down); + free(it); +} + +static struct cache_tree_sub *find_subtree(struct cache_tree *it, + const char *path, + int pathlen, + int create) +{ + int i; + struct cache_tree_sub *down; + for (i = 0; i < it->subtree_nr; i++) { + down = it->down[i]; + if (down->namelen == pathlen && + !memcmp(down->name, path, pathlen)) + return down; + } + if (!create) + return NULL; + if (it->subtree_alloc <= it->subtree_nr) { + it->subtree_alloc = alloc_nr(it->subtree_alloc); + it->down = xrealloc(it->down, it->subtree_alloc * + sizeof(*it->down)); + } + down = xmalloc(sizeof(*down) + pathlen + 1); + down->cache_tree = NULL; /* cache_tree(); */ + down->namelen = pathlen; + memcpy(down->name, path, pathlen); + down->name[pathlen] = 0; /* not strictly needed */ + it->down[it->subtree_nr++] = down; + return down; +} + +void cache_tree_invalidate_path(struct cache_tree *it, const char *path) +{ + /* a/b/c + * ==> invalidate self + * ==> find "a", have it invalidate "b/c" + * a + * ==> invalidate self + * ==> if "a" exists as a subtree, remove it. + */ + const char *slash; + int namelen; + struct cache_tree_sub *down; + + if (!it) + return; + slash = strchr(path, '/'); + it->entry_count = -1; + if (!slash) { + int i; + namelen = strlen(path); + for (i = 0; i < it->subtree_nr; i++) { + if (it->down[i]->namelen == namelen && + !memcmp(it->down[i]->name, path, namelen)) + break; + } + if (i < it->subtree_nr) { + cache_tree_free(it->down[i]->cache_tree); + free(it->down[i]); + /* 0 1 2 3 4 5 + * ^ ^subtree_nr = 6 + * i + * move 4 and 5 up one place (2 entries) + * 2 = 6 - 3 - 1 = subtree_nr - i - 1 + */ + memmove(it->down+i, it->down+i+1, + sizeof(struct cache_tree_sub *) * + (it->subtree_nr - i - 1)); + it->subtree_nr--; + } + return; + } + namelen = slash - path; + down = find_subtree(it, path, namelen, 0); + if (down) + cache_tree_invalidate_path(down->cache_tree, slash + 1); +} + +static int verify_cache(struct cache_entry **cache, + int entries) +{ + int i, funny; + + /* Verify that the tree is merged */ + funny = 0; + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + if (ce_stage(ce)) { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "%s: unmerged (%s)\n", + ce->name, sha1_to_hex(ce->sha1)); + } + } + if (funny) + return -1; + + /* Also verify that the cache does not have path and path/file + * at the same time. At this point we know the cache has only + * stage 0 entries. + */ + funny = 0; + for (i = 0; i < entries - 1; i++) { + /* path/file always comes after path because of the way + * the cache is sorted. Also path can appear only once, + * which means conflicting one would immediately follow. + */ + const char *this_name = cache[i]->name; + const char *next_name = cache[i+1]->name; + int this_len = strlen(this_name); + if (this_len < strlen(next_name) && + strncmp(this_name, next_name, this_len) == 0 && + next_name[this_len] == '/') { + if (10 < ++funny) { + fprintf(stderr, "...\n"); + break; + } + fprintf(stderr, "You have both %s and %s\n", + this_name, next_name); + } + } + if (funny) + return -1; + return 0; +} + +static void discard_unused_subtrees(struct cache_tree *it) +{ + struct cache_tree_sub **down = it->down; + int nr = it->subtree_nr; + int dst, src; + for (dst = src = 0; src < nr; src++) { + struct cache_tree_sub *s = down[src]; + if (s->used) + down[dst++] = s; + else { + cache_tree_free(s->cache_tree); + free(s); + it->subtree_nr--; + } + } +} + +static int update_one(struct cache_tree *it, + struct cache_entry **cache, + int entries, + const char *base, + int baselen, + int missing_ok) +{ + unsigned long size, offset; + char *buffer; + int i; + + if (0 <= it->entry_count) + return it->entry_count; + + /* + * We first scan for subtrees and update them; we start by + * marking existing subtrees -- the ones that are unmarked + * should not be in the result. + */ + for (i = 0; i < it->subtree_nr; i++) + it->down[i]->used = 0; + + /* + * Find the subtrees and update them. + */ + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, sublen, subcnt; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (!slash) + continue; + /* + * a/bbb/c (base = a/, slash = /c) + * ==> + * path+baselen = bbb/c, sublen = 3 + */ + sublen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, sublen, 1); + if (!sub->cache_tree) + sub->cache_tree = cache_tree(); + subcnt = update_one(sub->cache_tree, + cache + i, entries - i, + path, + baselen + sublen + 1, + missing_ok); + i += subcnt - 1; + sub->used = 1; + } + + discard_unused_subtrees(it); + + /* + * Then write out the tree object for this level. + */ + size = 8192; + buffer = xmalloc(size); + offset = 0; + + for (i = 0; i < entries; i++) { + struct cache_entry *ce = cache[i]; + struct cache_tree_sub *sub; + const char *path, *slash; + int pathlen, entlen; + const unsigned char *sha1; + unsigned mode; + + path = ce->name; + pathlen = ce_namelen(ce); + if (pathlen <= baselen || memcmp(base, path, baselen)) + break; /* at the end of this level */ + + slash = strchr(path + baselen, '/'); + if (slash) { + entlen = slash - (path + baselen); + sub = find_subtree(it, path + baselen, entlen, 0); + if (!sub) + die("cache-tree.c: '%.*s' in '%s' not found", + entlen, path + baselen, path); + i += sub->cache_tree->entry_count - 1; + sha1 = sub->cache_tree->sha1; + mode = S_IFDIR; + } + else { + sha1 = ce->sha1; + mode = ntohl(ce->ce_mode); + entlen = pathlen - baselen; + } + if (!missing_ok && !has_sha1_file(sha1)) + return error("invalid object %s", sha1_to_hex(sha1)); + + if (!ce->ce_mode) + continue; /* entry being removed */ + + if (size < offset + entlen + 100) { + size = alloc_nr(offset + entlen + 100); + buffer = xrealloc(buffer, size); + } + offset += sprintf(buffer + offset, + "%o %.*s", mode, entlen, path + baselen); + buffer[offset++] = 0; + memcpy(buffer + offset, sha1, 20); + offset += 20; + +#if DEBUG + fprintf(stderr, "cache-tree %o %.*s\n", + mode, entlen, path + baselen); +#endif + } + + write_sha1_file(buffer, offset, tree_type, it->sha1); + free(buffer); + it->entry_count = i; +#if DEBUG + fprintf(stderr, "cache-tree (%d ent, %d subtree) %s\n", + it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); +#endif + return i; +} + +int cache_tree_update(struct cache_tree *it, + struct cache_entry **cache, + int entries, + int missing_ok) +{ + int i; + i = verify_cache(cache, entries); + if (i) + return i; + i = update_one(it, cache, entries, "", 0, missing_ok); + if (i < 0) + return i; + return 0; +} + +static void *write_one(struct cache_tree *it, + char *path, + int pathlen, + char *buffer, + unsigned long *size, + unsigned long *offset) +{ + int i; + + /* One "cache-tree" entry consists of the following: + * path (NUL terminated) + * entry_count, subtree_nr ("%d %d\n") + * tree-sha1 (missing if invalid) + * subtree_nr "cache-tree" entries for subtrees. + */ + if (*size < *offset + pathlen + 100) { + *size = alloc_nr(*offset + pathlen + 100); + buffer = xrealloc(buffer, *size); + } + *offset += sprintf(buffer + *offset, "%.*s%c%d %d\n", + pathlen, path, 0, + it->entry_count, it->subtree_nr); + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%.*s> (%d ent, %d subtree) %s\n", + pathlen, path, it->entry_count, it->subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%.*s> (%d subtree) invalid\n", + pathlen, path, it->subtree_nr); +#endif + + if (0 <= it->entry_count) { + memcpy(buffer + *offset, it->sha1, 20); + *offset += 20; + } + for (i = 0; i < it->subtree_nr; i++) { + struct cache_tree_sub *down = it->down[i]; + buffer = write_one(down->cache_tree, down->name, down->namelen, + buffer, size, offset); + } + return buffer; +} + +static void *cache_tree_write(const unsigned char *cache_sha1, + struct cache_tree *root, + unsigned long *offset_p) +{ + char path[PATH_MAX]; + unsigned long size = 8192; + char *buffer = xmalloc(size); + + /* the cache checksum of the corresponding index file. */ + memcpy(buffer, cache_sha1, 20); + *offset_p = 20; + path[0] = 0; + return write_one(root, path, 0, buffer, &size, offset_p); +} + +static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) +{ + const char *buf = *buffer; + unsigned long size = *size_p; + struct cache_tree *it; + int i, subtree_nr; + + it = NULL; + /* skip name, but make sure name exists */ + while (size && *buf) { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + it = cache_tree(); + if (sscanf(buf, "%d %d\n", &it->entry_count, &subtree_nr) != 2) + goto free_return; + while (size && *buf && *buf != '\n') { + size--; + buf++; + } + if (!size) + goto free_return; + buf++; size--; + if (0 <= it->entry_count) { + if (size < 20) + goto free_return; + memcpy(it->sha1, buf, 20); + buf += 20; + size -= 20; + } + +#if DEBUG + if (0 <= it->entry_count) + fprintf(stderr, "cache-tree <%s> (%d ent, %d subtree) %s\n", + *buffer, it->entry_count, subtree_nr, + sha1_to_hex(it->sha1)); + else + fprintf(stderr, "cache-tree <%s> (%d subtrees) invalid\n", + *buffer, subtree_nr); +#endif + + /* + * Just a heuristic -- we do not add directories that often but + * we do not want to have to extend it immediately when we do, + * hence +2. + */ + it->subtree_alloc = subtree_nr + 2; + it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *)); + for (i = 0; i < subtree_nr; i++) { + /* read each subtree */ + struct cache_tree *sub; + const char *name = buf; + int namelen; + sub = read_one(&buf, &size); + if (!sub) + goto free_return; + namelen = strlen(name); + it->down[i] = find_subtree(it, name, namelen, 1); + it->down[i]->cache_tree = sub; + } + if (subtree_nr != it->subtree_nr) + die("cache-tree: internal error"); + *buffer = buf; + *size_p = size; + return it; + + free_return: + cache_tree_free(it); + return NULL; +} + +static struct cache_tree *cache_tree_read(unsigned char *sha1, + const char *buffer, + unsigned long size) +{ + /* check the cache-tree matches the index */ + if (memcmp(buffer, sha1, 20)) + return NULL; /* checksum mismatch */ + if (buffer[20]) + return NULL; /* not the whole tree */ + buffer += 20; + size -= 20; + return read_one(&buffer, &size); +} + +struct cache_tree *read_cache_tree(unsigned char *sha1) +{ + int fd; + struct stat st; + char path[PATH_MAX]; + unsigned long size = 0; + void *map; + struct cache_tree *it; + + sprintf(path, "%s.aux", get_index_file()); + fd = open(path, O_RDONLY); + if (fd < 0) + return cache_tree(); + + if (fstat(fd, &st)) + return cache_tree(); + size = st.st_size; + map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (map == MAP_FAILED) + return cache_tree(); + it = cache_tree_read(sha1, map, size); + munmap(map, size); + if (!it) + return cache_tree(); + return it; +} + +int write_cache_tree(const unsigned char *sha1, struct cache_tree *root) +{ + char path[PATH_MAX]; + unsigned long size = 0; + void *buf, *buffer; + int fd, ret = -1; + + sprintf(path, "%s.aux", get_index_file()); + if (!root) { + unlink(path); + return -1; + } + fd = open(path, O_WRONLY|O_CREAT, 0666); + if (fd < 0) + return -1; + buffer = buf = cache_tree_write(sha1, root, &size); + while (size) { + int written = xwrite(fd, buf, size); + if (written <= 0) + goto fail; + buf += written; + size -= written; + } + ret = 0; + + fail: + close(fd); + free(buffer); + if (ret) + unlink(path); + return ret; +} diff --git a/cache-tree.h b/cache-tree.h new file mode 100644 index 000000000..7b149afdc --- /dev/null +++ b/cache-tree.h @@ -0,0 +1,29 @@ +#ifndef CACHE_TREE_H +#define CACHE_TREE_H + +struct cache_tree; +struct cache_tree_sub { + struct cache_tree *cache_tree; + int namelen; + int used; + char name[FLEX_ARRAY]; +}; + +struct cache_tree { + int entry_count; /* negative means "invalid" */ + unsigned char sha1[20]; + int subtree_nr; + int subtree_alloc; + struct cache_tree_sub **down; +}; + +struct cache_tree *cache_tree(void); +void cache_tree_free(struct cache_tree *); +void cache_tree_invalidate_path(struct cache_tree *, const char *); + +int write_cache_tree(const unsigned char *, struct cache_tree *); +struct cache_tree *read_cache_tree(unsigned char *); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); + + +#endif -- cgit v1.2.1 From a52139b47e505e74e23a02f5324485e11dfe4ef9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:35 -0700 Subject: Update write-tree to use cache-tree. The updated write-tree reads from $GIT_DIR/index.aux to pick up subtree objects information, updates the cache-tree with the index, and updates index.aux file after writing a tree out of the index file. Until update-index and other programs that modify the index are updated to maintain index.aux file, the index.aux file written by the last write-tree will become stale immediately after they update the index, which will result in the whole tree recomputation just like the original write-tree. The idea is to convert those commands to invalidate cache-tree whenever they touch the index entries, and write updated index.aux out. After the index is updated with them, write-tree will be able to reuse the parts of the cache-tree that have not been touched. Signed-off-by: Junio C Hamano --- write-tree.c | 137 ++++++----------------------------------------------------- 1 file changed, 12 insertions(+), 125 deletions(-) diff --git a/write-tree.c b/write-tree.c index dcad6e667..cef0c5bb4 100644 --- a/write-tree.c +++ b/write-tree.c @@ -5,96 +5,23 @@ */ #include "cache.h" #include "tree.h" +#include "cache-tree.h" -static int missing_ok = 0; - -static int check_valid_sha1(unsigned char *sha1) -{ - int ret; - - /* If we were anal, we'd check that the sha1 of the contents actually matches */ - ret = has_sha1_file(sha1); - if (ret == 0) - perror(sha1_file_name(sha1)); - return ret ? 0 : -1; -} - -static int write_tree(struct cache_entry **cachep, int maxentries, const char *base, int baselen, unsigned char *returnsha1) -{ - unsigned char subdir_sha1[20]; - unsigned long size, offset; - char *buffer; - int nr; - - /* Guess at some random initial size */ - size = 8192; - buffer = xmalloc(size); - offset = 0; - - nr = 0; - while (nr < maxentries) { - struct cache_entry *ce = cachep[nr]; - const char *pathname = ce->name, *filename, *dirname; - int pathlen = ce_namelen(ce), entrylen; - unsigned char *sha1; - unsigned int mode; - - /* Did we hit the end of the directory? Return how many we wrote */ - if (baselen >= pathlen || memcmp(base, pathname, baselen)) - break; - - sha1 = ce->sha1; - mode = ntohl(ce->ce_mode); - - /* Do we have _further_ subdirectories? */ - filename = pathname + baselen; - dirname = strchr(filename, '/'); - if (dirname) { - int subdir_written; - - subdir_written = write_tree(cachep + nr, maxentries - nr, pathname, dirname-pathname+1, subdir_sha1); - nr += subdir_written; +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; - /* Now we need to write out the directory entry into this tree.. */ - mode = S_IFDIR; - pathlen = dirname - pathname; - - /* ..but the directory entry doesn't count towards the total count */ - nr--; - sha1 = subdir_sha1; - } - - if (!missing_ok && check_valid_sha1(sha1) < 0) - exit(1); - - entrylen = pathlen - baselen; - if (offset + entrylen + 100 > size) { - size = alloc_nr(offset + entrylen + 100); - buffer = xrealloc(buffer, size); - } - offset += sprintf(buffer + offset, "%o %.*s", mode, entrylen, filename); - buffer[offset++] = 0; - memcpy(buffer + offset, sha1, 20); - offset += 20; - nr++; - } - - write_sha1_file(buffer, offset, tree_type, returnsha1); - free(buffer); - return nr; -} +static int missing_ok = 0; static const char write_tree_usage[] = "git-write-tree [--missing-ok]"; int main(int argc, char **argv) { - int i, funny; int entries; - unsigned char sha1[20]; - + setup_git_directory(); - entries = read_cache(); + entries = read_cache_1(active_cache_sha1); + active_cache_tree = read_cache_tree(active_cache_sha1); if (argc == 2) { if (!strcmp(argv[1], "--missing-ok")) missing_ok = 1; @@ -108,51 +35,11 @@ int main(int argc, char **argv) if (entries < 0) die("git-write-tree: error reading cache"); - /* Verify that the tree is merged */ - funny = 0; - for (i = 0; i < entries; i++) { - struct cache_entry *ce = active_cache[i]; - if (ce_stage(ce)) { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "%s: unmerged (%s)\n", ce->name, sha1_to_hex(ce->sha1)); - } - } - if (funny) - die("git-write-tree: not able to write tree"); - - /* Also verify that the cache does not have path and path/file - * at the same time. At this point we know the cache has only - * stage 0 entries. - */ - funny = 0; - for (i = 0; i < entries - 1; i++) { - /* path/file always comes after path because of the way - * the cache is sorted. Also path can appear only once, - * which means conflicting one would immediately follow. - */ - const char *this_name = active_cache[i]->name; - const char *next_name = active_cache[i+1]->name; - int this_len = strlen(this_name); - if (this_len < strlen(next_name) && - strncmp(this_name, next_name, this_len) == 0 && - next_name[this_len] == '/') { - if (10 < ++funny) { - fprintf(stderr, "...\n"); - break; - } - fprintf(stderr, "You have both %s and %s\n", - this_name, next_name); - } - } - if (funny) - die("git-write-tree: not able to write tree"); + if (cache_tree_update(active_cache_tree, active_cache, active_nr, + missing_ok)) + die("git-write-tree: error building trees"); + write_cache_tree(active_cache_sha1, active_cache_tree); - /* Ok, write it out */ - if (write_tree(active_cache, entries, "", 0, sha1) != entries) - die("git-write-tree: internal error"); - printf("%s\n", sha1_to_hex(sha1)); + printf("%s\n", sha1_to_hex(active_cache_tree->sha1)); return 0; } -- cgit v1.2.1 From 03ac6e64651e4b5ca0c2164a23b5f345f2c03af4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 16:52:52 -0700 Subject: Invalidate cache-tree entries for touched paths in git-apply. This updates git-apply to maintain cache-tree information. With this and the previous write-tree patch, repeated "apply --index" followed by "write-tree" on a huge tree will hopefully become faster. Signed-off-by: Junio C Hamano --- apply.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/apply.c b/apply.c index 269210a57..e283df38a 100644 --- a/apply.c +++ b/apply.c @@ -8,9 +8,14 @@ */ #include #include "cache.h" +#include "cache-tree.h" #include "quote.h" #include "blob.h" +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; + + // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch // --stat does just a diffstat, and doesn't actually apply @@ -1717,6 +1722,7 @@ static void remove_file(struct patch *patch) if (write_index) { if (remove_file_from_cache(patch->old_name) < 0) die("unable to remove %s from index", patch->old_name); + cache_tree_invalidate_path(active_cache_tree, patch->old_name); } unlink(patch->old_name); } @@ -1813,8 +1819,9 @@ static void create_file(struct patch *patch) if (!mode) mode = S_IFREG | 0644; - create_one_file(path, mode, buf, size); + create_one_file(path, mode, buf, size); add_index_file(path, mode, buf, size); + cache_tree_invalidate_path(active_cache_tree, path); } static void write_out_one_result(struct patch *patch) @@ -1912,8 +1919,9 @@ static int apply_patch(int fd, const char *filename) if (write_index) newfd = hold_index_file_for_update(&cache_file, get_index_file()); if (check_index) { - if (read_cache() < 0) + if (read_cache_1(active_cache_sha1) < 0) die("unable to read index file"); + active_cache_tree = read_cache_tree(active_cache_sha1); } if ((check || apply) && check_patch_list(list) < 0) @@ -1923,9 +1931,11 @@ static int apply_patch(int fd, const char *filename) write_out_results(list, skipped_patch); if (write_index) { - if (write_cache(newfd, active_cache, active_nr) || + if (write_cache_1(newfd, active_cache, active_nr, + active_cache_sha1) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); + write_cache_tree(active_cache_sha1, active_cache_tree); } if (show_index_info) -- cgit v1.2.1 From a6e5642f39db2113785c4f22add7e16d559b8d55 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 24 Apr 2006 00:23:54 -0700 Subject: Use cache-tree in update-index. Signed-off-by: Junio C Hamano --- update-index.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/update-index.c b/update-index.c index 1efac27c6..86f53948f 100644 --- a/update-index.c +++ b/update-index.c @@ -6,6 +6,11 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" +#include "tree.h" +#include "cache-tree.h" + +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; /* * Default to not allowing changes to the list of files. The @@ -70,6 +75,7 @@ static int mark_valid(const char *path) active_cache[pos]->ce_flags &= ~htons(CE_VALID); break; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; return 0; } @@ -83,6 +89,12 @@ static int add_file_to_cache(const char *path) struct stat st; status = lstat(path, &st); + + /* We probably want to do this in remove_file_from_cache() and + * add_cache_entry() instead... + */ + cache_tree_invalidate_path(active_cache_tree, path); + if (status < 0 || S_ISDIR(st.st_mode)) { /* When we used to have "path" and now we want to add * "path/file", we need a way to remove "path" before @@ -325,6 +337,7 @@ static int add_cacheinfo(unsigned int mode, const unsigned char *sha1, return error("%s: cannot add to the index - missing --add option?", path); report("add '%s'", path); + cache_tree_invalidate_path(active_cache_tree, path); return 0; } @@ -349,6 +362,7 @@ static int chmod_path(int flip, const char *path) default: return -1; } + cache_tree_invalidate_path(active_cache_tree, path); active_cache_changed = 1; return 0; } @@ -367,6 +381,7 @@ static void update_one(const char *path, const char *prefix, int prefix_length) die("Unable to mark file %s", path); return; } + cache_tree_invalidate_path(active_cache_tree, path); if (force_remove) { if (remove_file_from_cache(p)) @@ -442,6 +457,7 @@ static void read_index_info(int line_termination) free(path_name); continue; } + cache_tree_invalidate_path(active_cache_tree, path_name); if (!mode) { /* mode == 0 means there is no such path -- remove */ @@ -485,9 +501,10 @@ int main(int argc, const char **argv) if (newfd < 0) die("unable to create new cachefile"); - entries = read_cache(); + entries = read_cache_1(active_cache_sha1); if (entries < 0) die("cache corrupted"); + active_cache_tree = read_cache_tree(active_cache_sha1); for (i = 1 ; i < argc; i++) { const char *path = argv[i]; @@ -613,9 +630,11 @@ int main(int argc, const char **argv) } } if (active_cache_changed) { - if (write_cache(newfd, active_cache, active_nr) || + if (write_cache_1(newfd, active_cache, active_nr, + active_cache_sha1) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); + write_cache_tree(active_cache_sha1, active_cache_tree); } return has_errors ? 1 : 0; -- cgit v1.2.1 From 17448209f5441718c69642871c85a80f00d12b43 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 23 Apr 2006 20:20:25 -0700 Subject: Add test-dump-cache-tree This was useful in diagnosing the corrupt index.aux format problem. But do not bother building or installing it by default. Signed-off-by: Junio C Hamano --- .gitignore | 1 + Makefile | 3 +++ dump-cache-tree.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 dump-cache-tree.c diff --git a/.gitignore b/.gitignore index b5959d631..7906909b3 100644 --- a/.gitignore +++ b/.gitignore @@ -123,6 +123,7 @@ git-write-tree git-core-*/?* test-date test-delta +test-dump-cache-tree common-cmds.h *.tar.gz *.dsc diff --git a/Makefile b/Makefile index 518c3c176..1aa96f4f2 100644 --- a/Makefile +++ b/Makefile @@ -611,6 +611,9 @@ test-date$X: test-date.c date.o ctype.o test-delta$X: test-delta.c diff-delta.o patch-delta.o $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz +test-dump-cache-tree$X: dump-cache-tree.o $(GITLIBS) + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + check: for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done diff --git a/dump-cache-tree.c b/dump-cache-tree.c new file mode 100644 index 000000000..80f8683f3 --- /dev/null +++ b/dump-cache-tree.c @@ -0,0 +1,32 @@ +#include "cache.h" +#include "tree.h" +#include "cache-tree.h" + +static unsigned char active_cache_sha1[20]; +static struct cache_tree *active_cache_tree; + +static void dump_cache_tree(struct cache_tree *it, const char *pfx) +{ + int i; + if (it->entry_count < 0) + printf("%-40s %s\n", "invalid", pfx); + else + printf("%s %s (%d entries)\n", + sha1_to_hex(it->sha1), + pfx, it->entry_count); + for (i = 0; i < it->subtree_nr; i++) { + char path[PATH_MAX]; + struct cache_tree_sub *down = it->down[i]; + sprintf(path, "%s%.*s/", pfx, down->namelen, down->name); + dump_cache_tree(down->cache_tree, path); + } +} + +int main(int ac, char **av) +{ + if (read_cache_1(active_cache_sha1) < 0) + die("unable to read index file"); + active_cache_tree = read_cache_tree(active_cache_sha1); + dump_cache_tree(active_cache_tree, ""); + return 0; +} -- cgit v1.2.1 From dd0c34c46bdda0c20fd92d00516e711a4c9f7837 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 24 Apr 2006 15:12:42 -0700 Subject: cache-tree: protect against "git prune". We reused the cache-tree data without verifying the tree object still exists. Recompute in cache_tree_update() an otherwise valid cache-tree entry when the tree object disappeared. This is not usually a problem, but theoretically without this fix things can break when the user does something like this: - read-index from a side branch - write-tree the result - remove the side branch with "git branch -D" - remove the unreachable objects with "git prune" - write-tree what is in the index. Signed-off-by: Junio C Hamano --- cache-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index f6d1dd1d7..b34b0bc31 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -177,7 +177,7 @@ static int update_one(struct cache_tree *it, char *buffer; int i; - if (0 <= it->entry_count) + if (0 <= it->entry_count && has_sha1_file(it->sha1)) return it->entry_count; /* -- cgit v1.2.1 From bad68ec92410cf47dd001aa9b95d0f24c5f4bf77 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 24 Apr 2006 21:18:58 -0700 Subject: index: make the index file format extensible. ... and move the cache-tree data into it. Signed-off-by: Junio C Hamano --- apply.c | 11 +----- cache-tree.c | 108 +++++++++++++----------------------------------------- cache-tree.h | 9 +++-- cache.h | 6 +-- checkout-index.c | 1 + dump-cache-tree.c | 8 ++-- read-cache.c | 105 +++++++++++++++++++++++++++++++++++++++------------- read-tree.c | 2 + update-index.c | 11 +----- write-tree.c | 36 ++++++++++++------ 10 files changed, 147 insertions(+), 150 deletions(-) diff --git a/apply.c b/apply.c index e283df38a..acecf8de5 100644 --- a/apply.c +++ b/apply.c @@ -12,10 +12,6 @@ #include "quote.h" #include "blob.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - - // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch // --stat does just a diffstat, and doesn't actually apply @@ -1919,9 +1915,8 @@ static int apply_patch(int fd, const char *filename) if (write_index) newfd = hold_index_file_for_update(&cache_file, get_index_file()); if (check_index) { - if (read_cache_1(active_cache_sha1) < 0) + if (read_cache() < 0) die("unable to read index file"); - active_cache_tree = read_cache_tree(active_cache_sha1); } if ((check || apply) && check_patch_list(list) < 0) @@ -1931,11 +1926,9 @@ static int apply_patch(int fd, const char *filename) write_out_results(list, skipped_patch); if (write_index) { - if (write_cache_1(newfd, active_cache, active_nr, - active_cache_sha1) || + if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); - write_cache_tree(active_cache_sha1, active_cache_tree); } if (show_index_info) diff --git a/cache-tree.c b/cache-tree.c index b34b0bc31..2146723e9 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -11,16 +11,18 @@ struct cache_tree *cache_tree(void) return it; } -void cache_tree_free(struct cache_tree *it) +void cache_tree_free(struct cache_tree **it_p) { int i; + struct cache_tree *it = *it_p; if (!it) return; for (i = 0; i < it->subtree_nr; i++) - cache_tree_free(it->down[i]->cache_tree); + cache_tree_free(&it->down[i]->cache_tree); free(it->down); free(it); + *it_p = NULL; } static struct cache_tree_sub *find_subtree(struct cache_tree *it, @@ -78,7 +80,7 @@ void cache_tree_invalidate_path(struct cache_tree *it, const char *path) break; } if (i < it->subtree_nr) { - cache_tree_free(it->down[i]->cache_tree); + cache_tree_free(&it->down[i]->cache_tree); free(it->down[i]); /* 0 1 2 3 4 5 * ^ ^subtree_nr = 6 @@ -159,13 +161,27 @@ static void discard_unused_subtrees(struct cache_tree *it) if (s->used) down[dst++] = s; else { - cache_tree_free(s->cache_tree); + cache_tree_free(&s->cache_tree); free(s); it->subtree_nr--; } } } +int cache_tree_fully_valid(struct cache_tree *it) +{ + int i; + if (!it) + return 0; + if (it->entry_count < 0 || !has_sha1_file(it->sha1)) + return 0; + for (i = 0; i < it->subtree_nr; i++) { + if (!cache_tree_fully_valid(it->down[i]->cache_tree)) + return 0; + } + return 1; +} + static int update_one(struct cache_tree *it, struct cache_entry **cache, int entries, @@ -354,19 +370,15 @@ static void *write_one(struct cache_tree *it, return buffer; } -static void *cache_tree_write(const unsigned char *cache_sha1, - struct cache_tree *root, - unsigned long *offset_p) +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p) { char path[PATH_MAX]; unsigned long size = 8192; char *buffer = xmalloc(size); - /* the cache checksum of the corresponding index file. */ - memcpy(buffer, cache_sha1, 20); - *offset_p = 20; + *size_p = 0; path[0] = 0; - return write_one(root, path, 0, buffer, &size, offset_p); + return write_one(root, path, 0, buffer, &size, size_p); } static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) @@ -439,81 +451,13 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) return it; free_return: - cache_tree_free(it); + cache_tree_free(&it); return NULL; } -static struct cache_tree *cache_tree_read(unsigned char *sha1, - const char *buffer, - unsigned long size) +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size) { - /* check the cache-tree matches the index */ - if (memcmp(buffer, sha1, 20)) - return NULL; /* checksum mismatch */ - if (buffer[20]) + if (buffer[0]) return NULL; /* not the whole tree */ - buffer += 20; - size -= 20; return read_one(&buffer, &size); } - -struct cache_tree *read_cache_tree(unsigned char *sha1) -{ - int fd; - struct stat st; - char path[PATH_MAX]; - unsigned long size = 0; - void *map; - struct cache_tree *it; - - sprintf(path, "%s.aux", get_index_file()); - fd = open(path, O_RDONLY); - if (fd < 0) - return cache_tree(); - - if (fstat(fd, &st)) - return cache_tree(); - size = st.st_size; - map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (map == MAP_FAILED) - return cache_tree(); - it = cache_tree_read(sha1, map, size); - munmap(map, size); - if (!it) - return cache_tree(); - return it; -} - -int write_cache_tree(const unsigned char *sha1, struct cache_tree *root) -{ - char path[PATH_MAX]; - unsigned long size = 0; - void *buf, *buffer; - int fd, ret = -1; - - sprintf(path, "%s.aux", get_index_file()); - if (!root) { - unlink(path); - return -1; - } - fd = open(path, O_WRONLY|O_CREAT, 0666); - if (fd < 0) - return -1; - buffer = buf = cache_tree_write(sha1, root, &size); - while (size) { - int written = xwrite(fd, buf, size); - if (written <= 0) - goto fail; - buf += written; - size -= written; - } - ret = 0; - - fail: - close(fd); - free(buffer); - if (ret) - unlink(path); - return ret; -} diff --git a/cache-tree.h b/cache-tree.h index 7b149afdc..c70a7699a 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -18,12 +18,13 @@ struct cache_tree { }; struct cache_tree *cache_tree(void); -void cache_tree_free(struct cache_tree *); +void cache_tree_free(struct cache_tree **); void cache_tree_invalidate_path(struct cache_tree *, const char *); -int write_cache_tree(const unsigned char *, struct cache_tree *); -struct cache_tree *read_cache_tree(unsigned char *); -int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); +void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); +struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); +int cache_tree_fully_valid(struct cache_tree *); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); #endif diff --git a/cache.h b/cache.h index 8c9947ef4..a080727b0 100644 --- a/cache.h +++ b/cache.h @@ -114,6 +114,7 @@ static inline unsigned int create_ce_mode(unsigned int mode) extern struct cache_entry **active_cache; extern unsigned int active_nr, active_alloc, active_cache_changed; +extern struct cache_tree *active_cache_tree; #define GIT_DIR_ENVIRONMENT "GIT_DIR" #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" @@ -138,11 +139,8 @@ extern const char *prefix_filename(const char *prefix, int len, const char *path #define alloc_nr(x) (((x)+16)*3/2) /* Initialize and use the cache information */ -extern int read_cache_1(unsigned char *); -extern int write_cache_1(int, struct cache_entry **, int, unsigned char *); extern int read_cache(void); -extern int write_cache(int, struct cache_entry **, int); - +extern int write_cache(int newfd, struct cache_entry **cache, int entries); extern int cache_name_pos(const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ diff --git a/checkout-index.c b/checkout-index.c index dd6a2d86f..e56c354f8 100644 --- a/checkout-index.c +++ b/checkout-index.c @@ -39,6 +39,7 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" +#include "cache-tree.h" #define CHECKOUT_ALL 4 static const char *prefix; diff --git a/dump-cache-tree.c b/dump-cache-tree.c index 80f8683f3..01e8bff0e 100644 --- a/dump-cache-tree.c +++ b/dump-cache-tree.c @@ -2,12 +2,11 @@ #include "tree.h" #include "cache-tree.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - static void dump_cache_tree(struct cache_tree *it, const char *pfx) { int i; + if (!it) + return; if (it->entry_count < 0) printf("%-40s %s\n", "invalid", pfx); else @@ -24,9 +23,8 @@ static void dump_cache_tree(struct cache_tree *it, const char *pfx) int main(int ac, char **av) { - if (read_cache_1(active_cache_sha1) < 0) + if (read_cache() < 0) die("unable to read index file"); - active_cache_tree = read_cache_tree(active_cache_sha1); dump_cache_tree(active_cache_tree, ""); return 0; } diff --git a/read-cache.c b/read-cache.c index 50e094e05..1f71d1257 100644 --- a/read-cache.c +++ b/read-cache.c @@ -4,11 +4,26 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "cache.h" +#include "cache-tree.h" + +/* Index extensions. + * + * The first letter should be 'A'..'Z' for extensions that are not + * necessary for a correct operation (i.e. optimization data). + * When new extensions are added that _needs_ to be understood in + * order to correctly interpret the index file, pick character that + * is outside the range, to cause the reader to abort. + */ + +#define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) ) +#define CACHE_EXT_TREE 0x54524545 /* "TREE" */ struct cache_entry **active_cache = NULL; static time_t index_file_timestamp; unsigned int active_nr = 0, active_alloc = 0, active_cache_changed = 0; +struct cache_tree *active_cache_tree = NULL; + /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -496,12 +511,10 @@ int add_cache_entry(struct cache_entry *ce, int option) return 0; } -static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned char *sha1) +static int verify_hdr(struct cache_header *hdr, unsigned long size) { SHA_CTX c; - unsigned char sha1_buf[20]; - if (!sha1) - sha1 = sha1_buf; + unsigned char sha1[20]; if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) return error("bad signature"); @@ -515,7 +528,23 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size, unsigned cha return 0; } -int read_cache_1(unsigned char *cache_sha1) +static int read_index_extension(const char *ext, void *data, unsigned long sz) +{ + switch (CACHE_EXT(ext)) { + case CACHE_EXT_TREE: + active_cache_tree = cache_tree_read(data, sz); + break; + default: + if (*ext < 'A' || 'Z' < *ext) + return error("index uses %.4s extension, which we do not understand", + ext); + fprintf(stderr, "ignoring %.4s extension\n", ext); + break; + } + return 0; +} + +int read_cache(void) { int fd, i; struct stat st; @@ -549,7 +578,7 @@ int read_cache_1(unsigned char *cache_sha1) die("index file mmap failed (%s)", strerror(errno)); hdr = map; - if (verify_hdr(hdr, size, cache_sha1) < 0) + if (verify_hdr(hdr, size) < 0) goto unmap; active_nr = ntohl(hdr->hdr_entries); @@ -563,6 +592,22 @@ int read_cache_1(unsigned char *cache_sha1) active_cache[i] = ce; } index_file_timestamp = st.st_mtime; + while (offset <= size - 20 - 8) { + /* After an array of active_nr index entries, + * there can be arbitrary number of extended + * sections, each of which is prefixed with + * extension name (4-byte) and section length + * in 4-byte network byte order. + */ + unsigned long extsize; + memcpy(&extsize, map + offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(map + offset, + map + offset + 8, extsize) < 0) + goto unmap; + offset += 8; + offset += extsize; + } return active_nr; unmap: @@ -597,7 +642,18 @@ static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len) return 0; } -static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1) +static int write_index_ext_header(SHA_CTX *context, int fd, + unsigned long ext, unsigned long sz) +{ + ext = htonl(ext); + sz = htonl(sz); + if ((ce_write(context, fd, &ext, 4) < 0) || + (ce_write(context, fd, &sz, 4) < 0)) + return -1; + return 0; +} + +static int ce_flush(SHA_CTX *context, int fd) { unsigned int left = write_buffer_len; @@ -614,8 +670,7 @@ static int ce_flush(SHA_CTX *context, int fd, unsigned char *sha1) } /* Append the SHA1 signature at the end */ - SHA1_Final(sha1, context); - memcpy(write_buffer + left, sha1, 20); + SHA1_Final(write_buffer + left, context); left += 20; if (write(fd, write_buffer, left) != left) return -1; @@ -666,14 +721,11 @@ static void ce_smudge_racily_clean_entry(struct cache_entry *ce) } } -int write_cache_1(int newfd, struct cache_entry **cache, int entries, - unsigned char *cache_sha1) +int write_cache(int newfd, struct cache_entry **cache, int entries) { SHA_CTX c; struct cache_header hdr; int i, removed; - int status; - unsigned char sha1[20]; for (i = removed = 0; i < entries; i++) if (!cache[i]->ce_mode) @@ -697,18 +749,19 @@ int write_cache_1(int newfd, struct cache_entry **cache, int entries, if (ce_write(&c, newfd, ce, ce_size(ce)) < 0) return -1; } - status = ce_flush(&c, newfd, sha1); - if (cache_sha1) - memcpy(cache_sha1, sha1, 20); - return status; -} -int read_cache(void) -{ - return read_cache_1(NULL); -} - -int write_cache(int newfd, struct cache_entry **cache, int entries) -{ - return write_cache_1(newfd, cache, entries, NULL); + /* Write extension data here */ + if (active_cache_tree) { + unsigned long sz; + void *data = cache_tree_write(active_cache_tree, &sz); + if (data && + !write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sz) && + !ce_write(&c, newfd, data, sz)) + ; + else { + free(data); + return -1; + } + } + return ce_flush(&c, newfd); } diff --git a/read-tree.c b/read-tree.c index 26f4f7e32..1c6510129 100644 --- a/read-tree.c +++ b/read-tree.c @@ -9,6 +9,7 @@ #include "object.h" #include "tree.h" +#include "cache-tree.h" #include #include @@ -828,6 +829,7 @@ int main(int argc, char **argv) } unpack_trees(fn); + cache_tree_free(&active_cache_tree); if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); diff --git a/update-index.c b/update-index.c index 86f53948f..d6d3295e3 100644 --- a/update-index.c +++ b/update-index.c @@ -6,12 +6,8 @@ #include "cache.h" #include "strbuf.h" #include "quote.h" -#include "tree.h" #include "cache-tree.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - /* * Default to not allowing changes to the list of files. The * tool doesn't actually care, but this makes it harder to add @@ -501,10 +497,9 @@ int main(int argc, const char **argv) if (newfd < 0) die("unable to create new cachefile"); - entries = read_cache_1(active_cache_sha1); + entries = read_cache(); if (entries < 0) die("cache corrupted"); - active_cache_tree = read_cache_tree(active_cache_sha1); for (i = 1 ; i < argc; i++) { const char *path = argv[i]; @@ -630,11 +625,9 @@ int main(int argc, const char **argv) } } if (active_cache_changed) { - if (write_cache_1(newfd, active_cache, active_nr, - active_cache_sha1) || + if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("Unable to write new cachefile"); - write_cache_tree(active_cache_sha1, active_cache_tree); } return has_errors ? 1 : 0; diff --git a/write-tree.c b/write-tree.c index cef0c5bb4..a5069921a 100644 --- a/write-tree.c +++ b/write-tree.c @@ -7,21 +7,20 @@ #include "tree.h" #include "cache-tree.h" -static unsigned char active_cache_sha1[20]; -static struct cache_tree *active_cache_tree; - static int missing_ok = 0; static const char write_tree_usage[] = "git-write-tree [--missing-ok]"; +static struct cache_file cache_file; + int main(int argc, char **argv) { - int entries; + int entries, was_valid, newfd; setup_git_directory(); - entries = read_cache_1(active_cache_sha1); - active_cache_tree = read_cache_tree(active_cache_sha1); + newfd = hold_index_file_for_update(&cache_file, get_index_file()); + entries = read_cache(); if (argc == 2) { if (!strcmp(argv[1], "--missing-ok")) missing_ok = 1; @@ -35,11 +34,26 @@ int main(int argc, char **argv) if (entries < 0) die("git-write-tree: error reading cache"); - if (cache_tree_update(active_cache_tree, active_cache, active_nr, - missing_ok)) - die("git-write-tree: error building trees"); - write_cache_tree(active_cache_sha1, active_cache_tree); - + if (!active_cache_tree) + active_cache_tree = cache_tree(); + + was_valid = cache_tree_fully_valid(active_cache_tree); + if (!was_valid) { + if (cache_tree_update(active_cache_tree, + active_cache, active_nr, + missing_ok) < 0) + die("git-write-tree: error building trees"); + if (0 <= newfd) { + if (!write_cache(newfd, active_cache, active_nr)) + commit_index_file(&cache_file); + } + /* Not being able to write is fine -- we are only interested + * in updating the cache-tree part, and if the next caller + * ends up using the old index with unupdated cache-tree part + * it misses the work we did here, but that is just a + * performance penalty and not a big deal. + */ + } printf("%s\n", sha1_to_hex(active_cache_tree->sha1)); return 0; } -- cgit v1.2.1 From 53dc3f3e8069283924fcb7f1d538e2d1b03ec3bb Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 25 Apr 2006 16:37:08 -0700 Subject: Teach fsck-objects about cache-tree. Signed-off-by: Junio C Hamano --- fsck-objects.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fsck-objects.c b/fsck-objects.c index 59b25904c..cc09143a9 100644 --- a/fsck-objects.c +++ b/fsck-objects.c @@ -8,6 +8,7 @@ #include "tag.h" #include "refs.h" #include "pack.h" +#include "cache-tree.h" #define REACHABLE 0x0001 @@ -438,6 +439,21 @@ static int fsck_head_link(void) return 0; } +static int fsck_cache_tree(struct cache_tree *it) +{ + int i; + int err = 0; + + if (0 <= it->entry_count) { + struct object *obj = parse_object(it->sha1); + if (obj->type != tree_type) + err |= objerror(obj, "non-tree in cache-tree"); + } + for (i = 0; i < it->subtree_nr; i++) + err |= fsck_cache_tree(it->down[i]->cache_tree); + return err; +} + int main(int argc, char **argv) { int i, heads; @@ -547,6 +563,8 @@ int main(int argc, char **argv) obj->used = 1; mark_reachable(obj, REACHABLE); } + if (active_cache_tree) + fsck_cache_tree(active_cache_tree); } check_connectivity(); -- cgit v1.2.1 From 61fa30972c45e73d39c81d27af9eeacbda7531c9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 25 Apr 2006 17:40:02 -0700 Subject: cache-tree: sort the subtree entries. Not that this makes practical performance difference; the kernel tree for example has 200 or so directories that have subdirectory, and the largest ones have 57 of them (fs and drivers). With a test to apply 600 patches with git-apply and git-write-tree, this did not make more than one per-cent of a difference, but it is a good cleanup. Signed-off-by: Junio C Hamano --- cache-tree.c | 92 +++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 26 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 2146723e9..d8438d67d 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -19,38 +19,75 @@ void cache_tree_free(struct cache_tree **it_p) if (!it) return; for (i = 0; i < it->subtree_nr; i++) - cache_tree_free(&it->down[i]->cache_tree); + if (it->down[i]) + cache_tree_free(&it->down[i]->cache_tree); free(it->down); free(it); *it_p = NULL; } +static int subtree_name_cmp(const char *one, int onelen, + const char *two, int twolen) +{ + if (onelen < twolen) + return -1; + if (twolen < onelen) + return 1; + return memcmp(one, two, onelen); +} + +static int subtree_pos(struct cache_tree *it, const char *path, int pathlen) +{ + struct cache_tree_sub **down = it->down; + int lo, hi; + lo = 0; + hi = it->subtree_nr; + while (lo < hi) { + int mi = (lo + hi) / 2; + struct cache_tree_sub *mdl = down[mi]; + int cmp = subtree_name_cmp(path, pathlen, + mdl->name, mdl->namelen); + if (!cmp) + return mi; + if (cmp < 0) + hi = mi; + else + lo = mi + 1; + } + return -lo-1; +} + static struct cache_tree_sub *find_subtree(struct cache_tree *it, const char *path, int pathlen, int create) { - int i; struct cache_tree_sub *down; - for (i = 0; i < it->subtree_nr; i++) { - down = it->down[i]; - if (down->namelen == pathlen && - !memcmp(down->name, path, pathlen)) - return down; - } + int pos = subtree_pos(it, path, pathlen); + if (0 <= pos) + return it->down[pos]; if (!create) return NULL; + + pos = -pos-1; if (it->subtree_alloc <= it->subtree_nr) { it->subtree_alloc = alloc_nr(it->subtree_alloc); it->down = xrealloc(it->down, it->subtree_alloc * sizeof(*it->down)); } + it->subtree_nr++; + down = xmalloc(sizeof(*down) + pathlen + 1); - down->cache_tree = NULL; /* cache_tree(); */ + down->cache_tree = NULL; down->namelen = pathlen; memcpy(down->name, path, pathlen); - down->name[pathlen] = 0; /* not strictly needed */ - it->down[it->subtree_nr++] = down; + down->name[pathlen] = 0; + + if (pos < it->subtree_nr) + memmove(it->down + pos + 1, + it->down + pos, + sizeof(down) * (it->subtree_nr - pos - 1)); + it->down[pos] = down; return down; } @@ -72,25 +109,21 @@ void cache_tree_invalidate_path(struct cache_tree *it, const char *path) slash = strchr(path, '/'); it->entry_count = -1; if (!slash) { - int i; + int pos; namelen = strlen(path); - for (i = 0; i < it->subtree_nr; i++) { - if (it->down[i]->namelen == namelen && - !memcmp(it->down[i]->name, path, namelen)) - break; - } - if (i < it->subtree_nr) { - cache_tree_free(&it->down[i]->cache_tree); - free(it->down[i]); + pos = subtree_pos(it, path, namelen); + if (0 <= pos) { + cache_tree_free(&it->down[pos]->cache_tree); + free(it->down[pos]); /* 0 1 2 3 4 5 * ^ ^subtree_nr = 6 - * i + * pos * move 4 and 5 up one place (2 entries) - * 2 = 6 - 3 - 1 = subtree_nr - i - 1 + * 2 = 6 - 3 - 1 = subtree_nr - pos - 1 */ - memmove(it->down+i, it->down+i+1, + memmove(it->down+pos, it->down+pos+1, sizeof(struct cache_tree_sub *) * - (it->subtree_nr - i - 1)); + (it->subtree_nr - pos - 1)); it->subtree_nr--; } return; @@ -364,6 +397,12 @@ static void *write_one(struct cache_tree *it, } for (i = 0; i < it->subtree_nr; i++) { struct cache_tree_sub *down = it->down[i]; + if (i) { + struct cache_tree_sub *prev = it->down[i-1]; + if (subtree_name_cmp(down->name, down->namelen, + prev->name, prev->namelen) <= 0) + die("fatal - unsorted cache subtree"); + } buffer = write_one(down->cache_tree, down->name, down->namelen, buffer, size, offset); } @@ -435,14 +474,15 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) for (i = 0; i < subtree_nr; i++) { /* read each subtree */ struct cache_tree *sub; + struct cache_tree_sub *subtree; const char *name = buf; int namelen; sub = read_one(&buf, &size); if (!sub) goto free_return; namelen = strlen(name); - it->down[i] = find_subtree(it, name, namelen, 1); - it->down[i]->cache_tree = sub; + subtree = find_subtree(it, name, namelen, 1); + subtree->cache_tree = sub; } if (subtree_nr != it->subtree_nr) die("cache-tree: internal error"); -- cgit v1.2.1 From 0f8820528e4c2e4b3e1306cfe79998e7124c8d49 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 25 Apr 2006 17:40:24 -0700 Subject: test-dump-cache-tree: report number of subtrees. Signed-off-by: Junio C Hamano --- dump-cache-tree.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dump-cache-tree.c b/dump-cache-tree.c index 01e8bff0e..f6a19bac3 100644 --- a/dump-cache-tree.c +++ b/dump-cache-tree.c @@ -8,11 +8,12 @@ static void dump_cache_tree(struct cache_tree *it, const char *pfx) if (!it) return; if (it->entry_count < 0) - printf("%-40s %s\n", "invalid", pfx); + printf("%-40s %s (%d subtrees)\n", "invalid", pfx, + it->subtree_nr); else - printf("%s %s (%d entries)\n", + printf("%s %s (%d entries, %d subtrees)\n", sha1_to_hex(it->sha1), - pfx, it->entry_count); + pfx, it->entry_count, it->subtree_nr); for (i = 0; i < it->subtree_nr; i++) { char path[PATH_MAX]; struct cache_tree_sub *down = it->down[i]; -- cgit v1.2.1 From 497c32136f80aca5f724bf70c2a0f44b63cb79f1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 26 Apr 2006 22:05:05 -0700 Subject: update-index: when --unresolve, smudge the relevant cache-tree entries. Signed-off-by: Junio C Hamano --- update-index.c | 1 + 1 file changed, 1 insertion(+) diff --git a/update-index.c b/update-index.c index 258a88cbe..1c1f13bd7 100644 --- a/update-index.c +++ b/update-index.c @@ -562,6 +562,7 @@ static int unresolve_one(const char *path) goto free_return; } + cache_tree_invalidate_path(active_cache_tree, path); remove_file_from_cache(path); if (add_cache_entry(ce_2, ADD_CACHE_OK_TO_ADD)) { error("%s: cannot add our version to the index.", path); -- cgit v1.2.1 From b34c39cf31e370dad3bcfba29ee8cd023c40fd6b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 00:13:34 -0700 Subject: read-tree: teach 1 and 2 way merges about cache-tree. This teaches one-way and two-way "read-tree -m" (and its special form, "read-tree --reset" as well) not to discard cache-tree but invalidate only the changed parts of the tree. When switching between related branches, this helps the eventual commit (i.e. write-tree) by keeping cache-tree valid as much as possible. This does not prime cache-tree yet, but we ought to be able to do that for no-merge (i.e. reading from a tree object) case and, and also perhaps 1 way merge case. With this patch applied, switching between the tip of Linux 2.6 kernel tree and a branch that touches one path (fs/ext3/Makefile) from it invalidates only 3 paths out of 1201 cache-tree entries in the index, and subsequent write-tree takes about a half as much time as before. Signed-off-by: Junio C Hamano --- read-tree.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/read-tree.c b/read-tree.c index 1c6510129..ab516824e 100644 --- a/read-tree.c +++ b/read-tree.c @@ -422,6 +422,12 @@ static void verify_uptodate(struct cache_entry *ce) die("Entry '%s' not uptodate. Cannot merge.", ce->name); } +static void invalidate_ce_path(struct cache_entry *ce) +{ + if (ce) + cache_tree_invalidate_path(active_cache_tree, ce->name); +} + static int merged_entry(struct cache_entry *merge, struct cache_entry *old) { merge->ce_flags |= htons(CE_UPDATE); @@ -437,6 +443,7 @@ static int merged_entry(struct cache_entry *merge, struct cache_entry *old) *merge = *old; } else { verify_uptodate(old); + invalidate_ce_path(old); } } merge->ce_flags &= ~htons(CE_STAGEMASK); @@ -450,6 +457,7 @@ static int deleted_entry(struct cache_entry *ce, struct cache_entry *old) verify_uptodate(old); ce->ce_mode = 0; add_cache_entry(ce, ADD_CACHE_OK_TO_ADD); + invalidate_ce_path(ce); return 1; } @@ -684,8 +692,10 @@ static int oneway_merge(struct cache_entry **src) return error("Cannot do a oneway merge of %d trees", merge_size); - if (!a) + if (!a) { + invalidate_ce_path(old); return 0; + } if (old && same(old, a)) { return keep_entry(old); } @@ -704,6 +714,7 @@ static int read_cache_unmerged(void) struct cache_entry *ce = active_cache[i]; if (ce_stage(ce)) { deleted++; + invalidate_ce_path(ce); continue; } if (deleted) @@ -815,10 +826,9 @@ int main(int argc, char **argv) fn = twoway_merge; break; case 3: - fn = threeway_merge; - break; default: fn = threeway_merge; + cache_tree_free(&active_cache_tree); break; } @@ -829,7 +839,6 @@ int main(int argc, char **argv) } unpack_trees(fn); - cache_tree_free(&active_cache_tree); if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); -- cgit v1.2.1 From 7927a55d5bde25702dca4fb1a7d6eb7ef61110ba Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 01:33:07 -0700 Subject: read-tree: teach 1-way merege and plain read to prime cache-tree. This teaches read-tree to fully populate valid cache-tree when reading a tree from scratch, or reading a single tree into an existing index, reusing only the cached stat information (i.e. one-way merge). We have already taught update-index about cache-tree, so "git checkout" followed by updates to a few path followed by a "git commit" would become very efficient. Signed-off-by: Junio C Hamano --- cache-tree.c | 11 ++++++++--- cache-tree.h | 1 + read-tree.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index d8438d67d..35740b364 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -91,6 +91,12 @@ static struct cache_tree_sub *find_subtree(struct cache_tree *it, return down; } +struct cache_tree_sub *cache_tree_sub(struct cache_tree *it, const char *path) +{ + int pathlen = strlen(path); + return find_subtree(it, path, pathlen, 1); +} + void cache_tree_invalidate_path(struct cache_tree *it, const char *path) { /* a/b/c @@ -476,12 +482,11 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) struct cache_tree *sub; struct cache_tree_sub *subtree; const char *name = buf; - int namelen; + sub = read_one(&buf, &size); if (!sub) goto free_return; - namelen = strlen(name); - subtree = find_subtree(it, name, namelen, 1); + subtree = cache_tree_sub(it, name); subtree->cache_tree = sub; } if (subtree_nr != it->subtree_nr) diff --git a/cache-tree.h b/cache-tree.h index c70a7699a..5d824df2e 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -20,6 +20,7 @@ struct cache_tree { struct cache_tree *cache_tree(void); void cache_tree_free(struct cache_tree **); void cache_tree_invalidate_path(struct cache_tree *, const char *); +struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *); void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); diff --git a/read-tree.c b/read-tree.c index ab516824e..66c0120f1 100644 --- a/read-tree.c +++ b/read-tree.c @@ -725,6 +725,39 @@ static int read_cache_unmerged(void) return deleted; } +static void prime_cache_tree_rec(struct cache_tree *it, struct tree *tree) +{ + struct tree_entry_list *ent; + int cnt; + + memcpy(it->sha1, tree->object.sha1, 20); + for (cnt = 0, ent = tree->entries; ent; ent = ent->next) { + if (!ent->directory) + cnt++; + else { + struct cache_tree_sub *sub; + struct tree *subtree = (struct tree *)ent->item.tree; + if (!subtree->object.parsed) + parse_tree(subtree); + sub = cache_tree_sub(it, ent->name); + sub->cache_tree = cache_tree(); + prime_cache_tree_rec(sub->cache_tree, subtree); + cnt += sub->cache_tree->entry_count; + } + } + it->entry_count = cnt; +} + +static void prime_cache_tree(void) +{ + struct tree *tree = (struct tree *)trees->item; + if (!tree) + return; + active_cache_tree = cache_tree(); + prime_cache_tree_rec(active_cache_tree, tree); + +} + static const char read_tree_usage[] = "git-read-tree ( | -m [--aggressive] [-u | -i] [ []])"; static struct cache_file cache_file; @@ -839,6 +872,18 @@ int main(int argc, char **argv) } unpack_trees(fn); + + /* + * When reading only one tree (either the most basic form, + * "-m ent" or "--reset ent" form), we can obtain a fully + * valid cache-tree because the index must match exactly + * what came from the tree. + */ + if (trees->item && (!merge || (stage == 2))) { + cache_tree_free(&active_cache_tree); + prime_cache_tree(); + } + if (write_cache(newfd, active_cache, active_nr) || commit_index_file(&cache_file)) die("unable to write new index file"); -- cgit v1.2.1 From 2956dd3bd7bd512aa8fce7e55d5eec1e56df99ab Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 16:21:54 -0700 Subject: cache_tree_update: give an option to update cache-tree only. When the extra "dryrun" parameter is true, cache_tree_update() recomputes the invalid entry but does not actually creates new tree object. Signed-off-by: Junio C Hamano --- cache-tree.c | 20 +++++++++++++++----- cache-tree.h | 2 +- write-tree.c | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index 35740b364..a10902fd3 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -226,7 +226,8 @@ static int update_one(struct cache_tree *it, int entries, const char *base, int baselen, - int missing_ok) + int missing_ok, + int dryrun) { unsigned long size, offset; char *buffer; @@ -273,7 +274,8 @@ static int update_one(struct cache_tree *it, cache + i, entries - i, path, baselen + sublen + 1, - missing_ok); + missing_ok, + dryrun); i += subcnt - 1; sub->used = 1; } @@ -338,7 +340,14 @@ static int update_one(struct cache_tree *it, #endif } - write_sha1_file(buffer, offset, tree_type, it->sha1); + if (dryrun) { + char hdr[200]; + int hdrlen; + write_sha1_file_prepare(buffer, offset, tree_type, it->sha1, + hdr, &hdrlen); + } + else + write_sha1_file(buffer, offset, tree_type, it->sha1); free(buffer); it->entry_count = i; #if DEBUG @@ -352,13 +361,14 @@ static int update_one(struct cache_tree *it, int cache_tree_update(struct cache_tree *it, struct cache_entry **cache, int entries, - int missing_ok) + int missing_ok, + int dryrun) { int i; i = verify_cache(cache, entries); if (i) return i; - i = update_one(it, cache, entries, "", 0, missing_ok); + i = update_one(it, cache, entries, "", 0, missing_ok, dryrun); if (i < 0) return i; return 0; diff --git a/cache-tree.h b/cache-tree.h index 5d824df2e..72c64801f 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -26,6 +26,6 @@ void *cache_tree_write(struct cache_tree *root, unsigned long *size_p); struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); int cache_tree_fully_valid(struct cache_tree *); -int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int); +int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int, int); #endif diff --git a/write-tree.c b/write-tree.c index a5069921a..7a4f691d8 100644 --- a/write-tree.c +++ b/write-tree.c @@ -41,7 +41,7 @@ int main(int argc, char **argv) if (!was_valid) { if (cache_tree_update(active_cache_tree, active_cache, active_nr, - missing_ok) < 0) + missing_ok, 0) < 0) die("git-write-tree: error building trees"); if (0 <= newfd) { if (!write_cache(newfd, active_cache, active_nr)) -- cgit v1.2.1 From d2cb7c6e9303c082b406acc643018f51179e8b35 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 16:22:45 -0700 Subject: test-dump-cache-tree: validate the cached data as well. While dumping the cached data, try recomputing everything from scratch to make sure things match. Signed-off-by: Junio C Hamano --- dump-cache-tree.c | 56 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/dump-cache-tree.c b/dump-cache-tree.c index f6a19bac3..fbea263dd 100644 --- a/dump-cache-tree.c +++ b/dump-cache-tree.c @@ -2,30 +2,64 @@ #include "tree.h" #include "cache-tree.h" -static void dump_cache_tree(struct cache_tree *it, const char *pfx) + +static void dump_one(struct cache_tree *it, const char *pfx, const char *x) +{ + if (it->entry_count < 0) + printf("%-40s %s%s (%d subtrees)\n", + "invalid", x, pfx, it->subtree_nr); + else + printf("%s %s%s (%d entries, %d subtrees)\n", + sha1_to_hex(it->sha1), x, pfx, + it->entry_count, it->subtree_nr); +} + +static int dump_cache_tree(struct cache_tree *it, + struct cache_tree *ref, + const char *pfx) { int i; + int errs = 0; + if (!it) return; - if (it->entry_count < 0) - printf("%-40s %s (%d subtrees)\n", "invalid", pfx, - it->subtree_nr); - else - printf("%s %s (%d entries, %d subtrees)\n", - sha1_to_hex(it->sha1), - pfx, it->entry_count, it->subtree_nr); + if (!ref) + die("internal error"); + + if (it->entry_count < 0) { + dump_one(it, pfx, ""); + dump_one(ref, pfx, "#(ref) "); + if (it->subtree_nr != ref->subtree_nr) + errs = 1; + } + else { + dump_one(it, pfx, ""); + if (memcmp(it->sha1, ref->sha1, 20) || + ref->entry_count != it->entry_count || + ref->subtree_nr != it->subtree_nr) { + dump_one(ref, pfx, "#(ref) "); + errs = 1; + } + } + for (i = 0; i < it->subtree_nr; i++) { char path[PATH_MAX]; struct cache_tree_sub *down = it->down[i]; + struct cache_tree_sub *rdwn; + + rdwn = cache_tree_sub(ref, down->name); sprintf(path, "%s%.*s/", pfx, down->namelen, down->name); - dump_cache_tree(down->cache_tree, path); + if (dump_cache_tree(down->cache_tree, rdwn->cache_tree, path)) + errs = 1; } + return errs; } int main(int ac, char **av) { + struct cache_tree *another = cache_tree(); if (read_cache() < 0) die("unable to read index file"); - dump_cache_tree(active_cache_tree, ""); - return 0; + cache_tree_update(another, active_cache, active_nr, 0, 1); + return dump_cache_tree(active_cache_tree, another, ""); } -- cgit v1.2.1 From 7bc70a590db6ed3069d52aa38d59938e7955901b Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 27 Apr 2006 22:48:27 -0700 Subject: cache-tree.c: typefix Signed-off-by: Junio C Hamano --- cache-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index a10902fd3..28b78f88e 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -341,7 +341,7 @@ static int update_one(struct cache_tree *it, } if (dryrun) { - char hdr[200]; + unsigned char hdr[200]; int hdrlen; write_sha1_file_prepare(buffer, offset, tree_type, it->sha1, hdr, &hdrlen); -- cgit v1.2.1 From 0111ea38cbb9db0e4e245dcd5c86198186bab197 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 2 May 2006 03:31:02 +0200 Subject: cache-tree: replace a sscanf() by two strtol() calls On one of my systems, sscanf() first calls strlen() on the buffer. But this buffer is not terminated by NUL. So git crashed. strtol() does not share that problem, as it stops reading after the first non-digit. [jc: original patch was wrong and did not read the cache-tree structure correctly; this has been fixed up and tested minimally with fsck-objects. ] Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- cache-tree.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index 28b78f88e..e452238ba 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -440,6 +440,8 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) { const char *buf = *buffer; unsigned long size = *size_p; + const char *cp; + char *ep; struct cache_tree *it; int i, subtree_nr; @@ -453,7 +455,14 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p) goto free_return; buf++; size--; it = cache_tree(); - if (sscanf(buf, "%d %d\n", &it->entry_count, &subtree_nr) != 2) + + cp = buf; + it->entry_count = strtol(cp, &ep, 10); + if (cp == ep) + goto free_return; + cp = ep; + subtree_nr = strtol(cp, &ep, 10); + if (cp == ep) goto free_return; while (size && *buf && *buf != '\n') { size--; -- cgit v1.2.1 From cdc08b33ef3da0e963f9956e4a66f67cc3330f83 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 1 May 2006 22:15:54 -0700 Subject: fsck-objects: mark objects reachable from cache-tree When fsck-objects scanned cache-tree, it forgot to mark the trees it found reachable and in use. Signed-off-by: Junio C Hamano --- fsck-objects.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fsck-objects.c b/fsck-objects.c index cc09143a9..98421aab3 100644 --- a/fsck-objects.c +++ b/fsck-objects.c @@ -446,6 +446,8 @@ static int fsck_cache_tree(struct cache_tree *it) if (0 <= it->entry_count) { struct object *obj = parse_object(it->sha1); + mark_reachable(obj, REACHABLE); + obj->used = 1; if (obj->type != tree_type) err |= objerror(obj, "non-tree in cache-tree"); } -- cgit v1.2.1 From a84faf777075e54f9faf22dbc6345fd756cd0c8d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 3 May 2006 15:32:54 -0700 Subject: Fix test-dump-cache-tree in one-tree disappeared case. When reconstructing an invalidated subtree for reference purposes by test-dump-cache-tree, we did not handle the case where we shouldn't have a cached and invalidated subtree in the result, leading to an unneeded die(). Signed-off-by: Junio C Hamano --- dump-cache-tree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dump-cache-tree.c b/dump-cache-tree.c index fbea263dd..1ccaf5177 100644 --- a/dump-cache-tree.c +++ b/dump-cache-tree.c @@ -21,10 +21,9 @@ static int dump_cache_tree(struct cache_tree *it, int i; int errs = 0; - if (!it) - return; - if (!ref) - die("internal error"); + if (!it || !ref) + /* missing in either */ + return 0; if (it->entry_count < 0) { dump_one(it, pfx, ""); -- cgit v1.2.1 From c2b9ae43303342a33b2632939f319a3554f3a70c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 3 May 2006 16:07:02 -0700 Subject: read-tree: invalidate cache-tree entry when a new index entry is added. When doing two-way merge, we failed to invalidate the directory that a new entry is added (we correctly did so for modified and deleted entries). Signed-off-by: Junio C Hamano --- read-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/read-tree.c b/read-tree.c index 66c0120f1..067fb95e9 100644 --- a/read-tree.c +++ b/read-tree.c @@ -446,6 +446,8 @@ static int merged_entry(struct cache_entry *merge, struct cache_entry *old) invalidate_ce_path(old); } } + else + invalidate_ce_path(merge); merge->ce_flags &= ~htons(CE_STAGEMASK); add_cache_entry(merge, ADD_CACHE_OK_TO_ADD); return 1; -- cgit v1.2.1 From 00703e6d6822f81e7b22d079b7e6d9d044f46cf0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 3 May 2006 16:10:45 -0700 Subject: cache-tree: a bit more debugging support. Signed-off-by: Junio C Hamano --- cache-tree.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cache-tree.c b/cache-tree.c index e452238ba..a880c97b3 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -110,6 +110,10 @@ void cache_tree_invalidate_path(struct cache_tree *it, const char *path) int namelen; struct cache_tree_sub *down; +#if DEBUG + fprintf(stderr, "cache-tree invalidate <%s>\n", path); +#endif + if (!it) return; slash = strchr(path, '/'); @@ -335,7 +339,7 @@ static int update_one(struct cache_tree *it, offset += 20; #if DEBUG - fprintf(stderr, "cache-tree %o %.*s\n", + fprintf(stderr, "cache-tree update-one %o %.*s\n", mode, entlen, path + baselen); #endif } @@ -351,7 +355,7 @@ static int update_one(struct cache_tree *it, free(buffer); it->entry_count = i; #if DEBUG - fprintf(stderr, "cache-tree (%d ent, %d subtree) %s\n", + fprintf(stderr, "cache-tree update-one (%d ent, %d subtree) %s\n", it->entry_count, it->subtree_nr, sha1_to_hex(it->sha1)); #endif -- cgit v1.2.1 From 6d60bbefdc2a42614069024b0a38db8c2de33967 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 3 May 2006 21:17:45 -0700 Subject: fsck-objects: do not segfault on missing tree in cache-tree Even if trees are missing in cache-tree, we should continue and check the rest of the object database. Signed-off-by: Junio C Hamano --- fsck-objects.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fsck-objects.c b/fsck-objects.c index 98421aab3..1922b6d84 100644 --- a/fsck-objects.c +++ b/fsck-objects.c @@ -446,6 +446,11 @@ static int fsck_cache_tree(struct cache_tree *it) if (0 <= it->entry_count) { struct object *obj = parse_object(it->sha1); + if (!obj) { + error("%s: invalid sha1 pointer in cache-tree", + sha1_to_hex(it->sha1)); + return 1; + } mark_reachable(obj, REACHABLE); obj->used = 1; if (obj->type != tree_type) -- cgit v1.2.1 From b6c4a480b3161effaa3578df91d8cdc83044d7b6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sun, 7 May 2006 17:42:37 +0200 Subject: Fix crash when reading the empty tree cvsimport needs to call git-read-tree without arguments to create an empty tree. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- read-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/read-tree.c b/read-tree.c index 067fb95e9..49436bf96 100644 --- a/read-tree.c +++ b/read-tree.c @@ -881,8 +881,8 @@ int main(int argc, char **argv) * valid cache-tree because the index must match exactly * what came from the tree. */ - if (trees->item && (!merge || (stage == 2))) { - cache_tree_free(&active_cache_tree); + if (trees && trees->item && (!merge || (stage == 2))) { + cache_tree_free(&active_cache_tree); prime_cache_tree(); } -- cgit v1.2.1