diff options
field | value | date |
---|---|---|
author | Junio C Hamano <gitster@pobox.com> | 2010-03-02 22:28:49 -0800 |
committer | Junio C Hamano <gitster@pobox.com> | 2010-03-02 22:28:49 -0800 |
commit | f54555ca29146e72d674d090a760d7bfc9afd77f (patch) | |
tree | 10a56236dde9728905caa0044d1ffa9f42b474e4 | |
parent | 9be3614eff36271d5f1cd460a568a219902cb044 (diff) | |
parent | b500d5e11ea67d29dd7be622f65571d611d6e9a3 (diff) | |
download | git-f54555ca29146e72d674d090a760d7bfc9afd77f.tar.gz git-f54555ca29146e72d674d090a760d7bfc9afd77f.tar.xz |
Merge branch 'np/fast-import-idx-v2' into maint

* np/fast-import-idx-v2:
  fast-import: use the diff_delta() max_delta_size argument
  fast-import: honor pack.indexversion and pack.packsizelimit config vars
  fast-import: make default pack size unlimited
  fast-import: use write_idx_file() instead of custom code
  fast-import: use sha1write() for pack data
  fast-import: start using struct pack_idx_entry
-rw-r--r-- | Documentation/git-fast-import.txt | 5 |
-rw-r--r-- | fast-import.c | 174 |
2 files changed, 84 insertions, 95 deletions
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index 6764ff188..19082b04e 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -45,10 +45,7 @@ OPTIONS --max-pack-size=<n>:: Maximum size of each output packfile. - The default is 4 GiB as that is the maximum allowed - packfile size (due to file format limitations). Some - importers may wish to lower this, such as to ensure the - resulting packfiles fit on CDs. + The default is unlimited. --big-file-threshold=<n>:: Maximum size of a blob that fast-import will attempt to diff --git a/fast-import.c b/fast-import.c index b477dc6a8..74f08bd55 100644 --- a/fast-import.c +++ b/fast-import.c @@ -164,12 +164,11 @@ Format of STDIN stream: struct object_entry { + struct pack_idx_entry idx; struct object_entry *next; - uint32_t offset; uint32_t type : TYPE_BITS, pack_id : PACK_ID_BITS, depth : DEPTH_BITS; - unsigned char sha1[20]; }; struct object_entry_pool @@ -192,7 +191,7 @@ struct mark_set struct last_object { struct strbuf data; - uint32_t offset; + off_t offset; unsigned int depth; unsigned no_swap : 1; }; @@ -280,7 +279,7 @@ struct recent_command /* Configured limits on output */ static unsigned long max_depth = 10; -static off_t max_packsize = (1LL << 32) - 1; +static off_t max_packsize; static uintmax_t big_file_threshold = 512 * 1024 * 1024; static int force_update; static int pack_compression_level = Z_DEFAULT_COMPRESSION; @@ -313,9 +312,10 @@ static struct atom_str **atom_table; /* The .pack file being generated */ static unsigned int pack_id; +static struct sha1file *pack_file; static struct packed_git *pack_data; static struct packed_git **all_packs; -static unsigned long pack_size; +static off_t pack_size; /* Table of objects we've written. 
*/ static unsigned int object_entry_alloc = 5000; @@ -521,7 +521,7 @@ static struct object_entry *new_object(unsigned char *sha1) alloc_objects(object_entry_alloc); e = blocks->next_free++; - hashcpy(e->sha1, sha1); + hashcpy(e->idx.sha1, sha1); return e; } @@ -530,7 +530,7 @@ static struct object_entry *find_object(unsigned char *sha1) unsigned int h = sha1[0] << 8 | sha1[1]; struct object_entry *e; for (e = object_table[h]; e; e = e->next) - if (!hashcmp(sha1, e->sha1)) + if (!hashcmp(sha1, e->idx.sha1)) return e; return NULL; } @@ -542,7 +542,7 @@ static struct object_entry *insert_object(unsigned char *sha1) struct object_entry *p = NULL; while (e) { - if (!hashcmp(sha1, e->sha1)) + if (!hashcmp(sha1, e->idx.sha1)) return e; p = e; e = e->next; @@ -550,7 +550,7 @@ static struct object_entry *insert_object(unsigned char *sha1) e = new_object(sha1); e->next = NULL; - e->offset = 0; + e->idx.offset = 0; if (p) p->next = e; else @@ -839,11 +839,12 @@ static void start_packfile(void) p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2); strcpy(p->pack_name, tmpfile); p->pack_fd = pack_fd; + pack_file = sha1fd(pack_fd, p->pack_name); hdr.hdr_signature = htonl(PACK_SIGNATURE); hdr.hdr_version = htonl(2); hdr.hdr_entries = 0; - write_or_die(p->pack_fd, &hdr, sizeof(hdr)); + sha1write(pack_file, &hdr, sizeof(hdr)); pack_data = p; pack_size = sizeof(hdr); @@ -853,67 +854,30 @@ static void start_packfile(void) all_packs[pack_id] = p; } -static int oecmp (const void *a_, const void *b_) -{ - struct object_entry *a = *((struct object_entry**)a_); - struct object_entry *b = *((struct object_entry**)b_); - return hashcmp(a->sha1, b->sha1); -} - -static char *create_index(void) +static const char *create_index(void) { - static char tmpfile[PATH_MAX]; - git_SHA_CTX ctx; - struct sha1file *f; - struct object_entry **idx, **c, **last, *e; + const char *tmpfile; + struct pack_idx_entry **idx, **c, **last; + struct object_entry *e; struct object_entry_pool *o; - uint32_t array[256]; - 
int i, idx_fd; - /* Build the sorted table of object IDs. */ - idx = xmalloc(object_count * sizeof(struct object_entry*)); + /* Build the table of object IDs. */ + idx = xmalloc(object_count * sizeof(*idx)); c = idx; for (o = blocks; o; o = o->next_pool) for (e = o->next_free; e-- != o->entries;) if (pack_id == e->pack_id) - *c++ = e; + *c++ = &e->idx; last = idx + object_count; if (c != last) die("internal consistency error creating the index"); - qsort(idx, object_count, sizeof(struct object_entry*), oecmp); - /* Generate the fan-out array. */ - c = idx; - for (i = 0; i < 256; i++) { - struct object_entry **next = c; - while (next < last) { - if ((*next)->sha1[0] != i) - break; - next++; - } - array[i] = htonl(next - idx); - c = next; - } - - idx_fd = odb_mkstemp(tmpfile, sizeof(tmpfile), - "pack/tmp_idx_XXXXXX"); - f = sha1fd(idx_fd, tmpfile); - sha1write(f, array, 256 * sizeof(int)); - git_SHA1_Init(&ctx); - for (c = idx; c != last; c++) { - uint32_t offset = htonl((*c)->offset); - sha1write(f, &offset, 4); - sha1write(f, (*c)->sha1, sizeof((*c)->sha1)); - git_SHA1_Update(&ctx, (*c)->sha1, 20); - } - sha1write(f, pack_data->sha1, sizeof(pack_data->sha1)); - sha1close(f, NULL, CSUM_FSYNC); + tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1); free(idx); - git_SHA1_Final(pack_data->sha1, &ctx); return tmpfile; } -static char *keep_pack(char *curr_index_name) +static char *keep_pack(const char *curr_index_name) { static char name[PATH_MAX]; static const char *keep_msg = "fast-import"; @@ -935,6 +899,7 @@ static char *keep_pack(char *curr_index_name) get_object_directory(), sha1_to_hex(pack_data->sha1)); if (move_temp_to_file(curr_index_name, name)) die("cannot store index file"); + free((void *)curr_index_name); return name; } @@ -957,15 +922,17 @@ static void end_packfile(void) clear_delta_base_cache(); if (object_count) { + unsigned char cur_pack_sha1[20]; char *idx_name; int i; struct branch *b; struct tag *t; close_pack_windows(pack_data); + 
sha1close(pack_file, cur_pack_sha1, 0); fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1, pack_data->pack_name, object_count, - NULL, 0); + cur_pack_sha1, pack_size); close(pack_data->pack_fd); idx_name = keep_pack(create_index()); @@ -1063,25 +1030,21 @@ static int store_object( e = insert_object(sha1); if (mark) insert_mark(mark, e); - if (e->offset) { + if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; } else if (find_sha1_pack(sha1, packed_git)) { e->type = type; e->pack_id = MAX_PACK_ID; - e->offset = 1; /* just not zero! */ + e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[type]++; return 1; } - if (last && last->data.buf && last->depth < max_depth) { + if (last && last->data.buf && last->depth < max_depth && dat->len > 20) { delta = diff_delta(last->data.buf, last->data.len, dat->buf, dat->len, - &deltalen, 0); - if (delta && deltalen >= dat->len) { - free(delta); - delta = NULL; - } + &deltalen, dat->len - 20); } else delta = NULL; @@ -1101,7 +1064,7 @@ static int store_object( deflateEnd(&s); /* Determine if we should auto-checkpoint. */ - if ((pack_size + 60 + s.total_out) > max_packsize + if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize) || (pack_size + 60 + s.total_out) < pack_size) { /* This new object needs to *not* have the current pack_id. 
*/ @@ -1127,36 +1090,40 @@ static int store_object( e->type = type; e->pack_id = pack_id; - e->offset = pack_size; + e->idx.offset = pack_size; object_count++; object_count_by_type[type]++; + crc32_begin(pack_file); + if (delta) { - unsigned long ofs = e->offset - last->offset; + off_t ofs = e->idx.offset - last->offset; unsigned pos = sizeof(hdr) - 1; delta_count_by_type[type]++; e->depth = last->depth + 1; hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr); - write_or_die(pack_data->pack_fd, hdr, hdrlen); + sha1write(pack_file, hdr, hdrlen); pack_size += hdrlen; hdr[pos] = ofs & 127; while (ofs >>= 7) hdr[--pos] = 128 | (--ofs & 127); - write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos); + sha1write(pack_file, hdr + pos, sizeof(hdr) - pos); pack_size += sizeof(hdr) - pos; } else { e->depth = 0; hdrlen = encode_header(type, dat->len, hdr); - write_or_die(pack_data->pack_fd, hdr, hdrlen); + sha1write(pack_file, hdr, hdrlen); pack_size += hdrlen; } - write_or_die(pack_data->pack_fd, out, s.total_out); + sha1write(pack_file, out, s.total_out); pack_size += s.total_out; + e->idx.crc32 = crc32_end(pack_file); + free(out); free(delta); if (last) { @@ -1165,18 +1132,23 @@ static int store_object( } else { strbuf_swap(&last->data, dat); } - last->offset = e->offset; + last->offset = e->idx.offset; last->depth = e->depth; } return 0; } -static void truncate_pack(off_t to) +static void truncate_pack(off_t to, git_SHA_CTX *ctx) { if (ftruncate(pack_data->pack_fd, to) || lseek(pack_data->pack_fd, to, SEEK_SET) != to) die_errno("cannot truncate pack to skip duplicate"); pack_size = to; + + /* yes this is a layering violation */ + pack_file->total = to; + pack_file->offset = 0; + pack_file->ctx = *ctx; } static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) @@ -1189,16 +1161,21 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) unsigned long hdrlen; off_t offset; git_SHA_CTX c; + git_SHA_CTX pack_file_ctx; 
z_stream s; int status = Z_OK; /* Determine if we should auto-checkpoint. */ - if ((pack_size + 60 + len) > max_packsize + if ((max_packsize && (pack_size + 60 + len) > max_packsize) || (pack_size + 60 + len) < pack_size) cycle_packfile(); offset = pack_size; + /* preserve the pack_file SHA1 ctx in case we have to truncate later */ + sha1flush(pack_file); + pack_file_ctx = pack_file->ctx; + hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1; if (out_sz <= hdrlen) die("impossibly large object header"); @@ -1206,6 +1183,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) git_SHA1_Init(&c); git_SHA1_Update(&c, out_buf, hdrlen); + crc32_begin(pack_file); + memset(&s, 0, sizeof(s)); deflateInit(&s, pack_compression_level); @@ -1233,7 +1212,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) if (!s.avail_out || status == Z_STREAM_END) { size_t n = s.next_out - out_buf; - write_or_die(pack_data->pack_fd, out_buf, n); + sha1write(pack_file, out_buf, n); pack_size += n; s.next_out = out_buf; s.avail_out = out_sz; @@ -1259,22 +1238,23 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark) if (mark) insert_mark(mark, e); - if (e->offset) { + if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(offset); + truncate_pack(offset, &pack_file_ctx); } else if (find_sha1_pack(sha1, packed_git)) { e->type = OBJ_BLOB; e->pack_id = MAX_PACK_ID; - e->offset = 1; /* just not zero! */ + e->idx.offset = 1; /* just not zero! */ duplicate_count_by_type[OBJ_BLOB]++; - truncate_pack(offset); + truncate_pack(offset, &pack_file_ctx); } else { e->depth = 0; e->type = OBJ_BLOB; e->pack_id = pack_id; - e->offset = offset; + e->idx.offset = offset; + e->idx.crc32 = crc32_end(pack_file); object_count++; object_count_by_type[OBJ_BLOB]++; } @@ -1317,6 +1297,7 @@ static void *gfi_unpack_entry( * the newly written data. 
*/ close_pack_windows(p); + sha1flush(pack_file); /* We have to offer 20 bytes additional on the end of * the packfile as the core unpacker code assumes the @@ -1326,7 +1307,7 @@ static void *gfi_unpack_entry( */ p->pack_size = pack_size + 20; } - return unpack_entry(p, oe->offset, &type, sizep); + return unpack_entry(p, oe->idx.offset, &type, sizep); } static const char *get_mode(const char *str, uint16_t *modep) @@ -1457,7 +1438,7 @@ static void store_tree(struct tree_entry *root) if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) { mktree(t, 0, &old_tree); lo.data = old_tree; - lo.offset = le->offset; + lo.offset = le->idx.offset; lo.depth = t->delta_depth; } @@ -1715,7 +1696,7 @@ static void dump_marks_helper(FILE *f, for (k = 0; k < 1024; k++) { if (m->data.marked[k]) fprintf(f, ":%" PRIuMAX " %s\n", base + k, - sha1_to_hex(m->data.marked[k]->sha1)); + sha1_to_hex(m->data.marked[k]->idx.sha1)); } } } @@ -1798,7 +1779,7 @@ static void read_marks(void) e = insert_object(sha1); e->type = type; e->pack_id = MAX_PACK_ID; - e->offset = 1; /* just not zero! */ + e->idx.offset = 1; /* just not zero! 
*/ } insert_mark(mark, e); } @@ -2183,7 +2164,7 @@ static void file_change_m(struct branch *b) if (*p == ':') { char *x; oe = find_mark(strtoumax(p + 1, &x, 10)); - hashcpy(sha1, oe->sha1); + hashcpy(sha1, oe->idx.sha1); p = x; } else if (!prefixcmp(p, "inline")) { inline_data = 1; @@ -2316,7 +2297,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) if (*p == ':') { char *x; oe = find_mark(strtoumax(p + 1, &x, 10)); - hashcpy(sha1, oe->sha1); + hashcpy(sha1, oe->idx.sha1); p = x; } else if (!prefixcmp(p, "inline")) { inline_data = 1; @@ -2339,7 +2320,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout) struct object_entry *commit_oe = find_mark(commit_mark); if (commit_oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", commit_mark); - hashcpy(commit_sha1, commit_oe->sha1); + hashcpy(commit_sha1, commit_oe->idx.sha1); } else if (!get_sha1(p, commit_sha1)) { unsigned long size; char *buf = read_object_with_reference(commit_sha1, @@ -2446,7 +2427,7 @@ static int parse_from(struct branch *b) struct object_entry *oe = find_mark(idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); - hashcpy(b->sha1, oe->sha1); + hashcpy(b->sha1, oe->idx.sha1); if (oe->pack_id != MAX_PACK_ID) { unsigned long size; char *buf = gfi_unpack_entry(oe, &size); @@ -2481,7 +2462,7 @@ static struct hash_list *parse_merge(unsigned int *count) struct object_entry *oe = find_mark(idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); - hashcpy(n->sha1, oe->sha1); + hashcpy(n->sha1, oe->idx.sha1); } else if (!get_sha1(from, n->sha1)) { unsigned long size; char *buf = read_object_with_reference(n->sha1, @@ -2639,7 +2620,7 @@ static void parse_new_tag(void) from_mark = strtoumax(from + 1, NULL, 10); oe = find_mark(from_mark); type = oe->type; - hashcpy(sha1, oe->sha1); + hashcpy(sha1, oe->idx.sha1); } else if (!get_sha1(from, sha1)) { unsigned long size; char *buf; @@ -2891,6 +2872,17 @@ 
static int git_pack_config(const char *k, const char *v, void *cb) pack_compression_seen = 1; return 0; } + if (!strcmp(k, "pack.indexversion")) { + pack_idx_default_version = git_config_int(k, v); + if (pack_idx_default_version > 2) + die("bad pack.indexversion=%"PRIu32, + pack_idx_default_version); + return 0; + } + if (!strcmp(k, "pack.packsizelimit")) { + max_packsize = git_config_ulong(k, v); + return 0; + } if (!strcmp(k, "core.bigfilethreshold")) { long n = git_config_int(k, v); big_file_threshold = 0 < n ? n : 0; |