aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2010-03-02 22:28:49 -0800
committerJunio C Hamano <gitster@pobox.com>2010-03-02 22:28:49 -0800
commitf54555ca29146e72d674d090a760d7bfc9afd77f (patch)
tree10a56236dde9728905caa0044d1ffa9f42b474e4
parent9be3614eff36271d5f1cd460a568a219902cb044 (diff)
parentb500d5e11ea67d29dd7be622f65571d611d6e9a3 (diff)
downloadgit-f54555ca29146e72d674d090a760d7bfc9afd77f.tar.gz
git-f54555ca29146e72d674d090a760d7bfc9afd77f.tar.xz
Merge branch 'np/fast-import-idx-v2' into maint
* np/fast-import-idx-v2: fast-import: use the diff_delta() max_delta_size argument fast-import: honor pack.indexversion and pack.packsizelimit config vars fast-import: make default pack size unlimited fast-import: use write_idx_file() instead of custom code fast-import: use sha1write() for pack data fast-import: start using struct pack_idx_entry
-rw-r--r--Documentation/git-fast-import.txt5
-rw-r--r--fast-import.c174
2 files changed, 84 insertions, 95 deletions
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 6764ff188..19082b04e 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -45,10 +45,7 @@ OPTIONS
--max-pack-size=<n>::
Maximum size of each output packfile.
- The default is 4 GiB as that is the maximum allowed
- packfile size (due to file format limitations). Some
- importers may wish to lower this, such as to ensure the
- resulting packfiles fit on CDs.
+ The default is unlimited.
--big-file-threshold=<n>::
Maximum size of a blob that fast-import will attempt to
diff --git a/fast-import.c b/fast-import.c
index b477dc6a8..74f08bd55 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -164,12 +164,11 @@ Format of STDIN stream:
struct object_entry
{
+ struct pack_idx_entry idx;
struct object_entry *next;
- uint32_t offset;
uint32_t type : TYPE_BITS,
pack_id : PACK_ID_BITS,
depth : DEPTH_BITS;
- unsigned char sha1[20];
};
struct object_entry_pool
@@ -192,7 +191,7 @@ struct mark_set
struct last_object
{
struct strbuf data;
- uint32_t offset;
+ off_t offset;
unsigned int depth;
unsigned no_swap : 1;
};
@@ -280,7 +279,7 @@ struct recent_command
/* Configured limits on output */
static unsigned long max_depth = 10;
-static off_t max_packsize = (1LL << 32) - 1;
+static off_t max_packsize;
static uintmax_t big_file_threshold = 512 * 1024 * 1024;
static int force_update;
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
@@ -313,9 +312,10 @@ static struct atom_str **atom_table;
/* The .pack file being generated */
static unsigned int pack_id;
+static struct sha1file *pack_file;
static struct packed_git *pack_data;
static struct packed_git **all_packs;
-static unsigned long pack_size;
+static off_t pack_size;
/* Table of objects we've written. */
static unsigned int object_entry_alloc = 5000;
@@ -521,7 +521,7 @@ static struct object_entry *new_object(unsigned char *sha1)
alloc_objects(object_entry_alloc);
e = blocks->next_free++;
- hashcpy(e->sha1, sha1);
+ hashcpy(e->idx.sha1, sha1);
return e;
}
@@ -530,7 +530,7 @@ static struct object_entry *find_object(unsigned char *sha1)
unsigned int h = sha1[0] << 8 | sha1[1];
struct object_entry *e;
for (e = object_table[h]; e; e = e->next)
- if (!hashcmp(sha1, e->sha1))
+ if (!hashcmp(sha1, e->idx.sha1))
return e;
return NULL;
}
@@ -542,7 +542,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
struct object_entry *p = NULL;
while (e) {
- if (!hashcmp(sha1, e->sha1))
+ if (!hashcmp(sha1, e->idx.sha1))
return e;
p = e;
e = e->next;
@@ -550,7 +550,7 @@ static struct object_entry *insert_object(unsigned char *sha1)
e = new_object(sha1);
e->next = NULL;
- e->offset = 0;
+ e->idx.offset = 0;
if (p)
p->next = e;
else
@@ -839,11 +839,12 @@ static void start_packfile(void)
p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
strcpy(p->pack_name, tmpfile);
p->pack_fd = pack_fd;
+ pack_file = sha1fd(pack_fd, p->pack_name);
hdr.hdr_signature = htonl(PACK_SIGNATURE);
hdr.hdr_version = htonl(2);
hdr.hdr_entries = 0;
- write_or_die(p->pack_fd, &hdr, sizeof(hdr));
+ sha1write(pack_file, &hdr, sizeof(hdr));
pack_data = p;
pack_size = sizeof(hdr);
@@ -853,67 +854,30 @@ static void start_packfile(void)
all_packs[pack_id] = p;
}
-static int oecmp (const void *a_, const void *b_)
-{
- struct object_entry *a = *((struct object_entry**)a_);
- struct object_entry *b = *((struct object_entry**)b_);
- return hashcmp(a->sha1, b->sha1);
-}
-
-static char *create_index(void)
+static const char *create_index(void)
{
- static char tmpfile[PATH_MAX];
- git_SHA_CTX ctx;
- struct sha1file *f;
- struct object_entry **idx, **c, **last, *e;
+ const char *tmpfile;
+ struct pack_idx_entry **idx, **c, **last;
+ struct object_entry *e;
struct object_entry_pool *o;
- uint32_t array[256];
- int i, idx_fd;
- /* Build the sorted table of object IDs. */
- idx = xmalloc(object_count * sizeof(struct object_entry*));
+ /* Build the table of object IDs. */
+ idx = xmalloc(object_count * sizeof(*idx));
c = idx;
for (o = blocks; o; o = o->next_pool)
for (e = o->next_free; e-- != o->entries;)
if (pack_id == e->pack_id)
- *c++ = e;
+ *c++ = &e->idx;
last = idx + object_count;
if (c != last)
die("internal consistency error creating the index");
- qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
- /* Generate the fan-out array. */
- c = idx;
- for (i = 0; i < 256; i++) {
- struct object_entry **next = c;
- while (next < last) {
- if ((*next)->sha1[0] != i)
- break;
- next++;
- }
- array[i] = htonl(next - idx);
- c = next;
- }
-
- idx_fd = odb_mkstemp(tmpfile, sizeof(tmpfile),
- "pack/tmp_idx_XXXXXX");
- f = sha1fd(idx_fd, tmpfile);
- sha1write(f, array, 256 * sizeof(int));
- git_SHA1_Init(&ctx);
- for (c = idx; c != last; c++) {
- uint32_t offset = htonl((*c)->offset);
- sha1write(f, &offset, 4);
- sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
- git_SHA1_Update(&ctx, (*c)->sha1, 20);
- }
- sha1write(f, pack_data->sha1, sizeof(pack_data->sha1));
- sha1close(f, NULL, CSUM_FSYNC);
+ tmpfile = write_idx_file(NULL, idx, object_count, pack_data->sha1);
free(idx);
- git_SHA1_Final(pack_data->sha1, &ctx);
return tmpfile;
}
-static char *keep_pack(char *curr_index_name)
+static char *keep_pack(const char *curr_index_name)
{
static char name[PATH_MAX];
static const char *keep_msg = "fast-import";
@@ -935,6 +899,7 @@ static char *keep_pack(char *curr_index_name)
get_object_directory(), sha1_to_hex(pack_data->sha1));
if (move_temp_to_file(curr_index_name, name))
die("cannot store index file");
+ free((void *)curr_index_name);
return name;
}
@@ -957,15 +922,17 @@ static void end_packfile(void)
clear_delta_base_cache();
if (object_count) {
+ unsigned char cur_pack_sha1[20];
char *idx_name;
int i;
struct branch *b;
struct tag *t;
close_pack_windows(pack_data);
+ sha1close(pack_file, cur_pack_sha1, 0);
fixup_pack_header_footer(pack_data->pack_fd, pack_data->sha1,
pack_data->pack_name, object_count,
- NULL, 0);
+ cur_pack_sha1, pack_size);
close(pack_data->pack_fd);
idx_name = keep_pack(create_index());
@@ -1063,25 +1030,21 @@ static int store_object(
e = insert_object(sha1);
if (mark)
insert_mark(mark, e);
- if (e->offset) {
+ if (e->idx.offset) {
duplicate_count_by_type[type]++;
return 1;
} else if (find_sha1_pack(sha1, packed_git)) {
e->type = type;
e->pack_id = MAX_PACK_ID;
- e->offset = 1; /* just not zero! */
+ e->idx.offset = 1; /* just not zero! */
duplicate_count_by_type[type]++;
return 1;
}
- if (last && last->data.buf && last->depth < max_depth) {
+ if (last && last->data.buf && last->depth < max_depth && dat->len > 20) {
delta = diff_delta(last->data.buf, last->data.len,
dat->buf, dat->len,
- &deltalen, 0);
- if (delta && deltalen >= dat->len) {
- free(delta);
- delta = NULL;
- }
+ &deltalen, dat->len - 20);
} else
delta = NULL;
@@ -1101,7 +1064,7 @@ static int store_object(
deflateEnd(&s);
/* Determine if we should auto-checkpoint. */
- if ((pack_size + 60 + s.total_out) > max_packsize
+ if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize)
|| (pack_size + 60 + s.total_out) < pack_size) {
/* This new object needs to *not* have the current pack_id. */
@@ -1127,36 +1090,40 @@ static int store_object(
e->type = type;
e->pack_id = pack_id;
- e->offset = pack_size;
+ e->idx.offset = pack_size;
object_count++;
object_count_by_type[type]++;
+ crc32_begin(pack_file);
+
if (delta) {
- unsigned long ofs = e->offset - last->offset;
+ off_t ofs = e->idx.offset - last->offset;
unsigned pos = sizeof(hdr) - 1;
delta_count_by_type[type]++;
e->depth = last->depth + 1;
hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
- write_or_die(pack_data->pack_fd, hdr, hdrlen);
+ sha1write(pack_file, hdr, hdrlen);
pack_size += hdrlen;
hdr[pos] = ofs & 127;
while (ofs >>= 7)
hdr[--pos] = 128 | (--ofs & 127);
- write_or_die(pack_data->pack_fd, hdr + pos, sizeof(hdr) - pos);
+ sha1write(pack_file, hdr + pos, sizeof(hdr) - pos);
pack_size += sizeof(hdr) - pos;
} else {
e->depth = 0;
hdrlen = encode_header(type, dat->len, hdr);
- write_or_die(pack_data->pack_fd, hdr, hdrlen);
+ sha1write(pack_file, hdr, hdrlen);
pack_size += hdrlen;
}
- write_or_die(pack_data->pack_fd, out, s.total_out);
+ sha1write(pack_file, out, s.total_out);
pack_size += s.total_out;
+ e->idx.crc32 = crc32_end(pack_file);
+
free(out);
free(delta);
if (last) {
@@ -1165,18 +1132,23 @@ static int store_object(
} else {
strbuf_swap(&last->data, dat);
}
- last->offset = e->offset;
+ last->offset = e->idx.offset;
last->depth = e->depth;
}
return 0;
}
-static void truncate_pack(off_t to)
+static void truncate_pack(off_t to, git_SHA_CTX *ctx)
{
if (ftruncate(pack_data->pack_fd, to)
|| lseek(pack_data->pack_fd, to, SEEK_SET) != to)
die_errno("cannot truncate pack to skip duplicate");
pack_size = to;
+
+ /* yes this is a layering violation */
+ pack_file->total = to;
+ pack_file->offset = 0;
+ pack_file->ctx = *ctx;
}
static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
@@ -1189,16 +1161,21 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
unsigned long hdrlen;
off_t offset;
git_SHA_CTX c;
+ git_SHA_CTX pack_file_ctx;
z_stream s;
int status = Z_OK;
/* Determine if we should auto-checkpoint. */
- if ((pack_size + 60 + len) > max_packsize
+ if ((max_packsize && (pack_size + 60 + len) > max_packsize)
|| (pack_size + 60 + len) < pack_size)
cycle_packfile();
offset = pack_size;
+ /* preserve the pack_file SHA1 ctx in case we have to truncate later */
+ sha1flush(pack_file);
+ pack_file_ctx = pack_file->ctx;
+
hdrlen = snprintf((char *)out_buf, out_sz, "blob %" PRIuMAX, len) + 1;
if (out_sz <= hdrlen)
die("impossibly large object header");
@@ -1206,6 +1183,8 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
git_SHA1_Init(&c);
git_SHA1_Update(&c, out_buf, hdrlen);
+ crc32_begin(pack_file);
+
memset(&s, 0, sizeof(s));
deflateInit(&s, pack_compression_level);
@@ -1233,7 +1212,7 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
if (!s.avail_out || status == Z_STREAM_END) {
size_t n = s.next_out - out_buf;
- write_or_die(pack_data->pack_fd, out_buf, n);
+ sha1write(pack_file, out_buf, n);
pack_size += n;
s.next_out = out_buf;
s.avail_out = out_sz;
@@ -1259,22 +1238,23 @@ static void stream_blob(uintmax_t len, unsigned char *sha1out, uintmax_t mark)
if (mark)
insert_mark(mark, e);
- if (e->offset) {
+ if (e->idx.offset) {
duplicate_count_by_type[OBJ_BLOB]++;
- truncate_pack(offset);
+ truncate_pack(offset, &pack_file_ctx);
} else if (find_sha1_pack(sha1, packed_git)) {
e->type = OBJ_BLOB;
e->pack_id = MAX_PACK_ID;
- e->offset = 1; /* just not zero! */
+ e->idx.offset = 1; /* just not zero! */
duplicate_count_by_type[OBJ_BLOB]++;
- truncate_pack(offset);
+ truncate_pack(offset, &pack_file_ctx);
} else {
e->depth = 0;
e->type = OBJ_BLOB;
e->pack_id = pack_id;
- e->offset = offset;
+ e->idx.offset = offset;
+ e->idx.crc32 = crc32_end(pack_file);
object_count++;
object_count_by_type[OBJ_BLOB]++;
}
@@ -1317,6 +1297,7 @@ static void *gfi_unpack_entry(
* the newly written data.
*/
close_pack_windows(p);
+ sha1flush(pack_file);
/* We have to offer 20 bytes additional on the end of
* the packfile as the core unpacker code assumes the
@@ -1326,7 +1307,7 @@ static void *gfi_unpack_entry(
*/
p->pack_size = pack_size + 20;
}
- return unpack_entry(p, oe->offset, &type, sizep);
+ return unpack_entry(p, oe->idx.offset, &type, sizep);
}
static const char *get_mode(const char *str, uint16_t *modep)
@@ -1457,7 +1438,7 @@ static void store_tree(struct tree_entry *root)
if (S_ISDIR(root->versions[0].mode) && le && le->pack_id == pack_id) {
mktree(t, 0, &old_tree);
lo.data = old_tree;
- lo.offset = le->offset;
+ lo.offset = le->idx.offset;
lo.depth = t->delta_depth;
}
@@ -1715,7 +1696,7 @@ static void dump_marks_helper(FILE *f,
for (k = 0; k < 1024; k++) {
if (m->data.marked[k])
fprintf(f, ":%" PRIuMAX " %s\n", base + k,
- sha1_to_hex(m->data.marked[k]->sha1));
+ sha1_to_hex(m->data.marked[k]->idx.sha1));
}
}
}
@@ -1798,7 +1779,7 @@ static void read_marks(void)
e = insert_object(sha1);
e->type = type;
e->pack_id = MAX_PACK_ID;
- e->offset = 1; /* just not zero! */
+ e->idx.offset = 1; /* just not zero! */
}
insert_mark(mark, e);
}
@@ -2183,7 +2164,7 @@ static void file_change_m(struct branch *b)
if (*p == ':') {
char *x;
oe = find_mark(strtoumax(p + 1, &x, 10));
- hashcpy(sha1, oe->sha1);
+ hashcpy(sha1, oe->idx.sha1);
p = x;
} else if (!prefixcmp(p, "inline")) {
inline_data = 1;
@@ -2316,7 +2297,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
if (*p == ':') {
char *x;
oe = find_mark(strtoumax(p + 1, &x, 10));
- hashcpy(sha1, oe->sha1);
+ hashcpy(sha1, oe->idx.sha1);
p = x;
} else if (!prefixcmp(p, "inline")) {
inline_data = 1;
@@ -2339,7 +2320,7 @@ static void note_change_n(struct branch *b, unsigned char old_fanout)
struct object_entry *commit_oe = find_mark(commit_mark);
if (commit_oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", commit_mark);
- hashcpy(commit_sha1, commit_oe->sha1);
+ hashcpy(commit_sha1, commit_oe->idx.sha1);
} else if (!get_sha1(p, commit_sha1)) {
unsigned long size;
char *buf = read_object_with_reference(commit_sha1,
@@ -2446,7 +2427,7 @@ static int parse_from(struct branch *b)
struct object_entry *oe = find_mark(idnum);
if (oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", idnum);
- hashcpy(b->sha1, oe->sha1);
+ hashcpy(b->sha1, oe->idx.sha1);
if (oe->pack_id != MAX_PACK_ID) {
unsigned long size;
char *buf = gfi_unpack_entry(oe, &size);
@@ -2481,7 +2462,7 @@ static struct hash_list *parse_merge(unsigned int *count)
struct object_entry *oe = find_mark(idnum);
if (oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", idnum);
- hashcpy(n->sha1, oe->sha1);
+ hashcpy(n->sha1, oe->idx.sha1);
} else if (!get_sha1(from, n->sha1)) {
unsigned long size;
char *buf = read_object_with_reference(n->sha1,
@@ -2639,7 +2620,7 @@ static void parse_new_tag(void)
from_mark = strtoumax(from + 1, NULL, 10);
oe = find_mark(from_mark);
type = oe->type;
- hashcpy(sha1, oe->sha1);
+ hashcpy(sha1, oe->idx.sha1);
} else if (!get_sha1(from, sha1)) {
unsigned long size;
char *buf;
@@ -2891,6 +2872,17 @@ static int git_pack_config(const char *k, const char *v, void *cb)
pack_compression_seen = 1;
return 0;
}
+ if (!strcmp(k, "pack.indexversion")) {
+ pack_idx_default_version = git_config_int(k, v);
+ if (pack_idx_default_version > 2)
+ die("bad pack.indexversion=%"PRIu32,
+ pack_idx_default_version);
+ return 0;
+ }
+ if (!strcmp(k, "pack.packsizelimit")) {
+ max_packsize = git_config_ulong(k, v);
+ return 0;
+ }
if (!strcmp(k, "core.bigfilethreshold")) {
long n = git_config_int(k, v);
big_file_threshold = 0 < n ? n : 0;