aboutsummaryrefslogtreecommitdiff
path: root/pack-bitmap.h
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2013-12-21 09:00:01 -0500
committerJunio C Hamano <gitster@pobox.com>2013-12-30 12:19:22 -0800
commitfff42755efc7585378ca562e54828532922371be (patch)
tree5900b54e19f7f37187646429aba3adff2b6cfc86 /pack-bitmap.h
parent0d4455a3ab070e0477ab80ae641ef19146b7a736 (diff)
downloadgit-fff42755efc7585378ca562e54828532922371be.tar.gz
git-fff42755efc7585378ca562e54828532922371be.tar.xz
pack-bitmap: add support for bitmap indexes
A bitmap index is a `.bitmap` file that can be found inside `$GIT_DIR/objects/pack/`, next to its corresponding packfile, and contains precalculated reachability information for selected commits. The full specification of the format for these bitmap indexes can be found in `Documentation/technical/bitmap-format.txt`. For a given commit SHA1, if it happens to be available in the bitmap index, its bitmap will represent every single object that is reachable from the commit itself. The nth bit in the bitmap is the nth object in the packfile; if it's set to 1, the object is reachable. By using the bitmaps available in the index, this commit implements several new functions: - `prepare_bitmap_git` - `prepare_bitmap_walk` - `traverse_bitmap_commit_list` - `reuse_partial_packfile_from_bitmap` The `prepare_bitmap_walk` function tries to build a bitmap of all the objects that can be reached from the commit roots of a given `rev_info` struct by using the following algorithm: - If all the interesting commits for a revision walk are available in the index, the resulting reachability bitmap is the bitwise OR of all the individual bitmaps. - When the full set of WANTs is not available in the index, we perform a partial revision walk using the commits that don't have bitmaps as roots, and limiting the revision walk as soon as we reach a commit that has a corresponding bitmap. The earlier OR'ed bitmap with all the indexed commits can now be completed as this walk progresses, so the end result is the full reachability list. - For revision walks with a HAVEs set (a set of commits that are deemed uninteresting), first we perform the same method as for the WANTs, but using our HAVEs as roots, in order to obtain a full reachability bitmap of all the uninteresting commits. This bitmap then can be used to: a) limit the subsequent walk when building the WANTs bitmap b) finding the final set of interesting commits by performing an AND-NOT of the WANTs and the HAVEs. If `prepare_bitmap_walk` runs successfully, the resulting bitmap is stored and the equivalent of a `traverse_commit_list` call can be performed by using `traverse_bitmap_commit_list`; the bitmap version of this call yields the objects straight from the packfile index (without having to look them up or parse them) and hence is several orders of magnitude faster. As an extra optimization, when `prepare_bitmap_walk` succeeds, the `reuse_partial_packfile_from_bitmap` call can be attempted: it will find the amount of objects at the beginning of the on-disk packfile that can be reused as-is, and return an offset into the packfile. The source packfile can then be loaded and the bytes up to `offset` can be written directly to the result without having to consider the entires inside the packfile individually. If the `prepare_bitmap_walk` call fails (e.g. because no bitmap files are available), the `rev_info` struct is left untouched, and can be used to perform a manual rev-walk using `traverse_commit_list`. Hence, this new set of functions are a generic API that allows to perform the equivalent of git rev-list --objects [roots...] [^uninteresting...] for any set of commits, even if they don't have specific bitmaps generated for them. In further patches, we'll use this bitmap traversal optimization to speed up the `pack-objects` and `rev-list` commands. Signed-off-by: Vicent Marti <tanoku@gmail.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'pack-bitmap.h')
-rw-r--r--pack-bitmap.h43
1 files changed, 43 insertions, 0 deletions
diff --git a/pack-bitmap.h b/pack-bitmap.h
new file mode 100644
index 000000000..b4510d5cc
--- /dev/null
+++ b/pack-bitmap.h
@@ -0,0 +1,43 @@
+#ifndef PACK_BITMAP_H
+#define PACK_BITMAP_H
+
+#include "ewah/ewok.h"
+#include "khash.h"
+
+struct bitmap_disk_entry {
+ uint32_t object_pos;
+ uint8_t xor_offset;
+ uint8_t flags;
+} __attribute__((packed));
+
+struct bitmap_disk_header {
+ char magic[4];
+ uint16_t version;
+ uint16_t options;
+ uint32_t entry_count;
+ unsigned char checksum[20];
+};
+
+static const char BITMAP_IDX_SIGNATURE[] = {'B', 'I', 'T', 'M'};
+
+enum pack_bitmap_opts {
+ BITMAP_OPT_FULL_DAG = 1
+};
+
+typedef int (*show_reachable_fn)(
+ const unsigned char *sha1,
+ enum object_type type,
+ int flags,
+ uint32_t hash,
+ struct packed_git *found_pack,
+ off_t found_offset);
+
+int prepare_bitmap_git(void);
+void count_bitmap_commit_list(uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags);
+void traverse_bitmap_commit_list(show_reachable_fn show_reachable);
+void test_bitmap_walk(struct rev_info *revs);
+char *pack_bitmap_filename(struct packed_git *p);
+int prepare_bitmap_walk(struct rev_info *revs);
+int reuse_partial_packfile_from_bitmap(struct packed_git **packfile, uint32_t *entries, off_t *up_to);
+
+#endif