diff options
author | Junio C Hamano <gitster@pobox.com> | 2012-01-27 11:04:28 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2012-01-27 11:20:00 -0800 |
commit | d47553665833935157292a71248278a40e32c320 (patch) | |
tree | 13a04cd180ad12b34dbf8cefba185f0c3ec65db9 /vcs-svn | |
parent | 69204d0ab18d28d07ee2c8c9b50bbf5bd80343ab (diff) | |
parent | c5bcbcdcfa1e2a1977497cb3a342c0365c8d78d6 (diff) | |
download | git-d47553665833935157292a71248278a40e32c320.tar.gz git-d47553665833935157292a71248278a40e32c320.tar.xz |
Merge branch 'svn-fe' of git://repo.or.cz/git/jrn into jn/svn-fe
This simplifies svn-fe a great deal and fulfills a longstanding wish:
support for dumps with deltas in them, and incremental imports.
The cost is that commandline usage of the svn-fe tool becomes a little
more complicated since it no longer keeps state itself but instead reads
blobs back from fast-import in order to copy them between revisions and
apply deltas to them.
Also removes a couple of custom data structures and replaces them with
strbufs like other parts of Git.
* 'svn-fe' of git://repo.or.cz/git/jrn: (32 commits)
vcs-svn: reset first_commit_done in fast_export_init
vcs-svn: do not initialize report_buffer twice
vcs-svn: avoid hangs from corrupt deltas
vcs-svn: guard against overflow when computing preimage length
vcs-svn: cap number of bytes read from sliding view
test-svn-fe: split off "test-svn-fe -d" into a separate function
vcs-svn: implement text-delta handling
vcs-svn: let deltas use data from preimage
vcs-svn: let deltas use data from postimage
vcs-svn: verify that deltas consume all inline data
vcs-svn: implement copyfrom_data delta instruction
vcs-svn: read instructions from deltas
vcs-svn: read inline data from deltas
vcs-svn: read the preimage when applying deltas
vcs-svn: parse svndiff0 window header
vcs-svn: skeleton of an svn delta parser
vcs-svn: make buffer_read_binary API more convenient
vcs-svn: learn to maintain a sliding view of a file
Makefile: list one vcs-svn/xdiff object or header per line
vcs-svn: avoid using ls command twice
...
Conflicts:
Makefile
contrib/svn-fe/svn-fe.txt
Diffstat (limited to 'vcs-svn')
-rw-r--r-- | vcs-svn/LICENSE | 3 | ||||
-rw-r--r-- | vcs-svn/fast_export.c | 253 | ||||
-rw-r--r-- | vcs-svn/fast_export.h | 26 | ||||
-rw-r--r-- | vcs-svn/line_buffer.c | 6 | ||||
-rw-r--r-- | vcs-svn/line_buffer.h | 2 | ||||
-rw-r--r-- | vcs-svn/obj_pool.h | 61 | ||||
-rw-r--r-- | vcs-svn/repo_tree.c | 330 | ||||
-rw-r--r-- | vcs-svn/repo_tree.h | 12 | ||||
-rw-r--r-- | vcs-svn/sliding_window.c | 79 | ||||
-rw-r--r-- | vcs-svn/sliding_window.h | 18 | ||||
-rw-r--r-- | vcs-svn/string_pool.c | 102 | ||||
-rw-r--r-- | vcs-svn/string_pool.h | 11 | ||||
-rw-r--r-- | vcs-svn/string_pool.txt | 43 | ||||
-rw-r--r-- | vcs-svn/svndiff.c | 308 | ||||
-rw-r--r-- | vcs-svn/svndiff.h | 10 | ||||
-rw-r--r-- | vcs-svn/svndump.c | 117 | ||||
-rw-r--r-- | vcs-svn/trp.h | 237 | ||||
-rw-r--r-- | vcs-svn/trp.txt | 109 |
18 files changed, 789 insertions, 938 deletions
diff --git a/vcs-svn/LICENSE b/vcs-svn/LICENSE index 0a5e3c43a..eb91858b8 100644 --- a/vcs-svn/LICENSE +++ b/vcs-svn/LICENSE @@ -1,8 +1,7 @@ Copyright (C) 2010 David Barr <david.barr@cordelta.com>. All rights reserved. -Copyright (C) 2008 Jason Evans <jasone@canonware.com>. -All rights reserved. +Copyright (C) 2010 Jonathan Nieder <jrnieder@gmail.com>. Copyright (C) 2005 Stefan Hegny, hydrografix Consulting GmbH, Frankfurt/Main, Germany diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 99ed70b88..19d7c34c2 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -4,34 +4,77 @@ */ #include "git-compat-util.h" +#include "strbuf.h" +#include "quote.h" #include "fast_export.h" -#include "line_buffer.h" #include "repo_tree.h" -#include "string_pool.h" +#include "strbuf.h" +#include "svndiff.h" +#include "sliding_window.h" +#include "line_buffer.h" #define MAX_GITSVN_LINE_LEN 4096 static uint32_t first_commit_done; +static struct line_buffer postimage = LINE_BUFFER_INIT; +static struct line_buffer report_buffer = LINE_BUFFER_INIT; + +/* NEEDSWORK: move to fast_export_init() */ +static int init_postimage(void) +{ + static int postimage_initialized; + if (postimage_initialized) + return 0; + postimage_initialized = 1; + return buffer_tmpfile_init(&postimage); +} + +void fast_export_init(int fd) +{ + first_commit_done = 0; + if (buffer_fdinit(&report_buffer, fd)) + die_errno("cannot read from file descriptor %d", fd); +} + +void fast_export_deinit(void) +{ + if (buffer_deinit(&report_buffer)) + die_errno("error closing fast-import feedback stream"); +} + +void fast_export_reset(void) +{ + buffer_reset(&report_buffer); +} -void fast_export_delete(uint32_t depth, uint32_t *path) +void fast_export_delete(const char *path) { putchar('D'); putchar(' '); - pool_print_seq(depth, path, '/', stdout); + quote_c_style(path, NULL, stdout, 0); putchar('\n'); } -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark) +static void fast_export_truncate(const char *path, uint32_t mode) +{ + fast_export_modify(path, mode, "inline"); + printf("data 0\n\n"); +} + +void fast_export_modify(const char *path, uint32_t mode, const char *dataref) { /* Mode must be 100644, 100755, 120000, or 160000. */ - printf("M %06"PRIo32" :%"PRIu32" ", mode, mark); - pool_print_seq(depth, path, '/', stdout); + if (!dataref) { + fast_export_truncate(path, mode); + return; + } + printf("M %06"PRIo32" %s ", mode, dataref); + quote_c_style(path, NULL, stdout, 0); putchar('\n'); } static char gitsvnline[MAX_GITSVN_LINE_LEN]; -void fast_export_commit(uint32_t revision, const char *author, +void fast_export_begin_commit(uint32_t revision, const char *author, const struct strbuf *log, const char *uuid, const char *url, unsigned long timestamp) @@ -47,6 +90,7 @@ void fast_export_commit(uint32_t revision, const char *author, *gitsvnline = '\0'; } printf("commit refs/heads/master\n"); + printf("mark :%"PRIu32"\n", revision); printf("committer %s <%s@%s> %ld +0000\n", *author ? author : "nobody", *author ? author : "nobody", @@ -57,15 +101,44 @@ void fast_export_commit(uint32_t revision, const char *author, printf("%s\n", gitsvnline); if (!first_commit_done) { if (revision > 1) - printf("from refs/heads/master^0\n"); + printf("from :%"PRIu32"\n", revision - 1); first_commit_done = 1; } - repo_diff(revision - 1, revision); - fputc('\n', stdout); +} +void fast_export_end_commit(uint32_t revision) +{ printf("progress Imported commit %"PRIu32".\n\n", revision); } +static void ls_from_rev(uint32_t rev, const char *path) +{ + /* ls :5 path/to/old/file */ + printf("ls :%"PRIu32" ", rev); + quote_c_style(path, NULL, stdout, 0); + putchar('\n'); + fflush(stdout); +} + +static void ls_from_active_commit(const char *path) +{ + /* ls "path/to/file" */ + printf("ls \""); + quote_c_style(path, NULL, stdout, 1); + printf("\"\n"); + fflush(stdout); +} + +static const char *get_response_line(void) +{ + const char *line = buffer_read_line(&report_buffer); + if (line) + return line; + if (buffer_ferror(&report_buffer)) + die_errno("error reading from fast-import"); + die("unexpected end of fast-import feedback"); +} + static void die_short_read(struct line_buffer *input) { if (buffer_ferror(input)) @@ -73,7 +146,88 @@ static void die_short_read(struct line_buffer *input) die("invalid dump: unexpected end of file"); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) +static int ends_with(const char *s, size_t len, const char *suffix) +{ + const size_t suffixlen = strlen(suffix); + if (len < suffixlen) + return 0; + return !memcmp(s + len - suffixlen, suffix, suffixlen); +} + +static int parse_cat_response_line(const char *header, off_t *len) +{ + size_t headerlen = strlen(header); + uintmax_t n; + const char *type; + const char *end; + + if (ends_with(header, headerlen, " missing")) + return error("cat-blob reports missing blob: %s", header); + type = memmem(header, headerlen, " blob ", strlen(" blob ")); + if (!type) + return error("cat-blob header has wrong object type: %s", header); + n = strtoumax(type + strlen(" blob "), (char **) &end, 10); + if (end == type + strlen(" blob ")) + return error("cat-blob header does not contain length: %s", header); + if (memchr(type + strlen(" blob "), '-', end - type - strlen(" blob "))) + return error("cat-blob header contains negative length: %s", header); + if (n == UINTMAX_MAX || n > maximum_signed_value_of_type(off_t)) + return error("blob too large for current definition of off_t"); + *len = n; + if (*end) + return error("cat-blob header contains garbage after length: %s", header); + return 0; +} + +static void check_preimage_overflow(off_t a, off_t b) +{ + if (signed_add_overflows(a, b)) + die("blob too large for current definition of off_t"); +} + +static long apply_delta(off_t len, struct line_buffer *input, + const char *old_data, uint32_t old_mode) +{ + long ret; + struct sliding_view preimage = SLIDING_VIEW_INIT(&report_buffer, 0); + FILE *out; + + if (init_postimage() || !(out = buffer_tmpfile_rewind(&postimage))) + die("cannot open temporary file for blob retrieval"); + if (old_data) { + const char *response; + printf("cat-blob %s\n", old_data); + fflush(stdout); + response = get_response_line(); + if (parse_cat_response_line(response, &preimage.max_off)) + die("invalid cat-blob response: %s", response); + check_preimage_overflow(preimage.max_off, 1); + } + if (old_mode == REPO_MODE_LNK) { + strbuf_addstr(&preimage.buf, "link "); + check_preimage_overflow(preimage.max_off, strlen("link ")); + preimage.max_off += strlen("link "); + check_preimage_overflow(preimage.max_off, 1); + } + if (svndiff0_apply(input, len, &preimage, out)) + die("cannot apply delta"); + if (old_data) { + /* Read the remainder of preimage and trailing newline. */ + assert(!signed_add_overflows(preimage.max_off, 1)); + preimage.max_off++; /* room for newline */ + if (move_window(&preimage, preimage.max_off - 1, 1)) + die("cannot seek to end of input"); + if (preimage.buf.buf[0] != '\n') + die("missing newline after cat-blob response"); + } + ret = buffer_tmpfile_prepare_to_read(&postimage); + if (ret < 0) + die("cannot read temporary file for blob retrieval"); + strbuf_release(&preimage.buf); + return ret; +} + +void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ @@ -81,8 +235,79 @@ void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_bu if (buffer_skip_bytes(input, 5) != 5) die_short_read(input); } - printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); + printf("data %"PRIu32"\n", len); if (buffer_copy_bytes(input, len) != len) die_short_read(input); fputc('\n', stdout); } + +static int parse_ls_response(const char *response, uint32_t *mode, + struct strbuf *dataref) +{ + const char *tab; + const char *response_end; + + assert(response); + response_end = response + strlen(response); + + if (*response == 'm') { /* Missing. */ + errno = ENOENT; + return -1; + } + + /* Mode. */ + if (response_end - response < strlen("100644") || + response[strlen("100644")] != ' ') + die("invalid ls response: missing mode: %s", response); + *mode = 0; + for (; *response != ' '; response++) { + char ch = *response; + if (ch < '0' || ch > '7') + die("invalid ls response: mode is not octal: %s", response); + *mode *= 8; + *mode += ch - '0'; + } + + /* ' blob ' or ' tree ' */ + if (response_end - response < strlen(" blob ") || + (response[1] != 'b' && response[1] != 't')) + die("unexpected ls response: not a tree or blob: %s", response); + response += strlen(" blob "); + + /* Dataref. */ + tab = memchr(response, '\t', response_end - response); + if (!tab) + die("invalid ls response: missing tab: %s", response); + strbuf_add(dataref, response, tab - response); + return 0; +} + +int fast_export_ls_rev(uint32_t rev, const char *path, + uint32_t *mode, struct strbuf *dataref) +{ + ls_from_rev(rev, path); + return parse_ls_response(get_response_line(), mode, dataref); +} + +int fast_export_ls(const char *path, uint32_t *mode, struct strbuf *dataref) +{ + ls_from_active_commit(path); + return parse_ls_response(get_response_line(), mode, dataref); +} + +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + uint32_t len, struct line_buffer *input) +{ + long postimage_len; + if (len > maximum_signed_value_of_type(off_t)) + die("enormous delta"); + postimage_len = apply_delta((off_t) len, input, old_data, old_mode); + if (mode == REPO_MODE_LNK) { + buffer_skip_bytes(&postimage, strlen("link ")); + postimage_len -= strlen("link "); + } + printf("data %ld\n", postimage_len); + buffer_copy_bytes(&postimage, postimage_len); + fputc('\n', stdout); +} diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 33a8fe996..43d05b65e 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,16 +1,28 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ -#include "line_buffer.h" struct strbuf; +struct line_buffer; -void fast_export_delete(uint32_t depth, uint32_t *path); -void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, - uint32_t mark); -void fast_export_commit(uint32_t revision, const char *author, +void fast_export_init(int fd); +void fast_export_deinit(void); +void fast_export_reset(void); + +void fast_export_delete(const char *path); +void fast_export_modify(const char *path, uint32_t mode, const char *dataref); +void fast_export_begin_commit(uint32_t revision, const char *author, const struct strbuf *log, const char *uuid, const char *url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, - struct line_buffer *input); +void fast_export_end_commit(uint32_t revision); +void fast_export_data(uint32_t mode, uint32_t len, struct line_buffer *input); +void fast_export_blob_delta(uint32_t mode, + uint32_t old_mode, const char *old_data, + uint32_t len, struct line_buffer *input); + +/* If there is no such file at that rev, returns -1, errno == ENOENT. */ +int fast_export_ls_rev(uint32_t rev, const char *path, + uint32_t *mode_out, struct strbuf *dataref_out); +int fast_export_ls(const char *path, + uint32_t *mode_out, struct strbuf *dataref_out); #endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index c39038723..01fcb842f 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -91,10 +91,10 @@ char *buffer_read_line(struct line_buffer *buf) return buf->line_buffer; } -void buffer_read_binary(struct line_buffer *buf, - struct strbuf *sb, uint32_t size) +size_t buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, size_t size) { - strbuf_fread(sb, size, buf->infile); + return strbuf_fread(sb, size, buf->infile); } off_t buffer_copy_bytes(struct line_buffer *buf, off_t nbytes) diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index d0b22dda7..8901f214b 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -23,7 +23,7 @@ long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); int buffer_ferror(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); int buffer_read_char(struct line_buffer *buf); -void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +size_t buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, size_t len); /* Returns number of bytes read (not necessarily written). */ off_t buffer_copy_bytes(struct line_buffer *buf, off_t len); off_t buffer_skip_bytes(struct line_buffer *buf, off_t len); diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h deleted file mode 100644 index deb6eb813..000000000 --- a/vcs-svn/obj_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#ifndef OBJ_POOL_H_ -#define OBJ_POOL_H_ - -#include "git-compat-util.h" - -#define MAYBE_UNUSED __attribute__((__unused__)) - -#define obj_pool_gen(pre, obj_t, initial_capacity) \ -static struct { \ - uint32_t committed; \ - uint32_t size; \ - uint32_t capacity; \ - obj_t *base; \ -} pre##_pool = {0, 0, 0, NULL}; \ -static MAYBE_UNUSED uint32_t pre##_alloc(uint32_t count) \ -{ \ - uint32_t offset; \ - if (pre##_pool.size + count > pre##_pool.capacity) { \ - while (pre##_pool.size + count > pre##_pool.capacity) \ - if (pre##_pool.capacity) \ - pre##_pool.capacity *= 2; \ - else \ - pre##_pool.capacity = initial_capacity; \ - pre##_pool.base = realloc(pre##_pool.base, \ - pre##_pool.capacity * sizeof(obj_t)); \ - } \ - offset = pre##_pool.size; \ - pre##_pool.size += count; \ - return offset; \ -} \ -static MAYBE_UNUSED void pre##_free(uint32_t count) \ -{ \ - pre##_pool.size -= count; \ -} \ -static MAYBE_UNUSED uint32_t pre##_offset(obj_t *obj) \ -{ \ - return obj == NULL ? ~0 : obj - pre##_pool.base; \ -} \ -static MAYBE_UNUSED obj_t *pre##_pointer(uint32_t offset) \ -{ \ - return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \ -} \ -static MAYBE_UNUSED void pre##_commit(void) \ -{ \ - pre##_pool.committed = pre##_pool.size; \ -} \ -static MAYBE_UNUSED void pre##_reset(void) \ -{ \ - free(pre##_pool.base); \ - pre##_pool.base = NULL; \ - pre##_pool.size = 0; \ - pre##_pool.capacity = 0; \ - pre##_pool.committed = 0; \ -} - -#endif diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index c3f198d29..67d27f0b6 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -4,323 +4,45 @@ */ #include "git-compat-util.h" - -#include "string_pool.h" +#include "strbuf.h" #include "repo_tree.h" -#include "obj_pool.h" #include "fast_export.h" -#include "trp.h" - -struct repo_dirent { - uint32_t name_offset; - struct trp_node children; - uint32_t mode; - uint32_t content_offset; -}; - -struct repo_dir { - struct trp_root entries; -}; - -struct repo_commit { - uint32_t root_dir_offset; -}; - -/* Memory pools for commit, dir and dirent */ -obj_pool_gen(commit, struct repo_commit, 4096) -obj_pool_gen(dir, struct repo_dir, 4096) -obj_pool_gen(dent, struct repo_dirent, 4096) - -static uint32_t active_commit; -static uint32_t mark; - -static int repo_dirent_name_cmp(const void *a, const void *b); - -/* Treap for directory entries */ -trp_gen(static, dent_, struct repo_dirent, children, dent, repo_dirent_name_cmp) - -uint32_t next_blob_mark(void) +const char *repo_read_path(const char *path, uint32_t *mode_out) { - return mark++; -} + int err; + static struct strbuf buf = STRBUF_INIT; -static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit) -{ - return dir_pointer(commit->root_dir_offset); -} - -static struct repo_dirent *repo_first_dirent(struct repo_dir *dir) -{ - return dent_first(&dir->entries); -} - -static int repo_dirent_name_cmp(const void *a, const void *b) -{ - const struct repo_dirent *dent1 = a, *dent2 = b; - uint32_t a_offset = dent1->name_offset; - uint32_t b_offset = dent2->name_offset; - return (a_offset > b_offset) - (a_offset < b_offset); -} - -static int repo_dirent_is_dir(struct repo_dirent *dent) -{ - return dent != NULL && dent->mode == REPO_MODE_DIR; -} - -static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dent) -{ - if (!repo_dirent_is_dir(dent)) + strbuf_reset(&buf); + err = fast_export_ls(path, mode_out, &buf); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls error"); + /* Treat missing paths as directories. */ + *mode_out = REPO_MODE_DIR; return NULL; - return dir_pointer(dent->content_offset); -} - -static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir) -{ - uint32_t orig_o, new_o; - orig_o = dir_offset(orig_dir); - if (orig_o >= dir_pool.committed) - return orig_dir; - new_o = dir_alloc(1); - orig_dir = dir_pointer(orig_o); - *dir_pointer(new_o) = *orig_dir; - return dir_pointer(new_o); -} - -static struct repo_dirent *repo_read_dirent(uint32_t revision, - const uint32_t *path) -{ - uint32_t name = 0; - struct repo_dirent *key = dent_pointer(dent_alloc(1)); - struct repo_dir *dir = NULL; - struct repo_dirent *dent = NULL; - dir = repo_commit_root_dir(commit_pointer(revision)); - while (~(name = *path++)) { - key->name_offset = name; - dent = dent_search(&dir->entries, key); - if (dent == NULL || !repo_dirent_is_dir(dent)) - break; - dir = repo_dir_from_dirent(dent); } - dent_free(1); - return dent; + return buf.buf; } -static void repo_write_dirent(const uint32_t *path, uint32_t mode, - uint32_t content_offset, uint32_t del) +void repo_copy(uint32_t revision, const char *src, const char *dst) { - uint32_t name, revision, dir_o = ~0U, parent_dir_o = ~0U; - struct repo_dir *dir; - struct repo_dirent *key; - struct repo_dirent *dent = NULL; - revision = active_commit; - dir = repo_commit_root_dir(commit_pointer(revision)); - dir = repo_clone_dir(dir); - commit_pointer(revision)->root_dir_offset = dir_offset(dir); - while (~(name = *path++)) { - parent_dir_o = dir_offset(dir); - - key = dent_pointer(dent_alloc(1)); - key->name_offset = name; - - dent = dent_search(&dir->entries, key); - if (dent == NULL) - dent = key; - else - dent_free(1); - - if (dent == key) { - dent->mode = REPO_MODE_DIR; - dent->content_offset = 0; - dent = dent_insert(&dir->entries, dent); - } - - if (dent_offset(dent) < dent_pool.committed) { - dir_o = repo_dirent_is_dir(dent) ? - dent->content_offset : ~0; - dent_remove(&dir->entries, dent); - dent = dent_pointer(dent_alloc(1)); - dent->name_offset = name; - dent->mode = REPO_MODE_DIR; - dent->content_offset = dir_o; - dent = dent_insert(&dir->entries, dent); - } - - dir = repo_dir_from_dirent(dent); - dir = repo_clone_dir(dir); - dent->content_offset = dir_offset(dir); - } - if (dent == NULL) + int err; + uint32_t mode; + static struct strbuf data = STRBUF_INIT; + + strbuf_reset(&data); + err = fast_export_ls_rev(revision, src, &mode, &data); + if (err) { + if (errno != ENOENT) + die_errno("BUG: unexpected fast_export_ls_rev error"); + fast_export_delete(dst); return; - dent->mode = mode; - dent->content_offset = content_offset; - if (del && ~parent_dir_o) - dent_remove(&dir_pointer(parent_dir_o)->entries, dent); -} - -uint32_t repo_read_path(const uint32_t *path) -{ - uint32_t content_offset = 0; - struct repo_dirent *dent = repo_read_dirent(active_commit, path); - if (dent != NULL) - content_offset = dent->content_offset; - return content_offset; -} - -uint32_t repo_read_mode(const uint32_t *path) -{ - struct repo_dirent *dent = repo_read_dirent(active_commit, path); - if (dent == NULL) - die("invalid dump: path to be modified is missing"); - return dent->mode; -} - -void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst) -{ - uint32_t mode = 0, content_offset = 0; - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(revision, src); - if (src_dent != NULL) { - mode = src_dent->mode; - content_offset = src_dent->content_offset; - repo_write_dirent(dst, mode, content_offset, 0); - } -} - -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - repo_write_dirent(path, mode, blob_mark, 0); -} - -void repo_delete(uint32_t *path) -{ - repo_write_dirent(path, 0, 0, 1); -} - -static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir); - -static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dent) -{ - if (repo_dirent_is_dir(dent)) - repo_git_add_r(depth, path, repo_dir_from_dirent(dent)); - else - fast_export_modify(depth, path, - dent->mode, dent->content_offset); -} - -static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir) -{ - struct repo_dirent *de = repo_first_dirent(dir); - while (de) { - path[depth] = de->name_offset; - repo_git_add(depth + 1, path, de); - de = dent_next(&dir->entries, de); - } -} - -static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1, - struct repo_dir *dir2) -{ - struct repo_dirent *de1, *de2; - de1 = repo_first_dirent(dir1); - de2 = repo_first_dirent(dir2); - - while (de1 && de2) { - if (de1->name_offset < de2->name_offset) { - path[depth] = de1->name_offset; - fast_export_delete(depth + 1, path); - de1 = dent_next(&dir1->entries, de1); - continue; - } - if (de1->name_offset > de2->name_offset) { - path[depth] = de2->name_offset; - repo_git_add(depth + 1, path, de2); - de2 = dent_next(&dir2->entries, de2); - continue; - } - path[depth] = de1->name_offset; - - if (de1->mode == de2->mode && - de1->content_offset == de2->content_offset) { - ; /* No change. */ - } else if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) { - repo_diff_r(depth + 1, path, - repo_dir_from_dirent(de1), - repo_dir_from_dirent(de2)); - } else if (!repo_dirent_is_dir(de1) && !repo_dirent_is_dir(de2)) { - repo_git_add(depth + 1, path, de2); - } else { - fast_export_delete(depth + 1, path); - repo_git_add(depth + 1, path, de2); - } - de1 = dent_next(&dir1->entries, de1); - de2 = dent_next(&dir2->entries, de2); - } - while (de1) { - path[depth] = de1->name_offset; - fast_export_delete(depth + 1, path); - de1 = dent_next(&dir1->entries, de1); - } - while (de2) { - path[depth] = de2->name_offset; - repo_git_add(depth + 1, path, de2); - de2 = dent_next(&dir2->entries, de2); - } -} - -static uint32_t path_stack[REPO_MAX_PATH_DEPTH]; - -void repo_diff(uint32_t r1, uint32_t r2) -{ - repo_diff_r(0, - path_stack, - repo_commit_root_dir(commit_pointer(r1)), - repo_commit_root_dir(commit_pointer(r2))); -} - -void repo_commit(uint32_t revision, const char *author, - const struct strbuf *log, const char *uuid, const char *url, - unsigned long timestamp) -{ - fast_export_commit(revision, author, log, uuid, url, timestamp); - dent_commit(); - dir_commit(); - active_commit = commit_alloc(1); - commit_pointer(active_commit)->root_dir_offset = - commit_pointer(active_commit - 1)->root_dir_offset; -} - -static void mark_init(void) -{ - uint32_t i; - mark = 0; - for (i = 0; i < dent_pool.size; i++) - if (!repo_dirent_is_dir(dent_pointer(i)) && - dent_pointer(i)->content_offset > mark) - mark = dent_pointer(i)->content_offset; - mark++; -} - -void repo_init(void) -{ - mark_init(); - if (commit_pool.size == 0) { - /* Create empty tree for commit 0. */ - commit_alloc(1); - commit_pointer(0)->root_dir_offset = dir_alloc(1); - dir_pointer(0)->entries.trp_root = ~0; - dir_commit(); } - /* Preallocate next commit, ready for changes. */ - active_commit = commit_alloc(1); - commit_pointer(active_commit)->root_dir_offset = - commit_pointer(active_commit - 1)->root_dir_offset; + fast_export_modify(dst, mode, data.buf); } -void repo_reset(void) +void repo_delete(const char *path) { - pool_reset(); - commit_reset(); - dir_reset(); - dent_reset(); + fast_export_delete(path); } diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 37bde2e37..889c6a3c9 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -8,15 +8,11 @@ struct strbuf; #define REPO_MODE_EXE 0100755 #define REPO_MODE_LNK 0120000 -#define REPO_MAX_PATH_LEN 4096 -#define REPO_MAX_PATH_DEPTH 1000 - uint32_t next_blob_mark(void); -void repo_copy(uint32_t revision, const uint32_t *src, const uint32_t *dst); -void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_read_path(const uint32_t *path); -uint32_t repo_read_mode(const uint32_t *path); -void repo_delete(uint32_t *path); +void repo_copy(uint32_t revision, const char *src, const char *dst); +void repo_add(const char *path, uint32_t mode, uint32_t blob_mark); +const char *repo_read_path(const char *path, uint32_t *mode_out); +void repo_delete(const char *path); void repo_commit(uint32_t revision, const char *author, const struct strbuf *log, const char *uuid, const char *url, long unsigned timestamp); diff --git a/vcs-svn/sliding_window.c b/vcs-svn/sliding_window.c new file mode 100644 index 000000000..1bac7a4c7 --- /dev/null +++ b/vcs-svn/sliding_window.c @@ -0,0 +1,79 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "strbuf.h" + +static int input_error(struct line_buffer *file) +{ + if (!buffer_ferror(file)) + return error("delta preimage ends early"); + return error("cannot read delta preimage: %s", strerror(errno)); +} + +static int skip_or_whine(struct line_buffer *file, off_t gap) +{ + if (buffer_skip_bytes(file, gap) != gap) + return input_error(file); + return 0; +} + +static int read_to_fill_or_whine(struct line_buffer *file, + struct strbuf *buf, size_t width) +{ + buffer_read_binary(file, buf, width - buf->len); + if (buf->len != width) + return input_error(file); + return 0; +} + +static int check_overflow(off_t a, size_t b) +{ + if (b > maximum_signed_value_of_type(off_t)) + return error("unrepresentable length in delta: " + "%"PRIuMAX" > OFF_MAX", (uintmax_t) b); + if (signed_add_overflows(a, (off_t) b)) + return error("unrepresentable offset in delta: " + "%"PRIuMAX" + %"PRIuMAX" > OFF_MAX", + (uintmax_t) a, (uintmax_t) b); + return 0; +} + +int move_window(struct sliding_view *view, off_t off, size_t width) +{ + off_t file_offset; + assert(view); + assert(view->width <= view->buf.len); + assert(!check_overflow(view->off, view->buf.len)); + + if (check_overflow(off, width)) + return -1; + if (off < view->off || off + width < view->off + view->width) + return error("invalid delta: window slides left"); + if (view->max_off >= 0 && view->max_off < off + width) + return error("delta preimage ends early"); + + file_offset = view->off + view->buf.len; + if (off < file_offset) { + /* Move the overlapping region into place. */ + strbuf_remove(&view->buf, 0, off - view->off); + } else { + /* Seek ahead to skip the gap. */ + if (skip_or_whine(view->file, off - file_offset)) + return -1; + strbuf_setlen(&view->buf, 0); + } + + if (view->buf.len > width) + ; /* Already read. */ + else if (read_to_fill_or_whine(view->file, &view->buf, width)) + return -1; + + view->off = off; + view->width = width; + return 0; +} diff --git a/vcs-svn/sliding_window.h b/vcs-svn/sliding_window.h new file mode 100644 index 000000000..b43a825cb --- /dev/null +++ b/vcs-svn/sliding_window.h @@ -0,0 +1,18 @@ +#ifndef SLIDING_WINDOW_H_ +#define SLIDING_WINDOW_H_ + +#include "strbuf.h" + +struct sliding_view { + struct line_buffer *file; + off_t off; + size_t width; + off_t max_off; /* -1 means unlimited */ + struct strbuf buf; +}; + +#define SLIDING_VIEW_INIT(input, len) { (input), 0, 0, (len), STRBUF_INIT } + +extern int move_window(struct sliding_view *view, off_t off, size_t width); + +#endif diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c deleted file mode 100644 index 1b63b19a3..000000000 --- a/vcs-svn/string_pool.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#include "git-compat-util.h" -#include "trp.h" -#include "obj_pool.h" -#include "string_pool.h" - -static struct trp_root tree = { ~0U }; - -struct node { - uint32_t offset; - struct trp_node children; -}; - -/* Two memory pools: one for struct node, and another for strings */ -obj_pool_gen(node, struct node, 4096) -obj_pool_gen(string, char, 4096) - -static char *node_value(struct node *node) -{ - return node ? string_pointer(node->offset) : NULL; -} - -static int node_cmp(struct node *a, struct node *b) -{ - return strcmp(node_value(a), node_value(b)); -} - -/* Build a Treap from the node structure (a trp_node w/ offset) */ -trp_gen(static, tree_, struct node, children, node, node_cmp) - -const char *pool_fetch(uint32_t entry) -{ - return node_value(node_pointer(entry)); -} - -uint32_t pool_intern(const char *key) -{ - /* Canonicalize key */ - struct node *match = NULL, *node; - uint32_t key_len; - if (key == NULL) - return ~0; - key_len = strlen(key) + 1; - node = node_pointer(node_alloc(1)); - node->offset = string_alloc(key_len); - strcpy(node_value(node), key); - match = tree_search(&tree, node); - if (!match) { - tree_insert(&tree, node); - } else { - node_free(1); - string_free(key_len); - node = match; - } - return node_offset(node); -} - -uint32_t pool_tok_r(char *str, const char *delim, char **saveptr) -{ - char *token = strtok_r(str, delim, saveptr); - return token ? pool_intern(token) : ~0; -} - -void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream) -{ - uint32_t i; - for (i = 0; i < len && ~seq[i]; i++) { - fputs(pool_fetch(seq[i]), stream); - if (i < len - 1 && ~seq[i + 1]) - fputc(delim, stream); - } -} - -uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str) -{ - char *context = NULL; - uint32_t token = ~0U; - uint32_t length; - - if (sz == 0) - return ~0; - if (str) - token = pool_tok_r(str, delim, &context); - for (length = 0; length < sz; length++) { - seq[length] = token; - if (token == ~0) - return length; - token = pool_tok_r(NULL, delim, &context); - } - seq[sz - 1] = ~0; - return sz; -} - -void pool_reset(void) -{ - node_reset(); - string_reset(); -} diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h deleted file mode 100644 index 222fb66e6..000000000 --- a/vcs-svn/string_pool.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef STRING_POOL_H_ -#define STRING_POOL_H_ - -uint32_t pool_intern(const char *key); -const char *pool_fetch(uint32_t entry); -uint32_t pool_tok_r(char *str, const char *delim, char **saveptr); -void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream); -uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str); -void pool_reset(void); - -#endif diff --git a/vcs-svn/string_pool.txt b/vcs-svn/string_pool.txt deleted file mode 100644 index 1b41f1562..000000000 --- a/vcs-svn/string_pool.txt +++ /dev/null @@ -1,43 +0,0 @@ -string_pool API -=============== - -The string_pool API provides facilities for replacing strings -with integer keys that can be more easily compared and stored. -The facilities are designed so that one could teach Git without -too much trouble to store the information needed for these keys to -remain valid over multiple executions. - -Functions ---------- - -pool_intern:: - Include a string in the string pool and get its key. - If that string is already in the pool, retrieves its - existing key. - -pool_fetch:: - Retrieve the string associated to a given key. - -pool_tok_r:: - Extract the key of the next token from a string. - Interface mimics strtok_r. - -pool_print_seq:: - Print a sequence of strings named by key to a file, using the - specified delimiter to separate them. - - If NULL (key ~0) appears in the sequence, the sequence ends - early. - -pool_tok_seq:: - Split a string into tokens, storing the keys of segments - into a caller-provided array. - - Unless sz is 0, the array will always be ~0-terminated. - If there is not enough room for all the tokens, the - array holds as many tokens as fit in the entries before - the terminating ~0. Return value is the index after the - last token, or sz if the tokens did not fit. - -pool_reset:: - Deallocate storage for the string pool. diff --git a/vcs-svn/svndiff.c b/vcs-svn/svndiff.c new file mode 100644 index 000000000..9ee41bbc9 --- /dev/null +++ b/vcs-svn/svndiff.c @@ -0,0 +1,308 @@ +/* + * Licensed under a two-clause BSD-style license. + * See LICENSE for details. + */ + +#include "git-compat-util.h" +#include "sliding_window.h" +#include "line_buffer.h" +#include "svndiff.h" + +/* + * svndiff0 applier + * + * See http://svn.apache.org/repos/asf/subversion/trunk/notes/svndiff. + * + * svndiff0 ::= 'SVN\0' window* + * window ::= int int int int int instructions inline_data; + * instructions ::= instruction*; + * instruction ::= view_selector int int + * | copyfrom_data int + * | packed_view_selector int + * | packed_copyfrom_data + * ; + * view_selector ::= copyfrom_source + * | copyfrom_target + * ; + * copyfrom_source ::= # binary 00 000000; + * copyfrom_target ::= # binary 01 000000; + * copyfrom_data ::= # binary 10 000000; + * packed_view_selector ::= # view_selector OR-ed with 6 bit value; + * packed_copyfrom_data ::= # copyfrom_data OR-ed with 6 bit value; + * int ::= highdigit* lowdigit; + * highdigit ::= # binary 1000 0000 OR-ed with 7 bit value; + * lowdigit ::= # 7 bit value; + */ + +#define INSN_MASK 0xc0 +#define INSN_COPYFROM_SOURCE 0x00 +#define INSN_COPYFROM_TARGET 0x40 +#define INSN_COPYFROM_DATA 0x80 +#define OPERAND_MASK 0x3f + +#define VLI_CONTINUE 0x80 +#define VLI_DIGIT_MASK 0x7f +#define VLI_BITS_PER_DIGIT 7 + +struct window { + struct sliding_view *in; + struct strbuf out; + struct strbuf instructions; + struct strbuf data; +}; + +#define WINDOW_INIT(w) { (w), STRBUF_INIT, STRBUF_INIT, STRBUF_INIT } + +static void window_release(struct window *ctx) +{ + strbuf_release(&ctx->out); + strbuf_release(&ctx->instructions); + strbuf_release(&ctx->data); +} + +static int write_strbuf(struct strbuf *sb, FILE *out) +{ + if (fwrite(sb->buf, 1, sb->len, out) == sb->len) /* Success. */ + return 0; + return error("cannot write delta postimage: %s", strerror(errno)); +} + +static int error_short_read(struct line_buffer *input) +{ + if (buffer_ferror(input)) + return error("error reading delta: %s", strerror(errno)); + return error("invalid delta: unexpected end of file"); +} + +static int read_chunk(struct line_buffer *delta, off_t *delta_len, + struct strbuf *buf, size_t len) +{ + strbuf_reset(buf); + if (len > *delta_len || + buffer_read_binary(delta, buf, len) != len) + return error_short_read(delta); + *delta_len -= buf->len; + return 0; +} + +static int read_magic(struct line_buffer *in, off_t *len) +{ + static const char magic[] = {'S', 'V', 'N', '\0'}; + struct strbuf sb = STRBUF_INIT; + + if (read_chunk(in, len, &sb, sizeof(magic))) { + strbuf_release(&sb); + return -1; + } + if (memcmp(sb.buf, magic, sizeof(magic))) { + strbuf_release(&sb); + return error("invalid delta: unrecognized file type"); + } + strbuf_release(&sb); + return 0; +} + +static int read_int(struct line_buffer *in, uintmax_t *result, off_t *len) +{ + uintmax_t rv = 0; + off_t sz; + for (sz = *len; sz; sz--) { + const int ch = buffer_read_char(in); + if (ch == EOF) + break; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *len = sz - 1; + return 0; + } + return error_short_read(in); +} + +static int parse_int(const char **buf, size_t *result, const char *end) +{ + size_t rv = 0; + const char *pos; + for (pos = *buf; pos != end; pos++) { + unsigned char ch = *pos; + + rv <<= VLI_BITS_PER_DIGIT; + rv += (ch & VLI_DIGIT_MASK); + if (ch & VLI_CONTINUE) + continue; + + *result = rv; + *buf = pos + 1; + return 0; + } + return error("invalid delta: unexpected end of instructions section"); +} + +static int read_offset(struct line_buffer *in, off_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > maximum_signed_value_of_type(off_t)) + return error("unrepresentable offset in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int read_length(struct line_buffer *in, size_t *result, off_t *len) +{ + uintmax_t val; + if (read_int(in, &val, len)) + return -1; + if (val > SIZE_MAX) + return error("unrepresentable length in delta: %"PRIuMAX"", val); + *result = val; + return 0; +} + +static int copyfrom_source(struct window *ctx, const char **instructions, + size_t nbytes, const char *insns_end) +{ + size_t offset; + if (parse_int(instructions, &offset, insns_end)) + return -1; + if (unsigned_add_overflows(offset, nbytes) || + offset + nbytes > ctx->in->width) + return error("invalid delta: copies source data outside view"); + strbuf_add(&ctx->out, ctx->in->buf.buf + offset, nbytes); + return 0; +} + +static int copyfrom_target(struct window *ctx, const char **instructions, + size_t nbytes, const char *instructions_end) +{ + size_t offset; + if (parse_int(instructions, &offset, instructions_end)) + return -1; + if (offset >= ctx->out.len) + return error("invalid delta: copies from the future"); + for (; nbytes > 0; nbytes--) + strbuf_addch(&ctx->out, ctx->out.buf[offset++]); + return 0; +} + +static int copyfrom_data(struct window *ctx, size_t *data_pos, size_t nbytes) +{ + const size_t pos = *data_pos; + if (unsigned_add_overflows(pos, nbytes) || + pos + nbytes > ctx->data.len) + return error("invalid delta: copies unavailable inline data"); + strbuf_add(&ctx->out, ctx->data.buf + pos, nbytes); + *data_pos += nbytes; + return 0; +} + +static int parse_first_operand(const char **buf, size_t *out, const char *end) +{ + size_t result = (unsigned char) *(*buf)++ & OPERAND_MASK; + if (result) { /* immediate operand */ + *out = result; + return 0; + } + return parse_int(buf, out, end); +} + +static int execute_one_instruction(struct window *ctx, + const char **instructions, size_t *data_pos) +{ + unsigned int instruction; + const char *insns_end = ctx->instructions.buf + ctx->instructions.len; + size_t nbytes; + assert(ctx); + assert(instructions && *instructions); + assert(data_pos); + + instruction = (unsigned char) **instructions; + if (parse_first_operand(instructions, &nbytes, insns_end)) + return -1; + switch (instruction & INSN_MASK) { + case INSN_COPYFROM_SOURCE: + return copyfrom_source(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_TARGET: + return copyfrom_target(ctx, instructions, nbytes, insns_end); + case INSN_COPYFROM_DATA: + return copyfrom_data(ctx, data_pos, nbytes); + default: + return error("invalid delta: unrecognized instruction"); + } +} + +static int apply_window_in_core(struct window *ctx) +{ + const char *instructions; + size_t data_pos = 0; + + /* + * Fill ctx->out.buf using data from the source, target, + * and inline data views. + */ + for (instructions = ctx->instructions.buf; + instructions != ctx->instructions.buf + ctx->instructions.len; + ) + if (execute_one_instruction(ctx, &instructions, &data_pos)) + return -1; + if (data_pos != ctx->data.len) + return error("invalid delta: does not copy all inline data"); + return 0; +} + +static int apply_one_window(struct line_buffer *delta, off_t *delta_len, + struct sliding_view *preimage, FILE *out) +{ + struct window ctx = WINDOW_INIT(preimage); + size_t out_len; + size_t instructions_len; + size_t data_len; + assert(delta_len); + + /* "source view" offset and length already handled; */ + if (read_length(delta, &out_len, delta_len) || + read_length(delta, &instructions_len, delta_len) || + read_length(delta, &data_len, delta_len) || + read_chunk(delta, delta_len, &ctx.instructions, instructions_len) || + read_chunk(delta, delta_len, &ctx.data, data_len)) + goto error_out; + strbuf_grow(&ctx.out, out_len); + if (apply_window_in_core(&ctx)) + goto error_out; + if (ctx.out.len != out_len) { + error("invalid delta: incorrect postimage length"); + goto error_out; + } + if (write_strbuf(&ctx.out, out)) + goto error_out; + window_release(&ctx); + return 0; +error_out: + window_release(&ctx); + return -1; +} + +int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage) +{ + assert(delta && preimage && postimage); + + if (read_magic(delta, &delta_len)) + return -1; + while (delta_len) { /* For each window: */ + off_t pre_off; + size_t pre_len; + + if (read_offset(delta, &pre_off, &delta_len) || + read_length(delta, &pre_len, &delta_len) || + move_window(preimage, pre_off, pre_len) || + apply_one_window(delta, &delta_len, preimage, postimage)) + return -1; + } + return 0; +} diff --git a/vcs-svn/svndiff.h b/vcs-svn/svndiff.h new file mode 100644 index 000000000..74eb464ba --- /dev/null +++ b/vcs-svn/svndiff.h @@ -0,0 +1,10 @@ +#ifndef SVNDIFF_H_ +#define SVNDIFF_H_ + +struct line_buffer; +struct sliding_view; + +extern int svndiff0_apply(struct line_buffer *delta, off_t delta_len, + struct sliding_view *preimage, FILE *postimage); + +#endif diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index bc792223b..ca63760fe 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -11,7 +11,6 @@ #include "repo_tree.h" #include "fast_export.h" #include "line_buffer.h" -#include "string_pool.h" #include "strbuf.h" #include "svndump.h" @@ -21,15 +20,19 @@ */ #define constcmp(s, ref) memcmp(s, ref, sizeof(ref) - 1) +#define REPORT_FILENO 3 + #define NODEACT_REPLACE 4 #define NODEACT_DELETE 3 #define NODEACT_ADD 2 #define NODEACT_CHANGE 1 #define NODEACT_UNKNOWN 0 -#define DUMP_CTX 0 -#define REV_CTX 1 -#define NODE_CTX 2 +/* States: */ +#define DUMP_CTX 0 /* dump metadata */ +#define REV_CTX 1 /* revision metadata */ +#define NODE_CTX 2 /* node metadata */ +#define INTERNODE_CTX 3 /* between nodes */ #define LENGTH_UNKNOWN (~0) #define DATE_RFC2822_LEN 31 @@ -38,7 +41,7 @@ static struct line_buffer input = LINE_BUFFER_INIT; static struct { uint32_t action, propLength, textLength, srcRev, type; - uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + struct strbuf src, dst; uint32_t text_delta, prop_delta; } node_ctx; @@ -59,9 +62,11 @@ static void reset_node_ctx(char *fname) node_ctx.action = NODEACT_UNKNOWN; node_ctx.propLength = LENGTH_UNKNOWN; node_ctx.textLength = LENGTH_UNKNOWN; - node_ctx.src[0] = ~0; + strbuf_reset(&node_ctx.src); node_ctx.srcRev = 0; - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); + strbuf_reset(&node_ctx.dst); + if (fname) + strbuf_addstr(&node_ctx.dst, fname); node_ctx.text_delta = 0; node_ctx.prop_delta = 0; } @@ -202,28 +207,32 @@ static void read_props(void) static void handle_node(void) { - uint32_t mark = 0; const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; const int have_text = node_ctx.textLength != LENGTH_UNKNOWN; + /* + * Old text for this node: + * NULL - directory or bug + * empty_blob - empty + * "<dataref>" - data retrievable from fast-import + */ + static const char *const empty_blob = "::empty::"; + const char *old_data = NULL; + uint32_t old_mode = REPO_MODE_BLB; - if (node_ctx.text_delta) - die("text deltas not supported"); - if (have_text) - mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { if (have_text || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " "copyfrom info, text, or properties"); - repo_delete(node_ctx.dst); + repo_delete(node_ctx.dst.buf); return; } if (node_ctx.action == NODEACT_REPLACE) { - repo_delete(node_ctx.dst); + repo_delete(node_ctx.dst.buf); node_ctx.action = NODEACT_ADD; } if (node_ctx.srcRev) { - repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + repo_copy(node_ctx.srcRev, node_ctx.src.buf, node_ctx.dst.buf); if (node_ctx.action == NODEACT_ADD) node_ctx.action = NODEACT_CHANGE; } @@ -231,23 +240,27 @@ static void handle_node(void) die("invalid dump: directories cannot have text attached"); /* - * Decide on the new content (mark) and mode (node_ctx.type). + * Find old content (old_data) and decide on the new mode. */ - if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (node_ctx.action == NODEACT_CHANGE && !*node_ctx.dst.buf) { if (type != REPO_MODE_DIR) die("invalid dump: root of tree is not a regular file"); + old_data = NULL; } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode; - if (!have_text) - mark = repo_read_path(node_ctx.dst); - mode = repo_read_mode(node_ctx.dst); + old_data = repo_read_path(node_ctx.dst.buf, &mode); if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) die("invalid dump: cannot modify a directory into a file"); if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) die("invalid dump: cannot modify a file into a directory"); node_ctx.type = mode; + old_mode = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (!have_text && type != REPO_MODE_DIR) + if (type == REPO_MODE_DIR) + old_data = NULL; + else if (have_text) + old_data = empty_blob; + else die("invalid dump: adds node without text"); } else { die("invalid dump: Node-path block lacks Node-action"); @@ -266,18 +279,39 @@ static void handle_node(void) /* * Save the result. */ - repo_add(node_ctx.dst, node_ctx.type, mark); - if (have_text) - fast_export_blob(node_ctx.type, mark, - node_ctx.textLength, &input); + if (type == REPO_MODE_DIR) /* directories are not tracked. */ + return; + assert(old_data); + if (old_data == empty_blob) + /* For the fast_export_* functions, NULL means empty. */ + old_data = NULL; + if (!have_text) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, old_data); + return; + } + if (!node_ctx.text_delta) { + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_data(node_ctx.type, node_ctx.textLength, &input); + return; + } + fast_export_modify(node_ctx.dst.buf, node_ctx.type, "inline"); + fast_export_blob_delta(node_ctx.type, old_mode, old_data, + node_ctx.textLength, &input); +} + +static void begin_revision(void) +{ + if (!rev_ctx.revision) /* revision 0 gets no git commit. */ + return; + fast_export_begin_commit(rev_ctx.revision, rev_ctx.author.buf, + &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, + rev_ctx.timestamp); } -static void handle_revision(void) +static void end_revision(void) { if (rev_ctx.revision) - repo_commit(rev_ctx.revision, rev_ctx.author.buf, - &rev_ctx.log, dump_ctx.uuid.buf, dump_ctx.url.buf, - rev_ctx.timestamp); + fast_export_end_commit(rev_ctx.revision); } void svndump_read(const char *url) @@ -318,8 +352,10 @@ void svndump_read(const char *url) continue; if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); active_ctx = REV_CTX; reset_rev_ctx(atoi(val)); break; @@ -329,6 +365,8 @@ void svndump_read(const char *url) if (!constcmp(t + strlen("Node-"), "path")) { if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); active_ctx = NODE_CTX; reset_node_ctx(val); break; @@ -361,7 +399,8 @@ void svndump_read(const char *url) case sizeof("Node-copyfrom-path"): if (constcmp(t, "Node-copyfrom-path")) continue; - pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val); + strbuf_reset(&node_ctx.src); + strbuf_addstr(&node_ctx.src, val); break; case sizeof("Node-copyfrom-rev"): if (constcmp(t, "Node-copyfrom-rev")) @@ -399,7 +438,7 @@ void svndump_read(const char *url) read_props(); } else if (active_ctx == NODE_CTX) { handle_node(); - active_ctx = REV_CTX; + active_ctx = INTERNODE_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); if (buffer_skip_bytes(&input, len) != len) @@ -411,19 +450,23 @@ void svndump_read(const char *url) die_short_read(); if (active_ctx == NODE_CTX) handle_node(); + if (active_ctx == REV_CTX) + begin_revision(); if (active_ctx != DUMP_CTX) - handle_revision(); + end_revision(); } int svndump_init(const char *filename) { if (buffer_init(&input, filename)) return error("cannot open %s: %s", filename, strerror(errno)); - repo_init(); + fast_export_init(REPORT_FILENO); strbuf_init(&dump_ctx.uuid, 4096); strbuf_init(&dump_ctx.url, 4096); strbuf_init(&rev_ctx.log, 4096); strbuf_init(&rev_ctx.author, 4096); + strbuf_init(&node_ctx.src, 4096); + strbuf_init(&node_ctx.dst, 4096); reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); @@ -432,11 +475,13 @@ int svndump_init(const char *filename) void svndump_deinit(void) { - repo_reset(); + fast_export_deinit(); reset_dump_ctx(NULL); reset_rev_ctx(0); reset_node_ctx(NULL); strbuf_release(&rev_ctx.log); + strbuf_release(&node_ctx.src); + strbuf_release(&node_ctx.dst); if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) @@ -445,8 +490,8 @@ void svndump_deinit(void) void svndump_reset(void) { + fast_export_reset(); buffer_reset(&input); - repo_reset(); strbuf_release(&dump_ctx.uuid); strbuf_release(&dump_ctx.url); strbuf_release(&rev_ctx.log); diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h deleted file mode 100644 index c32b9184e..000000000 --- a/vcs-svn/trp.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * C macro implementation of treaps. - * - * Usage: - * #include <stdint.h> - * #include "trp.h" - * trp_gen(...) - * - * Licensed under a two-clause BSD-style license. - * See LICENSE for details. - */ - -#ifndef TRP_H_ -#define TRP_H_ - -#define MAYBE_UNUSED __attribute__((__unused__)) - -/* Node structure. */ -struct trp_node { - uint32_t trpn_left; - uint32_t trpn_right; -}; - -/* Root structure. */ -struct trp_root { - uint32_t trp_root; -}; - -/* Pointer/Offset conversion. */ -#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset)) -#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer)) -#define trpn_modify(a_base, a_offset) \ - do { \ - if ((a_offset) < a_base##_pool.committed) { \ - uint32_t old_offset = (a_offset);\ - (a_offset) = a_base##_alloc(1); \ - *trpn_pointer(a_base, a_offset) = \ - *trpn_pointer(a_base, old_offset); \ - } \ - } while (0) - -/* Left accessors. */ -#define trp_left_get(a_base, a_field, a_node) \ - (trpn_pointer(a_base, a_node)->a_field.trpn_left) -#define trp_left_set(a_base, a_field, a_node, a_left) \ - do { \ - trpn_modify(a_base, a_node); \ - trp_left_get(a_base, a_field, a_node) = (a_left); \ - } while (0) - -/* Right accessors. */ -#define trp_right_get(a_base, a_field, a_node) \ - (trpn_pointer(a_base, a_node)->a_field.trpn_right) -#define trp_right_set(a_base, a_field, a_node, a_right) \ - do { \ - trpn_modify(a_base, a_node); \ - trp_right_get(a_base, a_field, a_node) = (a_right); \ - } while (0) - -/* - * Fibonacci hash function. - * The multiplier is the nearest prime to (2^32 times (√5 - 1)/2). - * See Knuth §6.4: volume 3, 3rd ed, p518. - */ -#define trpn_hash(a_node) (uint32_t) (2654435761u * (a_node)) - -/* Priority accessors. */ -#define trp_prio_get(a_node) trpn_hash(a_node) - -/* Node initializer. */ -#define trp_node_new(a_base, a_field, a_node) \ - do { \ - trp_left_set(a_base, a_field, (a_node), ~0); \ - trp_right_set(a_base, a_field, (a_node), ~0); \ - } while (0) - -/* Internal utility macros. */ -#define trpn_first(a_base, a_field, a_root, r_node) \ - do { \ - (r_node) = (a_root); \ - if ((r_node) == ~0) \ - return NULL; \ - while (~trp_left_get(a_base, a_field, (r_node))) \ - (r_node) = trp_left_get(a_base, a_field, (r_node)); \ - } while (0) - -#define trpn_rotate_left(a_base, a_field, a_node, r_node) \ - do { \ - (r_node) = trp_right_get(a_base, a_field, (a_node)); \ - trp_right_set(a_base, a_field, (a_node), \ - trp_left_get(a_base, a_field, (r_node))); \ - trp_left_set(a_base, a_field, (r_node), (a_node)); \ - } while (0) - -#define trpn_rotate_right(a_base, a_field, a_node, r_node) \ - do { \ - (r_node) = trp_left_get(a_base, a_field, (a_node)); \ - trp_left_set(a_base, a_field, (a_node), \ - trp_right_get(a_base, a_field, (r_node))); \ - trp_right_set(a_base, a_field, (r_node), (a_node)); \ - } while (0) - -#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \ -a_attr a_type MAYBE_UNUSED *a_pre##first(struct trp_root *treap) \ -{ \ - uint32_t ret; \ - trpn_first(a_base, a_field, treap->trp_root, ret); \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##next(struct trp_root *treap, a_type *node) \ -{ \ - uint32_t ret; \ - uint32_t offset = trpn_offset(a_base, node); \ - if (~trp_right_get(a_base, a_field, offset)) { \ - trpn_first(a_base, a_field, \ - trp_right_get(a_base, a_field, offset), ret); \ - } else { \ - uint32_t tnode = treap->trp_root; \ - ret = ~0; \ - while (1) { \ - int cmp = (a_cmp)(trpn_pointer(a_base, offset), \ - trpn_pointer(a_base, tnode)); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = trp_left_get(a_base, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = trp_right_get(a_base, a_field, tnode); \ - } else { \ - break; \ - } \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##search(struct trp_root *treap, a_type *key) \ -{ \ - int cmp; \ - uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ - if (cmp < 0) { \ - ret = trp_left_get(a_base, a_field, ret); \ - } else { \ - ret = trp_right_get(a_base, a_field, ret); \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr a_type MAYBE_UNUSED *a_pre##nsearch(struct trp_root *treap, a_type *key) \ -{ \ - int cmp; \ - uint32_t ret = treap->trp_root; \ - while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base, ret)))) { \ - if (cmp < 0) { \ - if (!~trp_left_get(a_base, a_field, ret)) \ - break; \ - ret = trp_left_get(a_base, a_field, ret); \ - } else { \ - ret = trp_right_get(a_base, a_field, ret); \ - } \ - } \ - return trpn_pointer(a_base, ret); \ -} \ -a_attr uint32_t MAYBE_UNUSED a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \ -{ \ - if (cur_node == ~0) { \ - return ins_node; \ - } else { \ - uint32_t ret; \ - int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \ - trpn_pointer(a_base, cur_node)); \ - if (cmp < 0) { \ - uint32_t left = a_pre##insert_recurse( \ - trp_left_get(a_base, a_field, cur_node), ins_node); \ - trp_left_set(a_base, a_field, cur_node, left); \ - if (trp_prio_get(left) < trp_prio_get(cur_node)) \ - trpn_rotate_right(a_base, a_field, cur_node, ret); \ - else \ - ret = cur_node; \ - } else { \ - uint32_t right = a_pre##insert_recurse( \ - trp_right_get(a_base, a_field, cur_node), ins_node); \ - trp_right_set(a_base, a_field, cur_node, right); \ - if (trp_prio_get(right) < trp_prio_get(cur_node)) \ - trpn_rotate_left(a_base, a_field, cur_node, ret); \ - else \ - ret = cur_node; \ - } \ - return ret; \ - } \ -} \ -a_attr a_type *MAYBE_UNUSED a_pre##insert(struct trp_root *treap, a_type *node) \ -{ \ - uint32_t offset = trpn_offset(a_base, node); \ - trp_node_new(a_base, a_field, offset); \ - treap->trp_root = a_pre##insert_recurse(treap->trp_root, offset); \ - return trpn_pointer(a_base, offset); \ -} \ -a_attr uint32_t MAYBE_UNUSED a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \ -{ \ - int cmp = a_cmp(trpn_pointer(a_base, rem_node), \ - trpn_pointer(a_base, cur_node)); \ - if (cmp == 0) { \ - uint32_t ret; \ - uint32_t left = trp_left_get(a_base, a_field, cur_node); \ - uint32_t right = trp_right_get(a_base, a_field, cur_node); \ - if (left == ~0) { \ - if (right == ~0) \ - return ~0; \ - } else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \ - trpn_rotate_right(a_base, a_field, cur_node, ret); \ - right = a_pre##remove_recurse(cur_node, rem_node); \ - trp_right_set(a_base, a_field, ret, right); \ - return ret; \ - } \ - trpn_rotate_left(a_base, a_field, cur_node, ret); \ - left = a_pre##remove_recurse(cur_node, rem_node); \ - trp_left_set(a_base, a_field, ret, left); \ - return ret; \ - } else if (cmp < 0) { \ - uint32_t left = a_pre##remove_recurse( \ - trp_left_get(a_base, a_field, cur_node), rem_node); \ - trp_left_set(a_base, a_field, cur_node, left); \ - return cur_node; \ - } else { \ - uint32_t right = a_pre##remove_recurse( \ - trp_right_get(a_base, a_field, cur_node), rem_node); \ - trp_right_set(a_base, a_field, cur_node, right); \ - return cur_node; \ - } \ -} \ -a_attr void MAYBE_UNUSED a_pre##remove(struct trp_root *treap, a_type *node) \ -{ \ - treap->trp_root = a_pre##remove_recurse(treap->trp_root, \ - trpn_offset(a_base, node)); \ -} \ - -#endif diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt deleted file mode 100644 index 177ebca33..000000000 --- a/vcs-svn/trp.txt +++ /dev/null @@ -1,109 +0,0 @@ -Motivation -========== - -Treaps provide a memory-efficient binary search tree structure. -Insertion/deletion/search are about as about as fast in the average -case as red-black trees and the chances of worst-case behavior are -vanishingly small, thanks to (pseudo-)randomness. The bad worst-case -behavior is a small price to pay, given that treaps are much simpler -to implement. - -API -=== - -The trp API generates a data structure and functions to handle a -large growing set of objects stored in a pool. - -The caller: - -. Specifies parameters for the generated functions with the - trp_gen(static, foo_, ...) macro. - -. Allocates a `struct trp_root` variable and sets it to {~0}. - -. Adds new nodes to the set using `foo_insert`. Any pointers - to existing nodes cannot be relied upon any more, so the caller - might retrieve them anew with `foo_pointer`. - -. Can find a specific item in the set using `foo_search`. - -. Can iterate over items in the set using `foo_first` and `foo_next`. - -. Can remove an item from the set using `foo_remove`. - -Example: - ----- -struct ex_node { - const char *s; - struct trp_node ex_link; -}; -static struct trp_root ex_base = {~0}; -obj_pool_gen(ex, struct ex_node, 4096); -trp_gen(static, ex_, struct ex_node, ex_link, ex, strcmp) -struct ex_node *item; - -item = ex_pointer(ex_alloc(1)); -item->s = "hello"; -ex_insert(&ex_base, item); -item = ex_pointer(ex_alloc(1)); -item->s = "goodbye"; -ex_insert(&ex_base, item); -for (item = ex_first(&ex_base); item; item = ex_next(&ex_base, item)) - printf("%s\n", item->s); ----- - -Functions ---------- - -trp_gen(attr, foo_, node_type, link_field, pool, cmp):: - - Generate a type-specific treap implementation. -+ -. The storage class for generated functions will be 'attr' (e.g., `static`). -. Generated function names are prefixed with 'foo_' (e.g., `treap_`). -. Treap nodes will be of type 'node_type' (e.g., `struct treap_node`). - This type must be a struct with at least one `struct trp_node` field - to point to its children. -. The field used to access child nodes will be 'link_field'. -. All treap nodes must lie in the 'pool' object pool. -. Treap nodes must be totally ordered by the 'cmp' relation, with the - following prototype: -+ -int (*cmp)(node_type \*a, node_type \*b) -+ -and returning a value less than, equal to, or greater than zero -according to the result of comparison. - -node_type {asterisk}foo_insert(struct trp_root *treap, node_type \*node):: - - Insert node into treap. If inserted multiple times, - a node will appear in the treap multiple times. -+ -The return value is the address of the node within the treap, -which might differ from `node` if `pool_alloc` had to call -`realloc` to expand the pool. - -void foo_remove(struct trp_root *treap, node_type \*node):: - - Remove node from treap. Caller must ensure node is - present in treap before using this function. - -node_type *foo_search(struct trp_root \*treap, node_type \*key):: - - Search for a node that matches key. If no match is found, - result is NULL. - -node_type *foo_nsearch(struct trp_root \*treap, node_type \*key):: - - Like `foo_search`, but if the key is missing return what - would be key's successor, were key in treap (NULL if no - successor). - -node_type *foo_first(struct trp_root \*treap):: - - Find the first item from the treap, in sorted order. - -node_type *foo_next(struct trp_root \*treap, node_type \*node):: - - Find the next item. |