diff options
-rw-r--r-- | Documentation/git-fast-import.txt | 63 | ||||
-rw-r--r-- | contrib/svn-fe/svn-fe.c | 3 | ||||
-rw-r--r-- | fast-import.c | 162 | ||||
-rwxr-xr-x | t/t0080-vcs-svn.sh | 54 | ||||
-rwxr-xr-x | t/t0081-line-buffer.sh | 201 | ||||
-rwxr-xr-x | t/t9010-svn-fe.sh | 703 | ||||
-rwxr-xr-x | t/t9300-fast-import.sh | 92 | ||||
-rw-r--r-- | test-line-buffer.c | 90 | ||||
-rw-r--r-- | test-svn-fe.c | 3 | ||||
-rw-r--r-- | vcs-svn/fast_export.c | 6 | ||||
-rw-r--r-- | vcs-svn/fast_export.h | 5 | ||||
-rw-r--r-- | vcs-svn/line_buffer.c | 105 | ||||
-rw-r--r-- | vcs-svn/line_buffer.h | 33 | ||||
-rw-r--r-- | vcs-svn/line_buffer.txt | 36 | ||||
-rw-r--r-- | vcs-svn/repo_tree.c | 21 | ||||
-rw-r--r-- | vcs-svn/repo_tree.h | 3 | ||||
-rw-r--r-- | vcs-svn/svndump.c | 222 | ||||
-rw-r--r-- | vcs-svn/svndump.h | 2 |
18 files changed, 1571 insertions, 233 deletions
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index c3a2766b2..e1b7a0f9e 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -196,7 +196,8 @@ especially when a higher level language such as Perl, Python or Ruby is being used. fast-import is very strict about its input. Where we say SP below we mean -*exactly* one space. Likewise LF means one (and only one) linefeed. +*exactly* one space. Likewise LF means one (and only one) linefeed +and HT one (and only one) horizontal tab. Supplying additional whitespace characters will cause unexpected results, such as branch names or file names with leading or trailing spaces in their name, or early termination of fast-import when it encounters @@ -334,6 +335,11 @@ and control the current import process. More detailed discussion format to the file descriptor set with `--cat-blob-fd` or `stdout` if unspecified. +`ls`:: + Causes fast-import to print a line describing a directory + entry in 'ls-tree' format to the file descriptor set with + `--cat-blob-fd` or `stdout` if unspecified. + `feature`:: Require that fast-import supports the specified feature, or abort if it does not. @@ -919,6 +925,55 @@ This command can be used anywhere in the stream that comments are accepted. In particular, the `cat-blob` command can be used in the middle of a commit but not in the middle of a `data` command. +`ls` +~~~~ +Prints information about the object at a path to a file descriptor +previously arranged with the `--cat-blob-fd` argument. This allows +printing a blob from the active commit (with `cat-blob`) or copying a +blob or tree from a previous commit for use in the current one (with +`filemodify`). + +The `ls` command can be used anywhere in the stream that comments are +accepted, including the middle of a commit. + +Reading from the active commit:: + This form can only be used in the middle of a `commit`. + The path names a directory entry within fast-import's + active commit. The path must be quoted in this case. ++ +.... + 'ls' SP <path> LF +.... + +Reading from a named tree:: + The `<dataref>` can be a mark reference (`:<idnum>`) or the + full 40-byte SHA-1 of a Git tag, commit, or tree object, + preexisting or waiting to be written. + The path is relative to the top level of the tree + named by `<dataref>`. ++ +.... + 'ls' SP <dataref> SP <path> LF +.... + +See `filemodify` above for a detailed description of `<path>`. + +Output uses the same format as `git ls-tree <tree> {litdd} <path>`: + +==== + <mode> SP ('blob' | 'tree' | 'commit') SP <dataref> HT <path> LF +==== + +The <dataref> represents the blob, tree, or commit object at <path> +and can be used in later 'cat-blob', 'filemodify', or 'ls' commands. + +If there is no file or subtree at that path, 'git fast-import' will +instead report + +==== + missing SP <path> LF +==== + `feature` ~~~~~~~~~ Require that fast-import supports the specified feature, or abort if @@ -946,8 +1001,10 @@ import-marks:: any "feature import-marks" command in the stream. cat-blob:: - Ignored. Versions of fast-import not supporting the - "cat-blob" command will exit with a message indicating so. +ls:: + Require that the backend support the 'cat-blob' or 'ls' command. + Versions of fast-import not supporting the specified command + will exit with a message indicating so. This lets the import error out early with a clear message, rather than wasting time on the early part of an import before the unsupported command is detected. diff --git a/contrib/svn-fe/svn-fe.c b/contrib/svn-fe/svn-fe.c index a2677b03e..35db24f5e 100644 --- a/contrib/svn-fe/svn-fe.c +++ b/contrib/svn-fe/svn-fe.c @@ -8,7 +8,8 @@ int main(int argc, char **argv) { - svndump_init(NULL); + if (svndump_init(NULL)) + return 1; svndump_read((argc > 1) ? argv[1] : NULL); svndump_deinit(); svndump_reset(); diff --git a/fast-import.c b/fast-import.c index 3886a1b46..e1268b8cb 100644 --- a/fast-import.c +++ b/fast-import.c @@ -24,10 +24,12 @@ Format of STDIN stream: commit_msg ('from' sp committish lf)? ('merge' sp committish lf)* - file_change* + (file_change | ls)* lf?; commit_msg ::= data; + ls ::= 'ls' sp '"' quoted(path) '"' lf; + file_change ::= file_clr | file_del | file_rnm @@ -132,7 +134,7 @@ Format of STDIN stream: ts ::= # time since the epoch in seconds, ascii base10 notation; tz ::= # GIT style timezone; - # note: comments and cat requests may appear anywhere + # note: comments, ls and cat requests may appear anywhere # in the input, except within a data command. Any form # of the data command always escapes the related input # from comment processing. @@ -141,7 +143,9 @@ Format of STDIN stream: # must be the first character on that line (an lf # preceded it). # + cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; + ls_tree ::= 'ls' sp (hexsha1 | idnum) sp path_str lf; comment ::= '#' not_lf* lf; not_lf ::= # Any byte that is not ASCII newline (LF); @@ -374,6 +378,7 @@ static int cat_blob_fd = STDOUT_FILENO; static void parse_argv(void); static void parse_cat_blob(void); +static void parse_ls(struct branch *b); static void write_branch_report(FILE *rpt, struct branch *b) { @@ -2614,6 +2619,8 @@ static void parse_new_commit(void) note_change_n(b, prev_fanout); else if (!strcmp("deleteall", command_buf.buf)) file_change_deleteall(b); + else if (!prefixcmp(command_buf.buf, "ls ")) + parse_ls(b); else { unread_command_buf = 1; break; @@ -2837,6 +2844,153 @@ static void parse_cat_blob(void) cat_blob(oe, sha1); } +static struct object_entry *dereference(struct object_entry *oe, + unsigned char sha1[20]) +{ + unsigned long size; + char *buf = NULL; + if (!oe) { + enum object_type type = sha1_object_info(sha1, NULL); + if (type < 0) + die("object not found: %s", sha1_to_hex(sha1)); + /* cache it! */ + oe = insert_object(sha1); + oe->type = type; + oe->pack_id = MAX_PACK_ID; + oe->idx.offset = 1; + } + switch (oe->type) { + case OBJ_TREE: /* easy case. */ + return oe; + case OBJ_COMMIT: + case OBJ_TAG: + break; + default: + die("Not a treeish: %s", command_buf.buf); + } + + if (oe->pack_id != MAX_PACK_ID) { /* in a pack being written */ + buf = gfi_unpack_entry(oe, &size); + } else { + enum object_type unused; + buf = read_sha1_file(sha1, &unused, &size); + } + if (!buf) + die("Can't load object %s", sha1_to_hex(sha1)); + + /* Peel one layer. */ + switch (oe->type) { + case OBJ_TAG: + if (size < 40 + strlen("object ") || + get_sha1_hex(buf + strlen("object "), sha1)) + die("Invalid SHA1 in tag: %s", command_buf.buf); + break; + case OBJ_COMMIT: + if (size < 40 + strlen("tree ") || + get_sha1_hex(buf + strlen("tree "), sha1)) + die("Invalid SHA1 in commit: %s", command_buf.buf); + } + + free(buf); + return find_object(sha1); +} + +static struct object_entry *parse_treeish_dataref(const char **p) +{ + unsigned char sha1[20]; + struct object_entry *e; + + if (**p == ':') { /* <mark> */ + char *endptr; + e = find_mark(strtoumax(*p + 1, &endptr, 10)); + if (endptr == *p + 1) + die("Invalid mark: %s", command_buf.buf); + if (!e) + die("Unknown mark: %s", command_buf.buf); + *p = endptr; + hashcpy(sha1, e->idx.sha1); + } else { /* <sha1> */ + if (get_sha1_hex(*p, sha1)) + die("Invalid SHA1: %s", command_buf.buf); + e = find_object(sha1); + *p += 40; + } + + while (!e || e->type != OBJ_TREE) + e = dereference(e, sha1); + return e; +} + +static void print_ls(int mode, const unsigned char *sha1, const char *path) +{ + static struct strbuf line = STRBUF_INIT; + + /* See show_tree(). */ + const char *type = + S_ISGITLINK(mode) ? commit_type : + S_ISDIR(mode) ? tree_type : + blob_type; + + if (!mode) { + /* missing SP path LF */ + strbuf_reset(&line); + strbuf_addstr(&line, "missing "); + quote_c_style(path, &line, NULL, 0); + strbuf_addch(&line, '\n'); + } else { + /* mode SP type SP object_name TAB path LF */ + strbuf_reset(&line); + strbuf_addf(&line, "%06o %s %s\t", + mode, type, sha1_to_hex(sha1)); + quote_c_style(path, &line, NULL, 0); + strbuf_addch(&line, '\n'); + } + cat_blob_write(line.buf, line.len); +} + +static void parse_ls(struct branch *b) +{ + const char *p; + struct tree_entry *root = NULL; + struct tree_entry leaf = {0}; + + /* ls SP (<treeish> SP)? <path> */ + p = command_buf.buf + strlen("ls "); + if (*p == '"') { + if (!b) + die("Not in a commit: %s", command_buf.buf); + root = &b->branch_tree; + } else { + struct object_entry *e = parse_treeish_dataref(&p); + root = new_tree_entry(); + hashcpy(root->versions[1].sha1, e->idx.sha1); + load_tree(root); + if (*p++ != ' ') + die("Missing space after tree-ish: %s", command_buf.buf); + } + if (*p == '"') { + static struct strbuf uq = STRBUF_INIT; + const char *endp; + strbuf_reset(&uq); + if (unquote_c_style(&uq, p, &endp)) + die("Invalid path: %s", command_buf.buf); + if (*endp) + die("Garbage after path in: %s", command_buf.buf); + p = uq.buf; + } + tree_content_get(root, p, &leaf); + /* + * A directory in preparation would have a sha1 of zero + * until it is saved. Save, for simplicity. + */ + if (S_ISDIR(leaf.versions[1].mode)) + store_tree(&leaf); + + print_ls(leaf.versions[1].mode, leaf.versions[1].sha1, p); + if (!b || root != &b->branch_tree) + release_tree_entry(root); +} + static void checkpoint(void) { checkpoint_requested = 0; @@ -3001,7 +3155,7 @@ static int parse_one_feature(const char *feature, int from_stream) relative_marks_paths = 0; } else if (!prefixcmp(feature, "force")) { force_update = 1; - } else if (!strcmp(feature, "notes")) { + } else if (!strcmp(feature, "notes") || !strcmp(feature, "ls")) { ; /* do nothing; we have the feature */ } else { return 0; @@ -3142,6 +3296,8 @@ int main(int argc, const char **argv) while (read_next_command() != EOF) { if (!strcmp("blob", command_buf.buf)) parse_new_blob(); + else if (!prefixcmp(command_buf.buf, "ls ")) + parse_ls(NULL); else if (!prefixcmp(command_buf.buf, "commit ")) parse_new_commit(); else if (!prefixcmp(command_buf.buf, "tag ")) diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh index d3225ada6..99a314b08 100755 --- a/t/t0080-vcs-svn.sh +++ b/t/t0080-vcs-svn.sh @@ -76,60 +76,6 @@ test_expect_success 'obj pool: high-water mark' ' test_cmp expected actual ' -test_expect_success 'line buffer' ' - echo HELLO >expected1 && - printf "%s\n" "" HELLO >expected2 && - echo >expected3 && - printf "%s\n" "" Q | q_to_nul >expected4 && - printf "%s\n" foo "" >expected5 && - printf "%s\n" "" foo >expected6 && - - test-line-buffer <<-\EOF >actual1 && - 5 - HELLO - EOF - - test-line-buffer <<-\EOF >actual2 && - 0 - - 5 - HELLO - EOF - - q_to_nul <<-\EOF | - 1 - Q - EOF - test-line-buffer >actual3 && - - q_to_nul <<-\EOF | - 0 - - 1 - Q - EOF - test-line-buffer >actual4 && - - test-line-buffer <<-\EOF >actual5 && - 5 - foo - EOF - - test-line-buffer <<-\EOF >actual6 && - 0 - - 5 - foo - EOF - - test_cmp expected1 actual1 && - test_cmp expected2 actual2 && - test_cmp expected3 actual3 && - test_cmp expected4 actual4 && - test_cmp expected5 actual5 && - test_cmp expected6 actual6 -' - test_expect_success 'string pool' ' echo a does not equal b >expected.differ && echo a equals a >expected.match && diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh new file mode 100755 index 000000000..550fad082 --- /dev/null +++ b/t/t0081-line-buffer.sh @@ -0,0 +1,201 @@ +#!/bin/sh + +test_description="Test the svn importer's input handling routines. + +These tests exercise the line_buffer library, but their real purpose +is to check the assumptions that library makes of the platform's input +routines. Processes engaged in bi-directional communication would +hang if fread or fgets is too greedy. + +While at it, check that input of newlines and null bytes are handled +correctly. +" +. ./test-lib.sh + +test -n "$GIT_REMOTE_SVN_TEST_BIG_FILES" && test_set_prereq EXPENSIVE + +generate_tens_of_lines () { + tens=$1 && + line=$2 && + + i=0 && + while test $i -lt "$tens" + do + for j in a b c d e f g h i j + do + echo "$line" + done && + : $((i = $i + 1)) || + return + done +} + +long_read_test () { + : each line is 10 bytes, including newline && + line=abcdefghi && + echo "$line" >expect && + + if ! test_declared_prereq PIPE + then + echo >&4 "long_read_test: need to declare PIPE prerequisite" + return 127 + fi && + tens_of_lines=$(($1 / 100 + 1)) && + lines=$(($tens_of_lines * 10)) && + readsize=$((($lines - 1) * 10 + 3)) && + copysize=7 && + rm -f input && + mkfifo input && + { + { + generate_tens_of_lines $tens_of_lines "$line" && + sleep 100 + } >input & + } && + test-line-buffer input <<-EOF >output && + read $readsize + copy $copysize + EOF + kill $! && + test_line_count = $lines output && + tail -n 1 <output >actual && + test_cmp expect actual +} + +test_expect_success 'setup: have pipes?' ' + rm -f frob && + if mkfifo frob + then + test_set_prereq PIPE + fi +' + +test_expect_success 'hello world' ' + echo HELLO >expect && + test-line-buffer <<-\EOF >actual && + read 6 + HELLO + EOF + test_cmp expect actual +' + +test_expect_success PIPE '0-length read, no input available' ' + >expect && + rm -f input && + mkfifo input && + { + sleep 100 >input & + } && + test-line-buffer input <<-\EOF >actual && + read 0 + copy 0 + EOF + kill $! && + test_cmp expect actual +' + +test_expect_success '0-length read, send along greeting' ' + echo HELLO >expect && + test-line-buffer <<-\EOF >actual && + read 0 + copy 6 + HELLO + EOF + test_cmp expect actual +' + +test_expect_success PIPE '1-byte read, no input available' ' + printf "%s" ab >expect && + rm -f input && + mkfifo input && + { + { + printf "%s" a && + printf "%s" b && + sleep 100 + } >input & + } && + test-line-buffer input <<-\EOF >actual && + read 1 + copy 1 + EOF + kill $! && + test_cmp expect actual +' + +test_expect_success PIPE 'long read (around 8192 bytes)' ' + long_read_test 8192 +' + +test_expect_success PIPE,EXPENSIVE 'longer read (around 65536 bytes)' ' + long_read_test 65536 +' + +test_expect_success 'read from file descriptor' ' + rm -f input && + echo hello >expect && + echo hello >input && + echo copy 6 | + test-line-buffer "&4" 4<input >actual && + test_cmp expect actual +' + +test_expect_success 'buffer_read_string copes with null byte' ' + >expect && + q_to_nul <<-\EOF | test-line-buffer >actual && + read 2 + Q + EOF + test_cmp expect actual +' + +test_expect_success 'skip, copy null byte' ' + echo Q | q_to_nul >expect && + q_to_nul <<-\EOF | test-line-buffer >actual && + skip 2 + Q + copy 2 + Q + EOF + test_cmp expect actual +' + +test_expect_success 'read null byte' ' + echo ">QhelloQ" | q_to_nul >expect && + q_to_nul <<-\EOF | test-line-buffer >actual && + binary 8 + QhelloQ + EOF + test_cmp expect actual +' + +test_expect_success 'long reads are truncated' ' + echo foo >expect && + test-line-buffer <<-\EOF >actual && + read 5 + foo + EOF + test_cmp expect actual +' + +test_expect_success 'long copies are truncated' ' + printf "%s\n" "" foo >expect && + test-line-buffer <<-\EOF >actual && + read 1 + + copy 5 + foo + EOF + test_cmp expect actual +' + +test_expect_success 'long binary reads are truncated' ' + echo ">foo" >expect && + test-line-buffer <<-\EOF >actual && + binary 5 + foo + EOF + test_cmp expect actual +' + +test_done diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index 88a9751dd..5a6a4b9b7 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -9,6 +9,30 @@ reinit_git () { git init } +properties () { + while test "$#" -ne 0 + do + property="$1" && + value="$2" && + printf "%s\n" "K ${#property}" && + printf "%s\n" "$property" && + printf "%s\n" "V ${#value}" && + printf "%s\n" "$value" && + shift 2 || + return 1 + done +} + +text_no_props () { + text="$1 +" && + printf "%s\n" "Prop-content-length: 10" && + printf "%s\n" "Text-content-length: ${#text}" && + printf "%s\n" "Content-length: $((${#text} + 10))" && + printf "%s\n" "" "PROPS-END" && + printf "%s\n" "$text" +} + >empty test_expect_success 'empty dump' ' @@ -18,13 +42,686 @@ test_expect_success 'empty dump' ' git fast-import <stream ' -test_expect_success 'v3 dumps not supported' ' +test_expect_success 'v4 dumps not supported' ' reinit_git && - echo "SVN-fs-dump-format-version: 3" >input && - test_must_fail test-svn-fe input >stream && + echo "SVN-fs-dump-format-version: 4" >v4.dump && + test_must_fail test-svn-fe v4.dump >stream && test_cmp empty stream ' +test_expect_failure 'empty revision' ' + reinit_git && + printf "rev <nobody, nobody@local>: %s\n" "" "" >expect && + cat >emptyrev.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 0 + Content-length: 0 + + Revision-number: 2 + Prop-content-length: 0 + Content-length: 0 + + EOF + test-svn-fe emptyrev.dump >stream && + git fast-import <stream && + git log -p --format="rev <%an, %ae>: %s" HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'empty properties' ' + reinit_git && + printf "rev <nobody, nobody@local>: %s\n" "" "" >expect && + cat >emptyprop.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test-svn-fe emptyprop.dump >stream && + git fast-import <stream && + git log -p --format="rev <%an, %ae>: %s" HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'author name and commit message' ' + reinit_git && + echo "<author@example.com, author@example.com@local>" >expect.author && + cat >message <<-\EOF && + A concise summary of the change + + A detailed description of the change, why it is needed, what + was broken and why applying this is the best course of action. + + * file.c + Details pertaining to an individual file. + EOF + { + properties \ + svn:author author@example.com \ + svn:log "$(cat message)" && + echo PROPS-END + } >props && + { + echo "SVN-fs-dump-format-version: 3" && + echo && + echo "Revision-number: 1" && + echo Prop-content-length: $(wc -c <props) && + echo Content-length: $(wc -c <props) && + echo && + cat props + } >log.dump && + test-svn-fe log.dump >stream && + git fast-import <stream && + git log -p --format="%B" HEAD >actual.log && + git log --format="<%an, %ae>" >actual.author && + test_cmp message actual.log && + test_cmp expect.author actual.author +' + +test_expect_success 'unsupported properties are ignored' ' + reinit_git && + echo author >expect && + cat >extraprop.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 56 + Content-length: 56 + + K 8 + nonsense + V 1 + y + K 10 + svn:author + V 6 + author + PROPS-END + EOF + test-svn-fe extraprop.dump >stream && + git fast-import <stream && + git log -p --format=%an HEAD >actual && + test_cmp expect actual +' + +test_expect_failure 'timestamp and empty file' ' + echo author@example.com >expect.author && + echo 1999-01-01 >expect.date && + echo file >expect.files && + reinit_git && + { + properties \ + svn:author author@example.com \ + svn:date "1999-01-01T00:01:002.000000Z" \ + svn:log "add empty file" && + echo PROPS-END + } >props && + { + cat <<-EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + EOF + echo Prop-content-length: $(wc -c <props) && + echo Content-length: $(wc -c <props) && + echo && + cat props && + cat <<-\EOF + + Node-path: empty-file + Node-kind: file + Node-action: add + Content-length: 0 + + EOF + } >emptyfile.dump && + test-svn-fe emptyfile.dump >stream && + git fast-import <stream && + git log --format=%an HEAD >actual.author && + git log --date=short --format=%ad HEAD >actual.date && + git ls-tree -r --name-only HEAD >actual.files && + test_cmp expect.author actual.author && + test_cmp expect.date actual.date && + test_cmp expect.files actual.files && + git checkout HEAD empty-file && + test_cmp empty file +' + +test_expect_success 'directory with files' ' + reinit_git && + printf "%s\n" directory/file1 directory/file2 >expect.files && + echo hi >hi && + echo hello >hello && + { + properties \ + svn:author author@example.com \ + svn:date "1999-02-01T00:01:002.000000Z" \ + svn:log "add directory with some files in it" && + echo PROPS-END + } >props && + { + cat <<-EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + EOF + echo Prop-content-length: $(wc -c <props) && + echo Content-length: $(wc -c <props) && + echo && + cat props && + cat <<-\EOF && + + Node-path: directory + Node-kind: dir + Node-action: add + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: directory/file1 + Node-kind: file + Node-action: add + EOF + text_no_props hello && + cat <<-\EOF && + Node-path: directory/file2 + Node-kind: file + Node-action: add + EOF + text_no_props hi + } >directory.dump && + test-svn-fe directory.dump >stream && + git fast-import <stream && + + git ls-tree -r --name-only HEAD >actual.files && + git checkout HEAD directory && + test_cmp expect.files actual.files && + test_cmp hello directory/file1 && + test_cmp hi directory/file2 +' + +test_expect_success 'node without action' ' + cat >inaction.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: directory + Node-kind: dir + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test_must_fail test-svn-fe inaction.dump +' + +test_expect_success 'action: add node without text' ' + cat >textless.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: textless + Node-kind: file + Node-action: add + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test_must_fail test-svn-fe textless.dump +' + +test_expect_failure 'change file mode but keep old content' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T greeting + OBJID + :100644 120000 OBJID OBJID T greeting + OBJID + :000000 100644 OBJID OBJID A greeting + EOF + echo "link hello" >expect.blob && + echo hello >hello && + cat >filemode.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: add + Prop-content-length: 10 + Text-content-length: 11 + Content-length: 21 + + PROPS-END + link hello + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 33 + Content-length: 33 + + K 11 + svn:special + V 1 + * + PROPS-END + + Revision-number: 3 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test-svn-fe filemode.dump >stream && + git fast-import <stream && + { + git rev-list HEAD | + git diff-tree --root --stdin | + sed "s/$_x40/OBJID/g" + } >actual && + git show HEAD:greeting >actual.blob && + git show HEAD^:greeting >actual.target && + test_cmp expect actual && + test_cmp expect.blob actual.blob && + test_cmp hello actual.target +' + +test_expect_success 'change file mode and reiterate content' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T greeting + OBJID + :100644 120000 OBJID OBJID T greeting + OBJID + :000000 100644 OBJID OBJID A greeting + EOF + echo "link hello" >expect.blob && + echo hello >hello && + cat >filemode.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: add + Prop-content-length: 10 + Text-content-length: 11 + Content-length: 21 + + PROPS-END + link hello + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 33 + Text-content-length: 11 + Content-length: 44 + + K 11 + svn:special + V 1 + * + PROPS-END + link hello + + Revision-number: 3 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 10 + Text-content-length: 11 + Content-length: 21 + + PROPS-END + link hello + EOF + test-svn-fe filemode.dump >stream && + git fast-import <stream && + { + git rev-list HEAD | + git diff-tree --root --stdin | + sed "s/$_x40/OBJID/g" + } >actual && + git show HEAD:greeting >actual.blob && + git show HEAD^:greeting >actual.target && + test_cmp expect actual && + test_cmp expect.blob actual.blob && + test_cmp hello actual.target +' + +test_expect_success 'deltas not supported' ' + { + # (old) h + (inline) ello + (old) \n + printf "SVNQ%b%b%s" "Q\003\006\005\004" "\001Q\0204\001\002" "ello" | + q_to_nul + } >delta && + { + properties \ + svn:author author@example.com \ + svn:date "1999-01-05T00:01:002.000000Z" \ + svn:log "add greeting" && + echo PROPS-END + } >props && + { + properties \ + svn:author author@example.com \ + svn:date "1999-01-06T00:01:002.000000Z" \ + svn:log "change it" && + echo PROPS-END + } >props2 && + { + echo SVN-fs-dump-format-version: 3 && + echo && + echo Revision-number: 1 && + echo Prop-content-length: $(wc -c <props) && + echo Content-length: $(wc -c <props) && + echo && + cat props && + cat <<-\EOF && + + Node-path: hello + Node-kind: file + Node-action: add + Prop-content-length: 10 + Text-content-length: 3 + Content-length: 13 + + PROPS-END + hi + + EOF + echo Revision-number: 2 && + echo Prop-content-length: $(wc -c <props2) && + echo Content-length: $(wc -c <props2) && + echo && + cat props2 && + cat <<-\EOF && + + Node-path: hello + Node-kind: file + Node-action: change + Text-delta: true + Prop-content-length: 10 + EOF + echo Text-content-length: $(wc -c <delta) && + echo Content-length: $((10 + $(wc -c <delta))) && + echo && + echo PROPS-END && + cat delta + } >delta.dump && + test_must_fail test-svn-fe delta.dump +' + +test_expect_success 'property deltas supported' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :100755 100644 OBJID OBJID M script.sh + EOF + { + properties \ + svn:author author@example.com \ + svn:date "1999-03-06T00:01:002.000000Z" \ + svn:log "make an executable, or chmod -x it" && + echo PROPS-END + } >revprops && + { + echo SVN-fs-dump-format-version: 3 && + echo && + echo Revision-number: 1 && + echo Prop-content-length: $(wc -c <revprops) && + echo Content-length: $(wc -c <revprops) && + echo && + cat revprops && + echo && + cat <<-\EOF && + Node-path: script.sh + Node-kind: file + Node-action: add + Text-content-length: 0 + Prop-content-length: 39 + Content-length: 39 + + K 14 + svn:executable + V 4 + true + PROPS-END + + EOF + echo Revision-number: 2 && + echo Prop-content-length: $(wc -c <revprops) && + echo Content-length: $(wc -c <revprops) && + echo && + cat revprops && + echo && + cat <<-\EOF + Node-path: script.sh + Node-kind: file + Node-action: change + Prop-delta: true + Prop-content-length: 30 + Content-length: 30 + + D 14 + svn:executable + PROPS-END + EOF + } >propdelta.dump && + test-svn-fe propdelta.dump >stream && + git fast-import <stream && + { + git rev-list HEAD | + git diff-tree --stdin | + sed "s/$_x40/OBJID/g" + } >actual && + test_cmp expect actual +' + +test_expect_success 'properties on /' ' + reinit_git && + cat <<-\EOF >expect && + OBJID + OBJID + :000000 100644 OBJID OBJID A greeting + EOF + sed -e "s/X$//" <<-\EOF >changeroot.dump && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: add + Text-content-length: 0 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: X + Node-kind: dir + Node-action: change + Prop-delta: true + Prop-content-length: 43 + Content-length: 43 + + K 10 + svn:ignore + V 11 + build-area + + PROPS-END + EOF + test-svn-fe changeroot.dump >stream && + git fast-import <stream && + { + git rev-list HEAD | + git diff-tree --root --always --stdin | + sed "s/$_x40/OBJID/g" + } >actual && + test_cmp expect actual +' + +test_expect_success 'deltas for typechange' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T test-file + OBJID + :100755 120000 OBJID OBJID T test-file + OBJID + :000000 100755 OBJID OBJID A test-file + EOF + cat >deleteprop.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: test-file + Node-kind: file + Node-action: add + Prop-delta: true + Prop-content-length: 35 + Text-content-length: 17 + Content-length: 52 + + K 14 + svn:executable + V 0 + + PROPS-END + link testing 123 + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: test-file + Node-kind: file + Node-action: change + Prop-delta: true + Prop-content-length: 53 + Text-content-length: 17 + Content-length: 70 + + K 11 + svn:special + V 1 + * + D 14 + svn:executable + PROPS-END + link testing 231 + + Revision-number: 3 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: test-file + Node-kind: file + Node-action: change + Prop-delta: true + Prop-content-length: 27 + Text-content-length: 17 + Content-length: 44 + + D 11 + svn:special + PROPS-END + link testing 321 + EOF + test-svn-fe deleteprop.dump >stream && + git fast-import <stream && + { + git rev-list HEAD | + git diff-tree --root --stdin | + sed "s/$_x40/OBJID/g" + } >actual && + test_cmp expect actual +' + + test_expect_success 'set up svn repo' ' svnconf=$PWD/svnconf && mkdir -p "$svnconf" && diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 52ac0e56d..6b1ba6c85 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -42,6 +42,14 @@ echo "$@"' >empty +test_expect_success 'setup: have pipes?' ' + rm -f frob && + if mkfifo frob + then + test_set_prereq PIPE + fi +' + ### ### series A ### @@ -898,6 +906,77 @@ test_expect_success \ git diff-tree -C --find-copies-harder -r N4^ N4 >actual && compare_diff_raw expect actual' +test_expect_success PIPE 'N: read and copy directory' ' + cat >expect <<-\EOF + :100755 100755 f1fb5da718392694d0076d677d6d0e364c79b0bc f1fb5da718392694d0076d677d6d0e364c79b0bc C100 file2/newf file3/newf + :100644 100644 7123f7f44e39be127c5eb701e5968176ee9d78b1 7123f7f44e39be127c5eb701e5968176ee9d78b1 C100 file2/oldf file3/oldf + EOF + git update-ref -d refs/heads/N4 && + rm -f backflow && + mkfifo backflow && + ( + exec <backflow && + cat <<-EOF && + commit refs/heads/N4 + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + copy by tree hash, part 2 + COMMIT + + from refs/heads/branch^0 + ls "file2" + EOF + read mode type tree filename && + echo "M 040000 $tree file3" + ) | + git fast-import --cat-blob-fd=3 3>backflow && + git diff-tree -C --find-copies-harder -r N4^ N4 >actual && + compare_diff_raw expect actual +' + +test_expect_success PIPE 'N: empty directory reads as missing' ' + cat <<-\EOF >expect && + OBJNAME + :000000 100644 OBJNAME OBJNAME A unrelated + EOF + echo "missing src" >expect.response && + git update-ref -d refs/heads/read-empty && + rm -f backflow && + mkfifo backflow && + ( + exec <backflow && + cat <<-EOF && + commit refs/heads/read-empty + committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE + data <<COMMIT + read "empty" (missing) directory + COMMIT + + M 100644 inline src/greeting + data <<BLOB + hello + BLOB + C src/greeting dst1/non-greeting + C src/greeting unrelated + # leave behind "empty" src directory + D src/greeting + ls "src" + EOF + read -r line && + printf "%s\n" "$line" >response && + cat <<-\EOF + D dst1 + D dst2 + EOF + ) | + git fast-import --cat-blob-fd=3 3>backflow && + test_cmp expect.response response && + git rev-list read-empty | + git diff-tree -r --root --stdin | + sed "s/$_x40/OBJNAME/g" >actual && + test_cmp expect actual +' + test_expect_success \ 'N: copy root directory by tree hash' \ 'cat >expect <<-\EOF && @@ -1861,6 +1940,11 @@ test_expect_success 'R: feature no-relative-marks should be honoured' ' test_cmp marks.new non-relative.out ' +test_expect_success 'R: feature ls supported' ' + echo "feature ls" | + git fast-import +' + test_expect_success 'R: feature cat-blob supported' ' echo "feature cat-blob" | git fast-import @@ -1986,14 +2070,6 @@ test_expect_success 'R: print two blobs to stdout' ' test_cmp expect actual ' -test_expect_success 'setup: have pipes?' ' - rm -f frob && - if mkfifo frob - then - test_set_prereq PIPE - fi -' - test_expect_success PIPE 'R: copy using cat-file' ' expect_id=$(git hash-object big) && expect_len=$(wc -c <big) && diff --git a/test-line-buffer.c b/test-line-buffer.c index c11bf7f96..25b20b93f 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -1,14 +1,9 @@ /* * test-line-buffer.c: code to exercise the svn importer's input helper - * - * Input format: - * number NL - * (number bytes) NL - * number NL - * ... */ #include "git-compat-util.h" +#include "strbuf.h" #include "vcs-svn/line_buffer.h" static uint32_t strtouint32(const char *s) @@ -20,27 +15,84 @@ static uint32_t strtouint32(const char *s) return (uint32_t) n; } +static void handle_command(const char *command, const char *arg, struct line_buffer *buf) +{ + switch (*command) { + case 'b': + if (!prefixcmp(command, "binary ")) { + struct strbuf sb = STRBUF_INIT; + strbuf_addch(&sb, '>'); + buffer_read_binary(buf, &sb, strtouint32(arg)); + fwrite(sb.buf, 1, sb.len, stdout); + strbuf_release(&sb); + return; + } + case 'c': + if (!prefixcmp(command, "copy ")) { + buffer_copy_bytes(buf, strtouint32(arg)); + return; + } + case 'r': + if (!prefixcmp(command, "read ")) { + const char *s = buffer_read_string(buf, strtouint32(arg)); + fputs(s, stdout); + return; + } + case 's': + if (!prefixcmp(command, "skip ")) { + buffer_skip_bytes(buf, strtouint32(arg)); + return; + } + default: + die("unrecognized command: %s", command); + } +} + +static void handle_line(const char *line, struct line_buffer *stdin_buf) +{ + const char *arg = strchr(line, ' '); + if (!arg) + die("no argument in line: %s", line); + handle_command(line, arg + 1, stdin_buf); +} + int main(int argc, char *argv[]) { + struct line_buffer stdin_buf = LINE_BUFFER_INIT; + struct line_buffer file_buf = LINE_BUFFER_INIT; + struct line_buffer *input = &stdin_buf; + const char *filename; char *s; - if (argc != 1) - usage("test-line-buffer < input.txt"); - if (buffer_init(NULL)) + if (argc == 1) + filename = NULL; + else if (argc == 2) + filename = argv[1]; + else + usage("test-line-buffer [file | &fd] < script"); + + if (buffer_init(&stdin_buf, NULL)) die_errno("open error"); - while ((s = buffer_read_line())) { - s = buffer_read_string(strtouint32(s)); - fputs(s, stdout); - fputc('\n', stdout); - buffer_skip_bytes(1); - if (!(s = buffer_read_line())) - break; - buffer_copy_bytes(strtouint32(s) + 1); + if (filename) { + if (*filename == '&') { + if (buffer_fdinit(&file_buf, strtouint32(filename + 1))) + die_errno("error opening fd %s", filename + 1); + } else { + if (buffer_init(&file_buf, filename)) + die_errno("error opening %s", filename); + } + input = &file_buf; } - if (buffer_deinit()) + + while ((s = buffer_read_line(&stdin_buf))) + handle_line(s, input); + + if (filename && buffer_deinit(&file_buf)) + die("error reading from %s", filename); + if (buffer_deinit(&stdin_buf)) die("input error"); if (ferror(stdout)) die("output error"); - buffer_reset(); + buffer_reset(&stdin_buf); return 0; } diff --git a/test-svn-fe.c b/test-svn-fe.c index 77cf78abc..b42ba789b 100644 --- a/test-svn-fe.c +++ b/test-svn-fe.c @@ -9,7 +9,8 @@ int main(int argc, char *argv[]) { if (argc != 2) usage("test-svn-fe <file>"); - svndump_init(argv[1]); + if (svndump_init(argv[1])) + return 1; svndump_read(NULL); svndump_deinit(); svndump_reset(); diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 6cfa256a3..260cf50e7 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -63,14 +63,14 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("progress Imported commit %"PRIu32".\n\n", revision); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(5); + buffer_skip_bytes(input, 5); len -= 5; } printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); - buffer_copy_bytes(len); + buffer_copy_bytes(input, len); fputc('\n', stdout); } diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2aaaea53d..054e7d5eb 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,11 +1,14 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ +#include "line_buffer.h" + void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); void fast_export_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, + struct line_buffer *input); #endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 154356709..aedf105b7 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -5,47 +5,76 @@ #include "git-compat-util.h" #include "line_buffer.h" -#include "obj_pool.h" +#include "strbuf.h" -#define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 -/* Create memory pool for char sequence of known length */ -obj_pool_gen(blob, char, 4096) +int buffer_init(struct line_buffer *buf, const char *filename) +{ + buf->infile = filename ? fopen(filename, "r") : stdin; + if (!buf->infile) + return -1; + return 0; +} -static char line_buffer[LINE_BUFFER_LEN]; -static char byte_buffer[COPY_BUFFER_LEN]; -static FILE *infile; +int buffer_fdinit(struct line_buffer *buf, int fd) +{ + buf->infile = fdopen(fd, "r"); + if (!buf->infile) + return -1; + return 0; +} -int buffer_init(const char *filename) +int buffer_tmpfile_init(struct line_buffer *buf) { - infile = filename ? fopen(filename, "r") : stdin; - if (!infile) + buf->infile = tmpfile(); + if (!buf->infile) return -1; return 0; } -int buffer_deinit(void) +int buffer_deinit(struct line_buffer *buf) { int err; - if (infile == stdin) - return ferror(infile); - err = ferror(infile); - err |= fclose(infile); + if (buf->infile == stdin) + return ferror(buf->infile); + err = ferror(buf->infile); + err |= fclose(buf->infile); return err; } +FILE *buffer_tmpfile_rewind(struct line_buffer *buf) +{ + rewind(buf->infile); + return buf->infile; +} + +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) +{ + long pos = ftell(buf->infile); + if (pos < 0) + return error("ftell error: %s", strerror(errno)); + if (fseek(buf->infile, 0, SEEK_SET)) + return error("seek error: %s", strerror(errno)); + return pos; +} + +int buffer_read_char(struct line_buffer *buf) +{ + return fgetc(buf->infile); +} + /* Read a line without trailing newline. */ -char *buffer_read_line(void) +char *buffer_read_line(struct line_buffer *buf) { char *end; - if (!fgets(line_buffer, sizeof(line_buffer), infile)) + if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) /* Error or data exhausted. */ return NULL; - end = line_buffer + strlen(line_buffer); + end = buf->line_buffer + strlen(buf->line_buffer); if (end[-1] == '\n') end[-1] = '\0'; - else if (feof(infile)) + else if (feof(buf->infile)) ; /* No newline at end of file. That's fine. */ else /* @@ -54,44 +83,50 @@ char *buffer_read_line(void) * but for now let's return an error. */ return NULL; - return line_buffer; + return buf->line_buffer; +} + +char *buffer_read_string(struct line_buffer *buf, uint32_t len) +{ + strbuf_reset(&buf->blob_buffer); + strbuf_fread(&buf->blob_buffer, len, buf->infile); + return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } -char *buffer_read_string(uint32_t len) +void buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, uint32_t size) { - char *s; - blob_free(blob_pool.size); - s = blob_pointer(blob_alloc(len + 1)); - s[fread(s, 1, len, infile)] = '\0'; - return ferror(infile) ? NULL : s; + strbuf_fread(sb, size, buf->infile); } -void buffer_copy_bytes(uint32_t len) +void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) { + char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { + while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); + in = fread(byte_buffer, 1, in, buf->infile); len -= in; fwrite(byte_buffer, 1, in, stdout); if (ferror(stdout)) { - buffer_skip_bytes(len); + buffer_skip_bytes(buf, len); return; } } } -void buffer_skip_bytes(uint32_t len) +void buffer_skip_bytes(struct line_buffer *buf, uint32_t len) { + char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { + while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); + in = fread(byte_buffer, 1, in, buf->infile); len -= in; } } -void buffer_reset(void) +void buffer_reset(struct line_buffer *buf) { - blob_reset(); + strbuf_release(&buf->blob_buffer); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 9c78ae11a..96ce966a2 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -1,12 +1,31 @@ #ifndef LINE_BUFFER_H_ #define LINE_BUFFER_H_ -int buffer_init(const char *filename); -int buffer_deinit(void); -char *buffer_read_line(void); -char *buffer_read_string(uint32_t len); -void buffer_copy_bytes(uint32_t len); -void buffer_skip_bytes(uint32_t len); -void buffer_reset(void); +#include "strbuf.h" + +#define LINE_BUFFER_LEN 10000 + +struct line_buffer { + char line_buffer[LINE_BUFFER_LEN]; + struct strbuf blob_buffer; + FILE *infile; +}; +#define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} + +int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_fdinit(struct line_buffer *buf, int fd); +int buffer_deinit(struct line_buffer *buf); +void buffer_reset(struct line_buffer *buf); + +int buffer_tmpfile_init(struct line_buffer *buf); +FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write. */ +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); + +char *buffer_read_line(struct line_buffer *buf); +char *buffer_read_string(struct line_buffer *buf, uint32_t len); +int buffer_read_char(struct line_buffer *buf); +void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); +void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); +void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 8906fb1f5..e89cc41d5 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -14,22 +14,46 @@ Calling sequence The calling program: + - initializes a `struct line_buffer` to LINE_BUFFER_INIT - specifies a file to read with `buffer_init` - processes input with `buffer_read_line`, `buffer_read_string`, `buffer_skip_bytes`, and `buffer_copy_bytes` - closes the file with `buffer_deinit`, perhaps to start over and read another file. -Before exiting, the caller can use `buffer_reset` to deallocate -resources for the benefit of profiling tools. +When finished, the caller can use `buffer_reset` to deallocate +resources. + +Using temporary files +--------------------- + +Temporary files provide a place to store data that should not outlive +the calling program. A program + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - requests a temporary file with `buffer_tmpfile_init` + - acquires an output handle by calling `buffer_tmpfile_rewind` + - uses standard I/O functions like `fprintf` and `fwrite` to fill + the temporary file + - declares writing is over with `buffer_tmpfile_prepare_to_read` + - can re-read what was written with `buffer_read_line`, + `buffer_read_string`, and so on + - can reuse the temporary file by calling `buffer_tmpfile_rewind` + again + - removes the temporary file with `buffer_deinit`, perhaps to + reuse the line_buffer for some other file. + +When finished, the calling program can use `buffer_reset` to deallocate +resources. Functions --------- -`buffer_init`:: - Open the named file for input. If filename is NULL, - start reading from stdin. On failure, returns -1 (with - errno indicating the nature of the failure). +`buffer_init`, `buffer_fdinit`:: + Open the named file or file descriptor for input. + buffer_init(buf, NULL) prepares to read from stdin. + On failure, returns -1 (with errno indicating the nature + of the failure). `buffer_deinit`:: Stop reading from the current file (closing it unless diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e3d1fa354..491f0135a 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -175,25 +175,18 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) repo_write_dirent(path, mode, blob_mark, 0); } -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark) { - uint32_t mode = 0; struct repo_dirent *src_dent; src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL) { - mode = src_dent->mode; - repo_write_dirent(path, mode, blob_mark, 0); - } - return mode; -} - -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL && blob_mark == 0) + if (!src_dent) + return 0; + if (!blob_mark) blob_mark = src_dent->content_offset; + if (!mode) + mode = src_dent->mode; repo_write_dirent(path, mode, blob_mark, 0); + return mode; } void repo_delete(uint32_t *path) diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 547617592..68baeb582 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -14,8 +14,7 @@ uint32_t next_blob_mark(void); uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, long unsigned timestamp); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 2ad2c307d..ee7c0bb2e 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -30,7 +30,9 @@ /* Create memory pool for log messages */ obj_pool_gen(log, char, 4096) -static char* log_copy(uint32_t length, char *log) +static struct line_buffer input = LINE_BUFFER_INIT; + +static char *log_copy(uint32_t length, const char *log) { char *buffer; log_free(log_pool.size); @@ -40,8 +42,9 @@ static char* log_copy(uint32_t length, char *log) } static struct { - uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t action, propLength, textLength, srcRev, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t text_delta, prop_delta; } node_ctx; static struct { @@ -58,7 +61,9 @@ static struct { uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, revision_number, node_path, node_kind, node_action, node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length, svn_fs_dump_format_version; + prop_content_length, content_length, svn_fs_dump_format_version, + /* version 3 format */ + text_delta, prop_delta; } keys; static void reset_node_ctx(char *fname) @@ -69,9 +74,9 @@ static void reset_node_ctx(char *fname) node_ctx.textLength = LENGTH_UNKNOWN; node_ctx.src[0] = ~0; node_ctx.srcRev = 0; - node_ctx.srcMode = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); - node_ctx.mark = 0; + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; } static void reset_rev_ctx(uint32_t revision) @@ -107,81 +112,148 @@ static void init_keys(void) keys.prop_content_length = pool_intern("Prop-content-length"); keys.content_length = pool_intern("Content-length"); keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); + /* version 3 format (Subversion 1.1.0) */ + keys.text_delta = pool_intern("Text-delta"); + keys.prop_delta = pool_intern("Prop-delta"); +} + +static void handle_property(uint32_t key, const char *val, uint32_t len, + uint32_t *type_set) +{ + if (key == keys.svn_log) { + if (!val) + die("invalid dump: unsets svn:log"); + /* Value length excludes terminating nul. */ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (!val) + die("invalid dump: unsets svn:date"); + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + warning("invalid timestamp: %s", val); + } else if (key == keys.svn_executable || key == keys.svn_special) { + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = REPO_MODE_BLB; + return; + } + *type_set = 1; + node_ctx.type = key == keys.svn_executable ? + REPO_MODE_EXE : + REPO_MODE_LNK; + } } static void read_props(void) { - uint32_t len; uint32_t key = ~0; - char *val = NULL; - char *t; - while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { - if (!strncmp(t, "K ", 2)) { - len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); - } else if (!strncmp(t, "V ", 2)) { - len = atoi(&t[2]); - val = buffer_read_string(len); - if (key == keys.svn_log) { - /* Value length excludes terminating nul. */ - rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; - } + const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. + */ + uint32_t type_set = 0; + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { + uint32_t len; + const char *val; + const char type = t[0]; + + if (!type || t[1] != ' ') + die("invalid property line: %s\n", t); + len = atoi(&t[2]); + val = buffer_read_string(&input, len); + buffer_skip_bytes(&input, 1); /* Discard trailing newline. */ + + switch (type) { + case 'K': + key = pool_intern(val); + continue; + case 'D': + key = pool_intern(val); + val = NULL; + len = 0; + /* fall through */ + case 'V': + handle_property(key, val, len, &type_set); key = ~0; - buffer_read_line(); + continue; + default: + die("invalid property line: %s\n", t); } } } static void handle_node(void) { - if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) - read_props(); - - if (node_ctx.srcRev) - node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - - if (node_ctx.textLength != LENGTH_UNKNOWN && - node_ctx.type != REPO_MODE_DIR) - node_ctx.mark = next_blob_mark(); + uint32_t mark = 0; + const uint32_t type = node_ctx.type; + const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; + if (node_ctx.text_delta) + die("text deltas not supported"); + if (node_ctx.textLength != LENGTH_UNKNOWN) + mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { + if (mark || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + return repo_delete(node_ctx.dst); + } + if (node_ctx.action == NODEACT_REPLACE) { repo_delete(node_ctx.dst); - } else if (node_ctx.action == NODEACT_CHANGE || - node_ctx.action == NODEACT_REPLACE) { - if (node_ctx.action == NODEACT_REPLACE && - node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, node_ctx.mark); - else if (node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + node_ctx.action = NODEACT_ADD; + } + if (node_ctx.srcRev) { + repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; + } + if (mark && type == REPO_MODE_DIR) + die("invalid dump: directories cannot have text attached"); + if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (type != REPO_MODE_DIR) + die("invalid dump: root of tree is not a regular file"); + } else if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); + if (!mode) + die("invalid dump: path to be modified is missing"); + if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); - else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); - else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - node_ctx.textLength != LENGTH_UNKNOWN) - repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + if (!mark && type != REPO_MODE_DIR) + die("invalid dump: adds node without text"); + repo_add(node_ctx.dst, type, mark); + } else { + die("invalid dump: Node-path block lacks Node-action"); } - - if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) - node_ctx.type = node_ctx.srcMode; - - if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); + if (have_props) { + const uint32_t old_mode = node_ctx.type; + if (!node_ctx.prop_delta) + node_ctx.type = type; + if (node_ctx.propLength) + read_props(); + if (node_ctx.type != old_mode) + repo_modify_path(node_ctx.dst, node_ctx.type, mark); + } + if (mark) + fast_export_blob(node_ctx.type, mark, + node_ctx.textLength, &input); } static void handle_revision(void) @@ -200,7 +272,7 @@ void svndump_read(const char *url) uint32_t key; reset_dump_ctx(pool_intern(url)); - while ((t = buffer_read_line())) { + while ((t = buffer_read_line(&input))) { val = strstr(t, ": "); if (!val) continue; @@ -210,8 +282,8 @@ void svndump_read(const char *url) if (key == keys.svn_fs_dump_format_version) { dump_ctx.version = atoi(val); - if (dump_ctx.version > 2) - die("expected svn dump format version <= 2, found %"PRIu32, + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %"PRIu32, dump_ctx.version); } else if (key == keys.uuid) { dump_ctx.uuid = pool_intern(val); @@ -255,9 +327,13 @@ void svndump_read(const char *url) node_ctx.textLength = atoi(val); } else if (key == keys.prop_content_length) { node_ctx.propLength = atoi(val); + } else if (key == keys.text_delta) { + node_ctx.text_delta = !strcmp(val, "true"); + } else if (key == keys.prop_delta) { + node_ctx.prop_delta = !strcmp(val, "true"); } else if (key == keys.content_length) { len = atoi(val); - buffer_read_line(); + buffer_read_line(&input); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { @@ -265,7 +341,7 @@ void svndump_read(const char *url) active_ctx = REV_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(len); + buffer_skip_bytes(&input, len); } } } @@ -275,14 +351,16 @@ void svndump_read(const char *url) handle_revision(); } -void svndump_init(const char *filename) +int svndump_init(const char *filename) { - buffer_init(filename); + if (buffer_init(&input, filename)) + return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); init_keys(); + return 0; } void svndump_deinit(void) @@ -292,7 +370,7 @@ void svndump_deinit(void) reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); - if (buffer_deinit()) + if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) fprintf(stderr, "Output error\n"); @@ -301,7 +379,7 @@ void svndump_deinit(void) void svndump_reset(void) { log_reset(); - buffer_reset(); + buffer_reset(&input); repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h index 93c412f14..df9ceb0e8 100644 --- a/vcs-svn/svndump.h +++ b/vcs-svn/svndump.h @@ -1,7 +1,7 @@ #ifndef SVNDUMP_H_ #define SVNDUMP_H_ -void svndump_init(const char *filename); +int svndump_init(const char *filename); void svndump_read(const char *url); void svndump_deinit(void); void svndump_reset(void); |