diff options
Diffstat (limited to 'contrib/git-svn/git-svn.perl')
-rwxr-xr-x | contrib/git-svn/git-svn.perl | 429 |
1 files changed, 424 insertions, 5 deletions
diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl index d8f103ed9..d5c7e4796 100755 --- a/contrib/git-svn/git-svn.perl +++ b/contrib/git-svn/git-svn.perl @@ -35,8 +35,8 @@ my $sha1_short = qr/[a-f\d]{4,40}/; my ($_revision,$_stdin,$_no_ignore_ext,$_no_stop_copy,$_help,$_rmdir,$_edit, $_find_copies_harder, $_l, $_cp_similarity, $_repack, $_repack_nr, $_repack_flags, - $_template, $_shared, - $_version, $_upgrade, $_authors, $_branch_all_refs); + $_template, $_shared, $_no_default_regex, $_no_graft_copy, + $_version, $_upgrade, $_authors, $_branch_all_refs, @_opt_m); my (@_branch_from, %tree_map, %users); my ($_svn_co_url_revs, $_svn_pg_peg_revs); my @repo_path_split_cache; @@ -48,6 +48,12 @@ my %fc_opts = ( 'no-ignore-externals' => \$_no_ignore_ext, 'repack:i' => \$_repack, 'repack-flags|repack-args|repack-opts=s' => \$_repack_flags); +my ($_trunk, $_tags, $_branches); +my %multi_opts = ( 'trunk|T=s' => \$_trunk, + 'tags|t=s' => \$_tags, + 'branches|b=s' => \$_branches ); +my %init_opts = ( 'template=s' => \$_template, 'shared' => \$_shared ); + # yes, 'native' sets "\n". Patches to fix this for non-*nix systems welcome: my %EOL = ( CR => "\015", LF => "\012", CRLF => "\015\012", native => "\012" ); @@ -56,8 +62,7 @@ my %cmd = ( { 'revision|r=s' => \$_revision, %fc_opts } ], init => [ \&init, "Initialize a repo for tracking" . " (requires URL argument)", - { 'template=s' => \$_template, - 'shared' => \$_shared } ], + \%init_opts ], commit => [ \&commit, "Commit git revisions to SVN", { 'stdin|' => \$_stdin, 'edit|e' => \$_edit, @@ -71,7 +76,19 @@ my %cmd = ( rebuild => [ \&rebuild, "Rebuild git-svn metadata (after git clone)", { 'no-ignore-externals' => \$_no_ignore_ext, 'upgrade' => \$_upgrade } ], + 'graft-branches' => [ \&graft_branches, + 'Detect merges/branches from already imported history', + { 'merge-rx|m' => \@_opt_m, + 'no-default-regex' => \$_no_default_regex, + 'no-graft-copy' => \$_no_graft_copy } ], + 'multi-init' => [ \&multi_init, + 'Initialize multiple trees (like git-svnimport)', + { %multi_opts, %fc_opts } ], + 'multi-fetch' => [ \&multi_fetch, + 'Fetch multiple trees (like git-svnimport)', + \%fc_opts ], ); + my $cmd; for (my $i = 0; $i < @ARGV; $i++) { if (defined $cmd{$ARGV[$i]}) { @@ -96,7 +113,7 @@ init_vars(); load_authors() if $_authors; load_all_refs() if $_branch_all_refs; svn_compat_check(); -migration_check() unless $cmd eq 'init'; +migration_check() unless $cmd =~ /^(?:init|multi-init)$/; $cmd{$cmd}->[0]->(@ARGV); exit 0; @@ -219,6 +236,7 @@ when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN sub init { $SVN_URL = shift or die "SVN repository location required " . "as a command-line argument\n"; + $SVN_URL =~ s!/+$!!; # strip trailing slash unless (-d $GIT_DIR) { my @init_db = ('git-init-db'); push @init_db, "--template=$_template" if defined $_template; @@ -358,8 +376,283 @@ sub show_ignore { } } +sub graft_branches { + my $gr_file = "$GIT_DIR/info/grafts"; + my ($grafts, $comments) = read_grafts($gr_file); + my $gr_sha1; + + if (%$grafts) { + # temporarily disable our grafts file to make this idempotent + chomp($gr_sha1 = safe_qx(qw/git-hash-object -w/,$gr_file)); + rename $gr_file, "$gr_file~$gr_sha1" or croak $!; + } + + my $l_map = read_url_paths(); + my @re = map { qr/$_/is } @_opt_m if @_opt_m; + unless ($_no_default_regex) { + push @re, ( qr/\b(?:merge|merging|merged)\s+(\S.+)/is, + qr/\b(?:from|of)\s+(\S.+)/is ); + } + foreach my $u (keys %$l_map) { + if (@re) { + foreach my $p (keys %{$l_map->{$u}}) { + graft_merge_msg($grafts,$l_map,$u,$p); + } + } + graft_file_copy($grafts,$l_map,$u) unless $_no_graft_copy; + } + + write_grafts($grafts, $comments, $gr_file); + unlink "$gr_file~$gr_sha1" if $gr_sha1; +} + +sub multi_init { + my $url = shift; + $_trunk ||= 'trunk'; + $_trunk =~ s#/+$##; + $url =~ s#/+$## if $url; + if ($_trunk !~ m#^[a-z\+]+://#) { + $_trunk = '/' . $_trunk if ($_trunk !~ m#^/#); + unless ($url) { + print STDERR "E: '$_trunk' is not a complete URL ", + "and a separate URL is not specified\n"; + exit 1; + } + $_trunk = $url . $_trunk; + } + if ($GIT_SVN eq 'git-svn') { + print "GIT_SVN_ID set to 'trunk' for $_trunk\n"; + $GIT_SVN = $ENV{GIT_SVN_ID} = 'trunk'; + } + init_vars(); + init($_trunk); + complete_url_ls_init($url, $_branches, '--branches/-b', ''); + complete_url_ls_init($url, $_tags, '--tags/-t', 'tags/'); +} + +sub multi_fetch { + # try to do trunk first, since branches/tags + # may be descended from it. + if (-d "$GIT_DIR/svn/trunk") { + print "Fetching trunk\n"; + defined(my $pid = fork) or croak $!; + if (!$pid) { + $GIT_SVN = $ENV{GIT_SVN_ID} = 'trunk'; + init_vars(); + fetch(@_); + exit 0; + } + waitpid $pid, 0; + croak $? if $?; + } + rec_fetch('', "$GIT_DIR/svn", @_); +} + ########################### utility functions ######################### +sub rec_fetch { + my ($pfx, $p, @args) = @_; + my @dir; + foreach (sort <$p/*>) { + if (-r "$_/info/url") { + $pfx .= '/' if $pfx && $pfx !~ m!/$!; + my $id = $pfx . basename $_; + next if $id eq 'trunk'; + print "Fetching $id\n"; + defined(my $pid = fork) or croak $!; + if (!$pid) { + $GIT_SVN = $ENV{GIT_SVN_ID} = $id; + init_vars(); + fetch(@args); + exit 0; + } + waitpid $pid, 0; + croak $? if $?; + } elsif (-d $_) { + push @dir, $_; + } + } + foreach (@dir) { + my $x = $_; + $x =~ s!^\Q$GIT_DIR\E/svn/!!; + rec_fetch($x, $_); + } +} + +sub complete_url_ls_init { + my ($url, $var, $switch, $pfx) = @_; + unless ($var) { + print STDERR "W: $switch not specified\n"; + return; + } + $var =~ s#/+$##; + if ($var !~ m#^[a-z\+]+://#) { + $var = '/' . $var if ($var !~ m#^/#); + unless ($url) { + print STDERR "E: '$var' is not a complete URL ", + "and a separate URL is not specified\n"; + exit 1; + } + $var = $url . $var; + } + chomp(my @ls = safe_qx(qw/svn ls --non-interactive/, $var)); + my $old = $GIT_SVN; + defined(my $pid = fork) or croak $!; + if (!$pid) { + foreach my $u (map { "$var/$_" } (grep m!/$!, @ls)) { + $u =~ s#/+$##; + if ($u !~ m!\Q$var\E/(.+)$!) { + print STDERR "W: Unrecognized URL: $u\n"; + die "This should never happen\n"; + } + my $id = $pfx.$1; + print "init $u => $id\n"; + $GIT_SVN = $ENV{GIT_SVN_ID} = $id; + init_vars(); + init($u); + } + exit 0; + } + waitpid $pid, 0; + croak $? if $?; +} + +sub common_prefix { + my $paths = shift; + my %common; + foreach (@$paths) { + my @tmp = split m#/#, $_; + my $p = ''; + while (my $x = shift @tmp) { + $p .= "/$x"; + $common{$p} ||= 0; + $common{$p}++; + } + } + foreach (sort {length $b <=> length $a} keys %common) { + if ($common{$_} == @$paths) { + return $_; + } + } + return ''; +} + +# this isn't funky-filename safe, but good enough for now... +sub graft_file_copy { + my ($grafts, $l_map, $u) = @_; + my $paths = $l_map->{$u}; + my $pfx = common_prefix([keys %$paths]); + + my $pid = open my $fh, '-|'; + defined $pid or croak $!; + unless ($pid) { + exec(qw/svn log -v/, $u.$pfx) or croak $!; + } + my ($r, $mp) = (undef, undef); + while (<$fh>) { + chomp; + if (/^\-{72}$/) { + $mp = $r = undef; + } elsif (/^r(\d+) \| /) { + $r = $1 unless defined $r; + } elsif (/^Changed paths:/) { + $mp = 1; + } elsif ($mp && m#^ [AR] /(\S.*?) \(from /(\S+?):(\d+)\)$#) { + my $dbg = "r$r | $_"; + my ($p1, $p0, $r0) = ($1, $2, $3); + my $c; + foreach my $x (keys %$paths) { + next unless ($p1 =~ /^\Q$x\E/); + my $i = $paths->{$x}; + my $f = "$GIT_DIR/svn/$i/revs/$r"; + unless (-r $f) { + print STDERR "r$r of $i not imported,", + " $dbg\n"; + next; + } + $c = file_to_s($f); + } + next unless $c; + foreach my $x (keys %$paths) { + next unless ($p0 =~ /^\Q$x\E/); + my $i = $paths->{$x}; + my $f = "$GIT_DIR/svn/$i/revs/$r0"; + while ($r0 && !-r $f) { + # could be an older revision, too... + $r0--; + $f = "$GIT_DIR/svn/$i/revs/$r0"; + } + unless (-r $f) { + print STDERR "r$r0 of $i not imported,", + " $dbg\n"; + next; + } + my $r1 = file_to_s($f); + $grafts->{$c}->{$r1} = 1; + } + } + } +} + +sub process_merge_msg_matches { + my ($grafts, $l_map, $u, $p, $c, @matches) = @_; + my (@strong, @weak); + foreach (@matches) { + # merging with ourselves is not interesting + next if $_ eq $p; + if ($l_map->{$u}->{$_}) { + push @strong, $_; + } else { + push @weak, $_; + } + } + foreach my $w (@weak) { + last if @strong; + # no exact match, use branch name as regexp. + my $re = qr/\Q$w\E/i; + foreach (keys %{$l_map->{$u}}) { + if (/$re/) { + push @strong, $_; + last; + } + } + last if @strong; + $w = basename($w); + $re = qr/\Q$w\E/i; + foreach (keys %{$l_map->{$u}}) { + if (/$re/) { + push @strong, $_; + last; + } + } + } + my ($rev) = ($c->{m} =~ /^git-svn-id:\s(?:\S+?)\@(\d+) + \s(?:[a-f\d\-]+)$/xsm); + unless (defined $rev) { + ($rev) = ($c->{m} =~/^git-svn-id:\s(\d+) + \@(?:[a-f\d\-]+)/xsm); + return unless defined $rev; + } + foreach my $m (@strong) { + my ($r0, $s0) = find_rev_before($rev, $m); + $grafts->{$c->{c}}->{$s0} = 1 if defined $s0; + } +} + +sub graft_merge_msg { + my ($grafts, $l_map, $u, $p, @re) = @_; + + my $x = $l_map->{$u}->{$p}; + my $rl = rev_list_raw($x); + while (my $c = next_rev_list_entry($rl)) { + foreach my $re (@re) { + my (@br) = ($c->{m} =~ /$re/g); + next unless @br; + process_merge_msg_matches($grafts,$l_map,$u,$p,$c,@br); + } + } +} + sub read_uuid { return if $SVN_UUID; my $info = shift || svn_info('.'); @@ -402,6 +695,7 @@ sub repo_path_split { $url .= "/$n"; } push @repo_path_split_cache, qr/^(\Q$url\E)/; + $path = join('/',@paths); return ($url, $path); } @@ -806,6 +1100,38 @@ sub svn_commit_tree { return fetch("$committed=$commit")->{revision}; } +sub rev_list_raw { + my (@args) = @_; + my $pid = open my $fh, '-|'; + defined $pid or croak $!; + if (!$pid) { + exec(qw/git-rev-list --pretty=raw/, @args) or croak $!; + } + return { fh => $fh, t => { } }; +} + +sub next_rev_list_entry { + my $rl = shift; + my $fh = $rl->{fh}; + my $x = $rl->{t}; + while (<$fh>) { + if (/^commit ($sha1)$/o) { + if ($x->{c}) { + $rl->{t} = { c => $1 }; + return $x; + } else { + $x->{c} = $1; + } + } elsif (/^parent ($sha1)$/o) { + $x->{p}->{$1} = 1; + } elsif (s/^ //) { + $x->{m} ||= ''; + $x->{m} .= $_; + } + } + return ($x != $rl->{t}) ? $x : undef; +} + # read the entire log into a temporary file (which is removed ASAP) # and store the file handle + parser state sub svn_log_raw { @@ -1318,6 +1644,16 @@ sub svn_propget_base { return safe_qx(qw/svn propget/, $p, $f); } +sub git_svn_each { + my $sub = shift; + foreach (`git-rev-parse --symbolic --all`) { + next unless s#^refs/remotes/##; + chomp $_; + next unless -f "$GIT_DIR/svn/$_/info/url"; + &$sub($_); + } +} + sub migration_check { return if (-d "$GIT_DIR/svn" || !-d $GIT_DIR); print "Upgrading repository...\n"; @@ -1344,6 +1680,16 @@ sub migration_check { print "Done upgrading.\n"; } +sub find_rev_before { + my ($r, $git_svn_id) = @_; + my @revs = map { basename $_ } <$GIT_DIR/svn/$git_svn_id/revs/*>; + foreach my $r0 (sort { $b <=> $a } @revs) { + next if $r0 >= $r; + return ($r0, file_to_s("$GIT_DIR/svn/$git_svn_id/revs/$r0")); + } + return (undef, undef); +} + sub init_vars { $GIT_SVN ||= $ENV{GIT_SVN_ID} || 'git-svn'; $GIT_SVN_DIR = "$GIT_DIR/svn/$GIT_SVN"; @@ -1384,6 +1730,79 @@ sub set_default_vals { } } +sub read_grafts { + my $gr_file = shift; + my ($grafts, $comments) = ({}, {}); + if (open my $fh, '<', $gr_file) { + my @tmp; + while (<$fh>) { + if (/^($sha1)\s+/) { + my $c = $1; + if (@tmp) { + @{$comments->{$c}} = @tmp; + @tmp = (); + } + foreach my $p (split /\s+/, $_) { + $grafts->{$c}->{$p} = 1; + } + } else { + push @tmp, $_; + } + } + close $fh or croak $!; + @{$comments->{'END'}} = @tmp if @tmp; + } + return ($grafts, $comments); +} + +sub write_grafts { + my ($grafts, $comments, $gr_file) = @_; + + open my $fh, '>', $gr_file or croak $!; + foreach my $c (sort keys %$grafts) { + if ($comments->{$c}) { + print $fh $_ foreach @{$comments->{$c}}; + } + my $p = $grafts->{$c}; + delete $p->{$c}; # commits are not self-reproducing... + my $pid = open my $ch, '-|'; + defined $pid or croak $!; + if (!$pid) { + exec(qw/git-cat-file commit/, $c) or croak $!; + } + while (<$ch>) { + if (/^parent ([a-f\d]{40})/) { + $p->{$1} = 1; + } else { + last unless /^\S/i; + } + } + close $ch; # breaking the pipe + print $fh $c, ' ', join(' ', sort keys %$p),"\n"; + } + if ($comments->{'END'}) { + print $fh $_ foreach @{$comments->{'END'}}; + } + close $fh or croak $!; +} + +sub read_url_paths { + my $l_map = {}; + git_svn_each(sub { my $x = shift; + my $u = file_to_s("$GIT_DIR/svn/$x/info/repo_url"); + my $p = file_to_s("$GIT_DIR/svn/$x/info/repo_path"); + # we hate trailing slashes + if ($u =~ s#(?:^\/+|\/+$)##g) { + s_to_file($u,"$GIT_DIR/svn/$x/info/repo_url"); + } + if ($p =~ s#(?:^\/+|\/+$)##g) { + s_to_file($p,"$GIT_DIR/svn/$x/info/repo_path"); + } + $l_map->{$u}->{$p} = $x; + }); + return $l_map; +} + __END__ Data structures: |