diff options
-rwxr-xr-x | git-svn.perl | 92 | ||||
-rw-r--r-- | perl/Git/SVN.pm | 174 | ||||
-rw-r--r-- | perl/Git/SVN/Fetcher.pm | 2 | ||||
-rw-r--r-- | perl/Git/SVN/Migration.pm | 6 | ||||
-rw-r--r-- | perl/Git/SVN/Ra.pm | 92 | ||||
-rw-r--r-- | perl/Git/SVN/Utils.pm | 176 | ||||
-rw-r--r-- | t/Git-SVN/Utils/add_path_to_url.t | 27 | ||||
-rw-r--r-- | t/Git-SVN/Utils/canonicalize_url.t | 26 | ||||
-rw-r--r-- | t/Git-SVN/Utils/collapse_dotdot.t | 23 | ||||
-rw-r--r-- | t/Git-SVN/Utils/join_paths.t | 32 | ||||
-rwxr-xr-x | t/t9107-git-svn-migrate.sh | 6 | ||||
-rwxr-xr-x | t/t9118-git-svn-funky-branch-names.sh | 7 |
12 files changed, 489 insertions, 174 deletions
diff --git a/git-svn.perl b/git-svn.perl index 828b8f0c8..0d77ffb0b 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -29,7 +29,16 @@ use Git::SVN::Prompt; use Git::SVN::Log; use Git::SVN::Migration; -use Git::SVN::Utils qw(fatal can_compress); +use Git::SVN::Utils qw( + fatal + can_compress + canonicalize_path + canonicalize_url + join_paths + add_path_to_url + join_paths +); + use Git qw( git_cmd_try command @@ -1231,7 +1240,7 @@ sub cmd_show_ignore { my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); $gs ||= Git::SVN->new; my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum); - $gs->prop_walk($gs->{path}, $r, sub { + $gs->prop_walk($gs->path, $r, sub { my ($gs, $path, $props) = @_; print STDOUT "\n# $path\n"; my $s = $props->{'svn:ignore'} or return; @@ -1247,7 +1256,7 @@ sub cmd_show_externals { my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); $gs ||= Git::SVN->new; my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum); - $gs->prop_walk($gs->{path}, $r, sub { + $gs->prop_walk($gs->path, $r, sub { my ($gs, $path, $props) = @_; print STDOUT "\n# $path\n"; my $s = $props->{'svn:externals'} or return; @@ -1262,7 +1271,7 @@ sub cmd_create_ignore { my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); $gs ||= Git::SVN->new; my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum); - $gs->prop_walk($gs->{path}, $r, sub { + $gs->prop_walk($gs->path, $r, sub { my ($gs, $path, $props) = @_; # $path is of the form /path/to/dir/ $path = '.' . $path; @@ -1292,31 +1301,6 @@ sub cmd_mkdirs { $gs->mkemptydirs($_revision); } -sub canonicalize_path { - my ($path) = @_; - my $dot_slash_added = 0; - if (substr($path, 0, 1) ne "/") { - $path = "./" . $path; - $dot_slash_added = 1; - } - # File::Spec->canonpath doesn't collapse x/../y into y (for a - # good reason), so let's do this manually. - $path =~ s#/+#/#g; - $path =~ s#/\.(?:/|$)#/#g; - $path =~ s#/[^/]+/\.\.##g; - $path =~ s#/$##g; - $path =~ s#^\./## if $dot_slash_added; - $path =~ s#^/##; - $path =~ s#^\.$##; - return $path; -} - -sub canonicalize_url { - my ($url) = @_; - $url =~ s#^([^:]+://[^/]*/)(.*)$#$1 . canonicalize_path($2)#e; - return $url; -} - # get_svnprops(PATH) # ------------------ # Helper for cmd_propget and cmd_proplist below. @@ -1330,7 +1314,7 @@ sub get_svnprops { $path = $cmd_dir_prefix . $path; fatal("No such file or directory: $path") unless -e $path; my $is_dir = -d $path ? 1 : 0; - $path = $gs->{path} . '/' . $path; + $path = join_paths($gs->{path}, $path); # canonicalize the path (otherwise libsvn will abort or fail to # find the file) @@ -1431,8 +1415,8 @@ sub cmd_commit_diff { fatal("Needed URL or usable git-svn --id in ", "the command-line\n", $usage); } - $url = $gs->{url}; - $svn_path = $gs->{path}; + $url = $gs->url; + $svn_path = $gs->path; } unless (defined $_revision) { fatal("-r|--revision is a required argument\n", $usage); @@ -1466,24 +1450,6 @@ sub cmd_commit_diff { } } -sub escape_uri_only { - my ($uri) = @_; - my @tmp; - foreach (split m{/}, $uri) { - s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; - push @tmp, $_; - } - join('/', @tmp); -} - -sub escape_url { - my ($url) = @_; - if ($url =~ m#^([^:]+)://([^/]*)(.*)$#) { - my ($scheme, $domain, $uri) = ($1, $2, escape_uri_only($3)); - $url = "$scheme://$domain$uri"; - } - $url; -} sub cmd_info { my $path = canonicalize_path(defined($_[0]) ? $_[0] : "."); @@ -1508,21 +1474,21 @@ sub cmd_info { # canonicalize_path() will return "" to make libsvn 1.5.x happy, $path = "." if $path eq ""; - my $full_url = $url . ($fullpath eq "" ? "" : "/$fullpath"); + my $full_url = canonicalize_url( add_path_to_url( $url, $fullpath ) ); if ($_url) { - print escape_url($full_url), "\n"; + print "$full_url\n"; return; } my $result = "Path: $path\n"; $result .= "Name: " . basename($path) . "\n" if $file_type ne "dir"; - $result .= "URL: " . escape_url($full_url) . "\n"; + $result .= "URL: $full_url\n"; eval { my $repos_root = $gs->repos_root; Git::SVN::remove_username($repos_root); - $result .= "Repository Root: " . escape_url($repos_root) . "\n"; + $result .= "Repository Root: " . canonicalize_url($repos_root) . "\n"; }; if ($@) { $result .= "Repository Root: (offline)\n"; @@ -1669,7 +1635,9 @@ sub post_fetch_checkout { sub complete_svn_url { my ($url, $path) = @_; - $path =~ s#/+$##; + $path = canonicalize_path($path); + + # If the path is not a URL... if ($path !~ m#^[a-z\+]+://#) { if (!defined $url || $url !~ m#^[a-z\+]+://#) { fatal("E: '$path' is not a complete URL ", @@ -1686,7 +1654,7 @@ sub complete_url_ls_init { print STDERR "W: $switch not specified\n"; return; } - $repo_path =~ s#/+$##; + $repo_path = canonicalize_path($repo_path); if ($repo_path =~ m#^[a-z\+]+://#) { $ra = Git::SVN::Ra->new($repo_path); $repo_path = ''; @@ -1697,18 +1665,18 @@ sub complete_url_ls_init { "and a separate URL is not specified"); } } - my $url = $ra->{url}; + my $url = $ra->url; my $gs = Git::SVN->init($url, undef, undef, undef, 1); my $k = "svn-remote.$gs->{repo_id}.url"; my $orig_url = eval { command_oneline(qw/config --get/, $k) }; - if ($orig_url && ($orig_url ne $gs->{url})) { + if ($orig_url && ($orig_url ne $gs->url)) { die "$k already set: $orig_url\n", - "wanted to set to: $gs->{url}\n"; + "wanted to set to: $gs->url\n"; } - command_oneline('config', $k, $gs->{url}) unless $orig_url; - my $remote_path = "$gs->{path}/$repo_path"; + command_oneline('config', $k, $gs->url) unless $orig_url; + + my $remote_path = join_paths( $gs->path, $repo_path ); $remote_path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg; - $remote_path =~ s#/+#/#g; $remote_path =~ s#^/##g; $remote_path .= "/*" if $remote_path !~ /\*/; my ($n) = ($switch =~ /^--(\w+)/); diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 8478d0c95..acb25394f 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -23,7 +23,14 @@ use Git qw( command_output_pipe command_close_pipe ); -use Git::SVN::Utils qw(fatal can_compress); +use Git::SVN::Utils qw( + fatal + can_compress + join_paths + canonicalize_path + canonicalize_url + add_path_to_url +); my $can_use_yaml; BEGIN { @@ -195,9 +202,9 @@ sub read_all_remotes { } elsif (m!^(.+)\.usesvmprops=\s*(.*)\s*$!) { $r->{$1}->{svm} = {}; } elsif (m!^(.+)\.url=\s*(.*)\s*$!) { - $r->{$1}->{url} = $2; + $r->{$1}->{url} = canonicalize_url($2); } elsif (m!^(.+)\.pushurl=\s*(.*)\s*$!) { - $r->{$1}->{pushurl} = $2; + $r->{$1}->{pushurl} = canonicalize_url($2); } elsif (m!^(.+)\.ignore-refs=\s*(.*)\s*$!) { $r->{$1}->{ignore_refs_regex} = $2; } elsif (m!^(.+)\.(branches|tags)=$svn_refspec$!) { @@ -290,7 +297,7 @@ sub find_existing_remote { sub init_remote_config { my ($self, $url, $no_write) = @_; - $url =~ s!/+$!!; # strip trailing slash + $url = canonicalize_url($url); my $r = read_all_remotes(); my $existing = find_existing_remote($url, $r); if ($existing) { @@ -314,12 +321,10 @@ sub init_remote_config { print STDERR "Using higher level of URL: ", "$url => $min_url\n"; } - my $old_path = $self->{path}; - $self->{path} = $url; - $self->{path} =~ s!^\Q$min_url\E(/|$)!!; - if (length $old_path) { - $self->{path} .= "/$old_path"; - } + my $old_path = $self->path; + $url =~ s!^\Q$min_url\E(/|$)!!; + $url = join_paths($url, $old_path); + $self->path($url); $url = $min_url; } } @@ -343,18 +348,22 @@ sub init_remote_config { unless ($no_write) { command_noisy('config', "svn-remote.$self->{repo_id}.url", $url); - $self->{path} =~ s{^/}{}; - $self->{path} =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg; + my $path = $self->path; + $path =~ s{^/}{}; + $path =~ s{%([0-9A-F]{2})}{chr hex($1)}ieg; + $self->path($path); command_noisy('config', '--add', "svn-remote.$self->{repo_id}.fetch", - "$self->{path}:".$self->refname); + $self->path.":".$self->refname); } - $self->{url} = $url; + $self->url($url); } sub find_by_url { # repos_root and, path are optional my ($class, $full_url, $repos_root, $path) = @_; + $full_url = canonicalize_url($full_url); + return undef unless defined $full_url; remove_username($full_url); remove_username($repos_root) if defined $repos_root; @@ -393,6 +402,11 @@ sub find_by_url { # repos_root and, path are optional } $p =~ s#^\Q$z\E(?:/|$)#$prefix# or next; } + + # remote fetch paths are not URI escaped. Decode ours + # so they match + $p = uri_decode($p); + foreach my $f (keys %$fetch) { next if $f ne $p; return Git::SVN->new($fetch->{$f}, $repo_id, $f); @@ -435,20 +449,25 @@ sub new { } } my $self = _new($class, $repo_id, $ref_id, $path); - if (!defined $self->{path} || !length $self->{path}) { + if (!defined $self->path || !length $self->path) { my $fetch = command_oneline('config', '--get', "svn-remote.$repo_id.fetch", ":$ref_id\$") or die "Failed to read \"svn-remote.$repo_id.fetch\" ", "\":$ref_id\$\" in config\n"; - ($self->{path}, undef) = split(/\s*:\s*/, $fetch); + my($path) = split(/\s*:\s*/, $fetch); + $self->path($path); } - $self->{path} =~ s{/+}{/}g; - $self->{path} =~ s{\A/}{}; - $self->{path} =~ s{/\z}{}; - $self->{url} = command_oneline('config', '--get', - "svn-remote.$repo_id.url") or + { + my $path = $self->path; + $path =~ s{\A/}{}; + $path =~ s{/\z}{}; + $self->path($path); + } + my $url = command_oneline('config', '--get', + "svn-remote.$repo_id.url") or die "Failed to read \"svn-remote.$repo_id.url\" in config\n"; + $self->url($url); $self->{pushurl} = eval { command_oneline('config', '--get', "svn-remote.$repo_id.pushurl") }; $self->rebuild; @@ -552,8 +571,7 @@ sub _set_svm_vars { # username is of no interest $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1}; - my $replace = $ra->{url}; - $replace .= "/$path" if length $path; + my $replace = add_path_to_url($ra->url, $path); my $section = "svn-remote.$self->{repo_id}"; tmp_config("$section.svm-source", $src); @@ -567,20 +585,21 @@ sub _set_svm_vars { } my $r = $ra->get_latest_revnum; - my $path = $self->{path}; + my $path = $self->path; my %tried; while (length $path) { - unless ($tried{"$self->{url}/$path"}) { + my $try = add_path_to_url($self->url, $path); + unless ($tried{$try}) { return $ra if $self->read_svm_props($ra, $path, $r); - $tried{"$self->{url}/$path"} = 1; + $tried{$try} = 1; } $path =~ s#/?[^/]+$##; } die "Path: '$path' should be ''\n" if $path ne ''; return $ra if $self->read_svm_props($ra, $path, $r); - $tried{"$self->{url}/$path"} = 1; + $tried{ add_path_to_url($self->url, $path) } = 1; - if ($ra->{repos_root} eq $self->{url}) { + if ($ra->{repos_root} eq $self->url) { die @err, (map { " $_\n" } keys %tried), "\n"; } @@ -590,20 +609,21 @@ sub _set_svm_vars { $path = $ra->{svn_path}; $ra = Git::SVN::Ra->new($ra->{repos_root}); while (length $path) { - unless ($tried{"$ra->{url}/$path"}) { + my $try = add_path_to_url($ra->url, $path); + unless ($tried{$try}) { $ok = $self->read_svm_props($ra, $path, $r); last if $ok; - $tried{"$ra->{url}/$path"} = 1; + $tried{$try} = 1; } $path =~ s#/?[^/]+$##; } die "Path: '$path' should be ''\n" if $path ne ''; $ok ||= $self->read_svm_props($ra, $path, $r); - $tried{"$ra->{url}/$path"} = 1; + $tried{ add_path_to_url($ra->url, $path) } = 1; if (!$ok) { die @err, (map { " $_\n" } keys %tried), "\n"; } - Git::SVN::Ra->new($self->{url}); + Git::SVN::Ra->new($self->url); } sub svnsync { @@ -670,7 +690,7 @@ sub ra_uuid { if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/i) { $self->{ra_uuid} = $uuid; } else { - die "ra_uuid called without URL\n" unless $self->{url}; + die "ra_uuid called without URL\n" unless $self->url; $self->{ra_uuid} = $self->ra->get_uuid; tmp_config('--add', $key, $self->{ra_uuid}); } @@ -694,7 +714,7 @@ sub repos_root { sub ra { my ($self) = shift; - my $ra = Git::SVN::Ra->new($self->{url}); + my $ra = Git::SVN::Ra->new($self->url); $self->_set_repos_root($ra->{repos_root}); if ($self->use_svm_props && !$self->{svm}) { if ($self->no_metadata) { @@ -728,7 +748,7 @@ sub prop_walk { $path =~ s#^/*#/#g; my $p = $path; # Strip the irrelevant part of the path. - $p =~ s#^/+\Q$self->{path}\E(/|$)#/#; + $p =~ s#^/+\Q@{[$self->path]}\E(/|$)#/#; # Ensure the path is terminated by a `/'. $p =~ s#/*$#/#; @@ -749,7 +769,7 @@ sub prop_walk { foreach (sort keys %$dirent) { next if $dirent->{$_}->{kind} != $SVN::Node::dir; - $self->prop_walk($self->{path} . $p . $_, $rev, $sub); + $self->prop_walk($self->path . $p . $_, $rev, $sub); } } @@ -919,20 +939,19 @@ sub rewrite_uuid { sub metadata_url { my ($self) = @_; - ($self->rewrite_root || $self->{url}) . - (length $self->{path} ? '/' . $self->{path} : ''); + my $url = $self->rewrite_root || $self->url; + return canonicalize_url( add_path_to_url( $url, $self->path ) ); } sub full_url { my ($self) = @_; - $self->{url} . (length $self->{path} ? '/' . $self->{path} : ''); + return canonicalize_url( add_path_to_url( $self->url, $self->path ) ); } sub full_pushurl { my ($self) = @_; if ($self->{pushurl}) { - return $self->{pushurl} . (length $self->{path} ? '/' . - $self->{path} : ''); + return canonicalize_url( add_path_to_url( $self->{pushurl}, $self->path ) ); } else { return $self->full_url; } @@ -1048,20 +1067,20 @@ sub do_git_commit { sub match_paths { my ($self, $paths, $r) = @_; - return 1 if $self->{path} eq ''; - if (my $path = $paths->{"/$self->{path}"}) { + return 1 if $self->path eq ''; + if (my $path = $paths->{"/".$self->path}) { return ($path->{action} eq 'D') ? 0 : 1; } - $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\//; + $self->{path_regex} ||= qr{^/\Q@{[$self->path]}\E/}; if (grep /$self->{path_regex}/, keys %$paths) { return 1; } my $c = ''; - foreach (split m#/#, $self->{path}) { + foreach (split m#/#, $self->path) { $c .= "/$_"; next unless ($paths->{$c} && ($paths->{$c}->{action} =~ /^[AR]$/)); - if ($self->ra->check_path($self->{path}, $r) == + if ($self->ra->check_path($self->path, $r) == $SVN::Node::dir) { return 1; } @@ -1075,14 +1094,14 @@ sub find_parent_branch { unless (defined $paths) { my $err_handler = $SVN::Error::handler; $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs; - $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1, + $self->ra->get_log([$self->path], $rev, $rev, 0, 1, 1, sub { $paths = $_[0] }); $SVN::Error::handler = $err_handler; } return undef unless defined $paths; # look for a parent from another branch: - my @b_path_components = split m#/#, $self->{path}; + my @b_path_components = split m#/#, $self->path; my @a_path_components; my $i; while (@b_path_components) { @@ -1099,8 +1118,8 @@ sub find_parent_branch { } my $r = $i->{copyfrom_rev}; my $repos_root = $self->ra->{repos_root}; - my $url = $self->ra->{url}; - my $new_url = $url . $branch_from; + my $url = $self->ra->url; + my $new_url = canonicalize_url( add_path_to_url( $url, $branch_from ) ); print STDERR "Found possible branch point: ", "$new_url => ", $self->full_url, ", $r\n" unless $::_q > 1; @@ -1114,7 +1133,7 @@ sub find_parent_branch { ($base, $head) = parse_revision_argument(0, $r); } else { if ($r0 < $r) { - $gs->ra->get_log([$gs->{path}], $r0 + 1, $r, 1, + $gs->ra->get_log([$gs->path], $r0 + 1, $r, 1, 0, 1, sub { $base = $_[1] - 1 }); } } @@ -1136,7 +1155,7 @@ sub find_parent_branch { # at the moment), so we can't rely on it $self->{last_rev} = $r0; $self->{last_commit} = $parent; - $ed = Git::SVN::Fetcher->new($self, $gs->{path}); + $ed = Git::SVN::Fetcher->new($self, $gs->path); $gs->ra->gs_do_switch($r0, $rev, $gs, $self->full_url, $ed) or die "SVN connection failed somewhere...\n"; @@ -1235,7 +1254,7 @@ sub mkemptydirs { close $fh; } - my $strip = qr/\A\Q$self->{path}\E(?:\/|$)/; + my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/; foreach my $d (sort keys %empty_dirs) { $d = uri_decode($d); $d =~ s/$strip//; @@ -1429,12 +1448,11 @@ sub find_extra_svk_parents { for my $ticket ( @tickets ) { my ($uuid, $path, $rev) = split /:/, $ticket; if ( $uuid eq $self->ra_uuid ) { - my $url = $self->{url}; - my $repos_root = $url; + my $repos_root = $self->url; my $branch_from = $path; $branch_from =~ s{^/}{}; - my $gs = $self->other_gs($repos_root."/".$branch_from, - $url, + my $gs = $self->other_gs(add_path_to_url( $repos_root, $branch_from ), + $repos_root, $branch_from, $rev, $self->{ref_id}); @@ -1693,7 +1711,7 @@ sub find_extra_svn_parents { # are now marked as merge, we can add the tip as a parent. my @merges = split "\n", $mergeinfo; my @merge_tips; - my $url = $self->{url}; + my $url = $self->url; my $uuid = $self->ra_uuid; my %ranges; for my $merge ( @merges ) { @@ -1875,8 +1893,9 @@ sub make_log_entry { $email ||= "$author\@$uuid"; $commit_email ||= "$author\@$uuid"; } elsif ($self->use_svnsync_props) { - my $full_url = $self->svnsync->{url}; - $full_url .= "/$self->{path}" if length $self->{path}; + my $full_url = canonicalize_url( + add_path_to_url( $self->svnsync->{url}, $self->path ) + ); remove_username($full_url); my $uuid = $self->svnsync->{uuid}; $log_entry{metadata} = "$full_url\@$rev $uuid"; @@ -1923,7 +1942,7 @@ sub set_tree { tree_b => $tree, editor_cb => sub { $self->set_tree_cb($log_entry, $tree, @_) }, - svn_path => $self->{path} ); + svn_path => $self->path ); if (!Git::SVN::Editor->new(\%ed_opts)->apply_diff) { print "No changes\nr$self->{last_rev} = $tree\n"; } @@ -2299,10 +2318,39 @@ sub _new { $_[3] = $path = '' unless (defined $path); mkpath([$dir]); - bless { + my $obj = bless { ref_id => $ref_id, dir => $dir, index => "$dir/index", - path => $path, config => "$ENV{GIT_DIR}/svn/config", + config => "$ENV{GIT_DIR}/svn/config", map_root => "$dir/.rev_map", repo_id => $repo_id }, $class; + + # Ensure it gets canonicalized + $obj->path($path); + + return $obj; +} + +sub path { + my $self = shift; + + if (@_) { + my $path = shift; + $self->{path} = canonicalize_path($path); + return; + } + + return $self->{path}; +} + +sub url { + my $self = shift; + + if (@_) { + my $url = shift; + $self->{url} = canonicalize_url($url); + return; + } + + return $self->{url}; } # for read-only access of old .rev_db formats diff --git a/perl/Git/SVN/Fetcher.pm b/perl/Git/SVN/Fetcher.pm index 76fae9bce..046a7a2f3 100644 --- a/perl/Git/SVN/Fetcher.pm +++ b/perl/Git/SVN/Fetcher.pm @@ -83,7 +83,7 @@ sub _mark_empty_symlinks { chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`); my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt); local $/ = "\0"; - my $pfx = defined($switch_path) ? $switch_path : $git_svn->{path}; + my $pfx = defined($switch_path) ? $switch_path : $git_svn->path; $pfx .= '/' if length($pfx); while (<$ls>) { chomp; diff --git a/perl/Git/SVN/Migration.pm b/perl/Git/SVN/Migration.pm index 75d74298e..30daf3546 100644 --- a/perl/Git/SVN/Migration.pm +++ b/perl/Git/SVN/Migration.pm @@ -177,14 +177,14 @@ sub minimize_connections { my $ra = Git::SVN::Ra->new($url); # skip existing cases where we already connect to the root - if (($ra->{url} eq $ra->{repos_root}) || + if (($ra->url eq $ra->{repos_root}) || ($ra->{repos_root} eq $repo_id)) { - $root_repos->{$ra->{url}} = $repo_id; + $root_repos->{$ra->url} = $repo_id; next; } my $root_ra = Git::SVN::Ra->new($ra->{repos_root}); - my $root_path = $ra->{url}; + my $root_path = $ra->url; $root_path =~ s#^\Q$ra->{repos_root}\E(/|$)##; foreach my $path (keys %$fetch) { my $ref_id = $fetch->{$path}; diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm index 23ff43e86..90ec30bff 100644 --- a/perl/Git/SVN/Ra.pm +++ b/perl/Git/SVN/Ra.pm @@ -3,6 +3,12 @@ use vars qw/@ISA $config_dir $_ignore_refs_regex $_log_window_size/; use strict; use warnings; use SVN::Client; +use Git::SVN::Utils qw( + canonicalize_url + canonicalize_path + add_path_to_url +); + use SVN::Ra; BEGIN { @ISA = qw(SVN::Ra); @@ -62,29 +68,11 @@ sub _auth_providers () { \@rv; } -sub escape_uri_only { - my ($uri) = @_; - my @tmp; - foreach (split m{/}, $uri) { - s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; - push @tmp, $_; - } - join('/', @tmp); -} - -sub escape_url { - my ($url) = @_; - if ($url =~ m#^(https?)://([^/]+)(.*)$#) { - my ($scheme, $domain, $uri) = ($1, $2, escape_uri_only($3)); - $url = "$scheme://$domain$uri"; - } - $url; -} sub new { my ($class, $url) = @_; - $url =~ s!/+$!!; - return $RA if ($RA && $RA->{url} eq $url); + $url = canonicalize_url($url); + return $RA if ($RA && $RA->url eq $url); ::_req_svn(); @@ -115,17 +103,34 @@ sub new { $Git::SVN::Prompt::_no_auth_cache = 1; } } # no warnings 'once' - my $self = SVN::Ra->new(url => escape_url($url), auth => $baton, + + my $self = SVN::Ra->new(url => $url, auth => $baton, config => $config, pool => SVN::Pool->new, auth_provider_callbacks => $callbacks); - $self->{url} = $url; + $RA = bless $self, $class; + + # Make sure its canonicalized + $self->url($url); $self->{svn_path} = $url; $self->{repos_root} = $self->get_repos_root; $self->{svn_path} =~ s#^\Q$self->{repos_root}\E(/|$)##; $self->{cache} = { check_path => { r => 0, data => {} }, get_dir => { r => 0, data => {} } }; - $RA = bless $self, $class; + + return $RA; +} + +sub url { + my $self = shift; + + if (@_) { + my $url = shift; + $self->{url} = canonicalize_url($url); + return; + } + + return $self->{url}; } sub check_path { @@ -195,6 +200,7 @@ sub get_log { qw/copyfrom_path copyfrom_rev action/; if ($s{'copyfrom_path'}) { $s{'copyfrom_path'} =~ s/$prefix_regex//; + $s{'copyfrom_path'} = canonicalize_path($s{'copyfrom_path'}); } $_[0]{$p} = \%s; } @@ -246,7 +252,7 @@ sub get_commit_editor { sub gs_do_update { my ($self, $rev_a, $rev_b, $gs, $editor) = @_; my $new = ($rev_a == $rev_b); - my $path = $gs->{path}; + my $path = $gs->path; if ($new && -e $gs->{index}) { unlink $gs->{index} or die @@ -282,30 +288,33 @@ sub gs_do_update { # svn_ra_reparent didn't work before 1.4) sub gs_do_switch { my ($self, $rev_a, $rev_b, $gs, $url_b, $editor) = @_; - my $path = $gs->{path}; + my $path = $gs->path; my $pool = SVN::Pool->new; - my $full_url = $self->{url}; - my $old_url = $full_url; - $full_url .= '/' . $path if length $path; + my $old_url = $self->url; + my $full_url = add_path_to_url( $self->url, $path ); my ($ra, $reparented); if ($old_url =~ m#^svn(\+ssh)?://# || ($full_url =~ m#^https?://# && - escape_url($full_url) ne $full_url)) { + canonicalize_url($full_url) ne $full_url)) { $_[0] = undef; $self = undef; $RA = undef; $ra = Git::SVN::Ra->new($full_url); $ra_invalid = 1; } elsif ($old_url ne $full_url) { - SVN::_Ra::svn_ra_reparent($self->{session}, $full_url, $pool); - $self->{url} = $full_url; + SVN::_Ra::svn_ra_reparent( + $self->{session}, + canonicalize_url($full_url), + $pool + ); + $self->url($full_url); $reparented = 1; } $ra ||= $self; - $url_b = escape_url($url_b); + $url_b = canonicalize_url($url_b); my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool); my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : (); $reporter->set_path('', $rev_a, 0, @lock, $pool); @@ -313,7 +322,7 @@ sub gs_do_switch { if ($reparented) { SVN::_Ra::svn_ra_reparent($self->{session}, $old_url, $pool); - $self->{url} = $old_url; + $self->url($old_url); } $pool->clear; @@ -326,7 +335,7 @@ sub longest_common_path { my $common_max = scalar @$gsv; foreach my $gs (@$gsv) { - my @tmp = split m#/#, $gs->{path}; + my @tmp = split m#/#, $gs->path; my $p = ''; foreach (@tmp) { $p .= length($p) ? "/$_" : $_; @@ -362,7 +371,7 @@ sub gs_fetch_loop_common { my $inc = $_log_window_size; my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc); my $longest_path = longest_common_path($gsv, $globs); - my $ra_url = $self->{url}; + my $ra_url = $self->url; my $find_trailing_edge; while (1) { my %revs; @@ -508,7 +517,7 @@ sub match_globs { ($self->check_path($p, $r) != $SVN::Node::dir)); next unless $p =~ /$g->{path}->{regex}/; - $exists->{$p} = Git::SVN->init($self->{url}, $p, undef, + $exists->{$p} = Git::SVN->init($self->url, $p, undef, $g->{ref}->full_path($de), 1); } } @@ -532,7 +541,7 @@ sub match_globs { next if ($self->check_path($pathname, $r) != $SVN::Node::dir); $exists->{$pathname} = Git::SVN->init( - $self->{url}, $pathname, undef, + $self->url, $pathname, undef, $g->{ref}->full_path($p), 1); } my $c = ''; @@ -548,19 +557,20 @@ sub match_globs { sub minimize_url { my ($self) = @_; - return $self->{url} if ($self->{url} eq $self->{repos_root}); + return $self->url if ($self->url eq $self->{repos_root}); my $url = $self->{repos_root}; my @components = split(m!/!, $self->{svn_path}); my $c = ''; do { - $url .= "/$c" if length $c; + $url = add_path_to_url($url, $c); eval { my $ra = (ref $self)->new($url); my $latest = $ra->get_latest_revnum; $ra->get_log("", $latest, 0, 1, 0, 1, sub {}); }; } while ($@ && ($c = shift @components)); - $url; + + return canonicalize_url($url); } sub can_do_switch { @@ -568,7 +578,7 @@ sub can_do_switch { unless (defined $can_do_switch) { my $pool = SVN::Pool->new; my $rep = eval { - $self->do_switch(1, '', 0, $self->{url}, + $self->do_switch(1, '', 0, $self->url, SVN::Delta::Editor->new, $pool); }; if ($@) { diff --git a/perl/Git/SVN/Utils.pm b/perl/Git/SVN/Utils.pm index 496006bc7..4bb4dde89 100644 --- a/perl/Git/SVN/Utils.pm +++ b/perl/Git/SVN/Utils.pm @@ -3,9 +3,18 @@ package Git::SVN::Utils; use strict; use warnings; +use SVN::Core; + use base qw(Exporter); -our @EXPORT_OK = qw(fatal can_compress); +our @EXPORT_OK = qw( + fatal + can_compress + canonicalize_path + canonicalize_url + join_paths + add_path_to_url +); =head1 NAME @@ -56,4 +65,169 @@ sub can_compress { } +=head3 canonicalize_path + + my $canoncalized_path = canonicalize_path($path); + +Converts $path into a canonical form which is safe to pass to the SVN +API as a file path. + +=cut + +# Turn foo/../bar into bar +sub _collapse_dotdot { + my $path = shift; + + 1 while $path =~ s{/[^/]+/+\.\.}{}; + 1 while $path =~ s{[^/]+/+\.\./}{}; + 1 while $path =~ s{[^/]+/+\.\.}{}; + + return $path; +} + + +sub canonicalize_path { + my $path = shift; + my $rv; + + # The 1.7 way to do it + if ( defined &SVN::_Core::svn_dirent_canonicalize ) { + $path = _collapse_dotdot($path); + $rv = SVN::_Core::svn_dirent_canonicalize($path); + } + # The 1.6 way to do it + # This can return undef on subversion-perl-1.4.2-2.el5 (CentOS 5.2) + elsif ( defined &SVN::_Core::svn_path_canonicalize ) { + $path = _collapse_dotdot($path); + $rv = SVN::_Core::svn_path_canonicalize($path); + } + + return $rv if defined $rv; + + # No SVN API canonicalization is available, or the SVN API + # didn't return a successful result, do it ourselves + return _canonicalize_path_ourselves($path); +} + + +sub _canonicalize_path_ourselves { + my ($path) = @_; + my $dot_slash_added = 0; + if (substr($path, 0, 1) ne "/") { + $path = "./" . $path; + $dot_slash_added = 1; + } + $path =~ s#/+#/#g; + $path =~ s#/\.(?:/|$)#/#g; + $path = _collapse_dotdot($path); + $path =~ s#/$##g; + $path =~ s#^\./## if $dot_slash_added; + $path =~ s#^/##; + $path =~ s#^\.$##; + return $path; +} + + +=head3 canonicalize_url + + my $canonicalized_url = canonicalize_url($url); + +Converts $url into a canonical form which is safe to pass to the SVN +API as a URL. + +=cut + +sub canonicalize_url { + my $url = shift; + + # The 1.7 way to do it + if ( defined &SVN::_Core::svn_uri_canonicalize ) { + return SVN::_Core::svn_uri_canonicalize($url); + } + # There wasn't a 1.6 way to do it, so we do it ourself. + else { + return _canonicalize_url_ourselves($url); + } +} + + +sub _canonicalize_url_path { + my ($uri_path) = @_; + + my @parts; + foreach my $part (split m{/+}, $uri_path) { + $part =~ s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; + push @parts, $part; + } + + return join('/', @parts); +} + +sub _canonicalize_url_ourselves { + my ($url) = @_; + if ($url =~ m#^([^:]+)://([^/]*)(.*)$#) { + my ($scheme, $domain, $uri) = ($1, $2, _canonicalize_url_path(canonicalize_path($3))); + $url = "$scheme://$domain$uri"; + } + $url; +} + + +=head3 join_paths + + my $new_path = join_paths(@paths); + +Appends @paths together into a single path. Any empty paths are ignored. + +=cut + +sub join_paths { + my @paths = @_; + + @paths = grep { defined $_ && length $_ } @paths; + + return '' unless @paths; + return $paths[0] if @paths == 1; + + my $new_path = shift @paths; + $new_path =~ s{/+$}{}; + + my $last_path = pop @paths; + $last_path =~ s{^/+}{}; + + for my $path (@paths) { + $path =~ s{^/+}{}; + $path =~ s{/+$}{}; + $new_path .= "/$path"; + } + + return $new_path .= "/$last_path"; +} + + +=head3 add_path_to_url + + my $new_url = add_path_to_url($url, $path); + +Appends $path onto the $url. If $path is empty, $url is returned unchanged. + +=cut + +sub add_path_to_url { + my($url, $path) = @_; + + return $url if !defined $path or !length $path; + + # Strip trailing and leading slashes so we don't + # wind up with http://x.com///path + $url =~ s{/+$}{}; + $path =~ s{^/+}{}; + + # If a path has a % in it, URI escape it so it's not + # mistaken for a URI escape later. + $path =~ s{%}{%25}g; + + return join '/', $url, $path; +} + 1; diff --git a/t/Git-SVN/Utils/add_path_to_url.t b/t/Git-SVN/Utils/add_path_to_url.t new file mode 100644 index 000000000..bfbd87845 --- /dev/null +++ b/t/Git-SVN/Utils/add_path_to_url.t @@ -0,0 +1,27 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use Test::More 'no_plan'; + +use Git::SVN::Utils qw( + add_path_to_url +); + +# A reference cannot be a hash key, so we use an array. +my @tests = ( + ["http://x.com", "bar"] => 'http://x.com/bar', + ["http://x.com", ""] => 'http://x.com', + ["http://x.com/foo/", undef] => 'http://x.com/foo/', + ["http://x.com/foo/", "/bar/baz/"] => 'http://x.com/foo/bar/baz/', + ["http://x.com", 'per%cent'] => 'http://x.com/per%25cent', +); + +while(@tests) { + my($have, $want) = splice @tests, 0, 2; + + my $args = join ", ", map { qq['$_'] } map { defined($_) ? $_ : 'undef' } @$have; + my $name = "add_path_to_url($args) eq $want"; + is add_path_to_url(@$have), $want, $name; +} diff --git a/t/Git-SVN/Utils/canonicalize_url.t b/t/Git-SVN/Utils/canonicalize_url.t new file mode 100644 index 000000000..05795ab63 --- /dev/null +++ b/t/Git-SVN/Utils/canonicalize_url.t @@ -0,0 +1,26 @@ +#!/usr/bin/env perl + +# Test our own home rolled URL canonicalizer. Test the private one +# directly because we can't predict what the SVN API is doing to do. + +use strict; +use warnings; + +use Test::More 'no_plan'; + +use Git::SVN::Utils; +my $canonicalize_url = \&Git::SVN::Utils::_canonicalize_url_ourselves; + +my %tests = ( + "http://x.com" => "http://x.com", + "http://x.com/" => "http://x.com", + "http://x.com/foo/bar" => "http://x.com/foo/bar", + "http://x.com//foo//bar//" => "http://x.com/foo/bar", + "http://x.com/ /%/" => "http://x.com/%20%20/%25", +); + +for my $arg (keys %tests) { + my $want = $tests{$arg}; + + is $canonicalize_url->($arg), $want, "canonicalize_url('$arg') => $want"; +} diff --git a/t/Git-SVN/Utils/collapse_dotdot.t b/t/Git-SVN/Utils/collapse_dotdot.t new file mode 100644 index 000000000..1da1cce15 --- /dev/null +++ b/t/Git-SVN/Utils/collapse_dotdot.t @@ -0,0 +1,23 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use Test::More 'no_plan'; + +use Git::SVN::Utils; +my $collapse_dotdot = \&Git::SVN::Utils::_collapse_dotdot; + +my %tests = ( + "foo/bar/baz" => "foo/bar/baz", + ".." => "..", + "foo/.." => "", + "/foo/bar/../../baz" => "/baz", + "deeply/.././deeply/nested" => "./deeply/nested", +); + +for my $arg (keys %tests) { + my $want = $tests{$arg}; + + is $collapse_dotdot->($arg), $want, "_collapse_dotdot('$arg') => $want"; +} diff --git a/t/Git-SVN/Utils/join_paths.t b/t/Git-SVN/Utils/join_paths.t new file mode 100644 index 000000000..d4488e716 --- /dev/null +++ b/t/Git-SVN/Utils/join_paths.t @@ -0,0 +1,32 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use Test::More 'no_plan'; + +use Git::SVN::Utils qw( + join_paths +); + +# A reference cannot be a hash key, so we use an array. +my @tests = ( + [] => '', + ["/x.com", "bar"] => '/x.com/bar', + ["x.com", ""] => 'x.com', + ["/x.com/foo/", undef, "bar"] => '/x.com/foo/bar', + ["x.com/foo/", "/bar/baz/"] => 'x.com/foo/bar/baz/', + ["foo", "bar"] => 'foo/bar', + ["/foo/bar", "baz", "/biff"] => '/foo/bar/baz/biff', + ["", undef, "."] => '.', + [] => '', + +); + +while(@tests) { + my($have, $want) = splice @tests, 0, 2; + + my $args = join ", ", map { qq['$_'] } map { defined($_) ? $_ : 'undef' } @$have; + my $name = "join_paths($args) eq '$want'"; + is join_paths(@$have), $want, $name; +} diff --git a/t/t9107-git-svn-migrate.sh b/t/t9107-git-svn-migrate.sh index 289fc313f..ee73013ee 100755 --- a/t/t9107-git-svn-migrate.sh +++ b/t/t9107-git-svn-migrate.sh @@ -27,15 +27,17 @@ test_expect_success 'setup old-looking metadata' ' head=`git rev-parse --verify refs/heads/git-svn-HEAD^0` test_expect_success 'git-svn-HEAD is a real HEAD' "test -n '$head'" +svnrepo_escaped=`echo $svnrepo | sed 's/ /%20/'` + test_expect_success 'initialize old-style (v0) git svn layout' ' mkdir -p "$GIT_DIR"/git-svn/info "$GIT_DIR"/svn/info && echo "$svnrepo" > "$GIT_DIR"/git-svn/info/url && echo "$svnrepo" > "$GIT_DIR"/svn/info/url && git svn migrate && - ! test -d "$GIT_DIR"/git svn && + ! test -d "$GIT_DIR"/git-svn && git rev-parse --verify refs/${remotes_git_svn}^0 && git rev-parse --verify refs/remotes/svn^0 && - test "$(git config --get svn-remote.svn.url)" = "$svnrepo" && + test "$(git config --get svn-remote.svn.url)" = "$svnrepo_escaped" && test `git config --get svn-remote.svn.fetch` = \ ":refs/${remotes_git_svn}" ' diff --git a/t/t9118-git-svn-funky-branch-names.sh b/t/t9118-git-svn-funky-branch-names.sh index 63fc982c8..193d3cabd 100755 --- a/t/t9118-git-svn-funky-branch-names.sh +++ b/t/t9118-git-svn-funky-branch-names.sh @@ -32,6 +32,11 @@ test_expect_success 'setup svnrepo' ' start_httpd ' +# SVN 1.7 will truncate "not-a%40{0]" to just "not-a". +# Look at what SVN wound up naming the branch and use that. +# Be sure to escape the @ if it shows up. +non_reflog=`svn_cmd ls "$svnrepo/pr ject/branches" | grep not-a | sed 's/\///' | sed 's/@/%40/'` + test_expect_success 'test clone with funky branch names' ' git svn clone -s "$svnrepo/pr ject" project && ( @@ -42,7 +47,7 @@ test_expect_success 'test clone with funky branch names' ' git rev-parse "refs/remotes/%2Eleading_dot" && git rev-parse "refs/remotes/trailing_dot%2E" && git rev-parse "refs/remotes/trailing_dotlock%2Elock" && - git rev-parse "refs/remotes/not-a%40{0}reflog" + git rev-parse "refs/remotes/$non_reflog" ) ' |