From 462d97daf69951f968f16b6271de9db34f7dd13c Mon Sep 17 00:00:00 2001 From: Matthieu Moy Date: Thu, 29 Nov 2012 18:00:55 +0100 Subject: git-remote-mediawiki: escape ", \, and LF in file names A mediawiki page can contain, and even start with a " character, we have to escape it when generating the fast-export stream, as well as \ character. While we're there, also escape newlines, but I don't think we can get them from MediaWiki pages. Signed-off-by: Matthieu Moy Signed-off-by: Junio C Hamano --- contrib/mw-to-git/git-remote-mediawiki | 16 +++++++++++++--- contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh | 26 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/contrib/mw-to-git/git-remote-mediawiki b/contrib/mw-to-git/git-remote-mediawiki index 68555d426..094129de0 100755 --- a/contrib/mw-to-git/git-remote-mediawiki +++ b/contrib/mw-to-git/git-remote-mediawiki @@ -711,6 +711,14 @@ sub fetch_mw_revisions { return ($n, @revisions); } +sub fe_escape_path { + my $path = shift; + $path =~ s/\\/\\\\/g; + $path =~ s/"/\\"/g; + $path =~ s/\n/\\n/g; + return '"' . $path . '"'; +} + sub import_file_revision { my $commit = shift; my %commit = %{$commit}; @@ -738,15 +746,17 @@ sub import_file_revision { print STDOUT "from refs/mediawiki/$remotename/master^0\n"; } if ($content ne DELETED_CONTENT) { - print STDOUT "M 644 inline $title.mw\n"; + print STDOUT "M 644 inline " . + fe_escape_path($title . ".mw") . "\n"; literal_data($content); if (%mediafile) { - print STDOUT "M 644 inline $mediafile{title}\n"; + print STDOUT "M 644 inline " + . fe_escape_path($mediafile{title}) . "\n"; literal_data_raw($mediafile{content}); } print STDOUT "\n\n"; } else { - print STDOUT "D $title.mw\n"; + print STDOUT "D " . fe_escape_path($title . ".mw") . "\n"; } # mediawiki revision number in the git note diff --git a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh index 246d47d8f..b6405ce26 100755 --- a/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh +++ b/contrib/mw-to-git/t/t9362-mw-to-git-utf8.sh @@ -318,4 +318,30 @@ test_expect_success 'git push with \ in format control' ' ' +test_expect_success 'fast-import meta-characters in page name (mw -> git)' ' + wiki_reset && + wiki_editpage \"file\"_\\_foo "expect to be called \"file\"_\\_foo" false && + git clone mediawiki::'"$WIKI_URL"' mw_dir_21 && + test_path_is_file mw_dir_21/\"file\"_\\_foo.mw && + wiki_getallpage ref_page_21 && + test_diff_directories mw_dir_21 ref_page_21 +' + + +test_expect_success 'fast-import meta-characters in page name (git -> mw) ' ' + wiki_reset && + git clone mediawiki::'"$WIKI_URL"' mw_dir_22 && + ( + cd mw_dir_22 && + echo "this file is called \"file\"_\\_foo.mw" >\"file\"_\\_foo && + git add . && + git commit -am "file \"file\"_\\_foo" && + git pull && + git push + ) && + wiki_getallpage ref_page_22 && + test_diff_directories mw_dir_22 ref_page_22 +' + + test_done -- cgit v1.2.1