aboutsummaryrefslogtreecommitdiff
path: root/contrib/mw-to-git/git-remote-mediawiki
blob: e929b7a728c04be911783e005afa5a49bcc1a48c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
#! /usr/bin/perl

# Copyright (C) 2011
#     Jérémie Nikaes <jeremie.nikaes@ensimag.imag.fr>
#     Arnaud Lacurie <arnaud.lacurie@ensimag.imag.fr>
#     Claire Fousse <claire.fousse@ensimag.imag.fr>
#     David Amouyal <david.amouyal@ensimag.imag.fr>
#     Matthieu Moy <matthieu.moy@grenoble-inp.fr>
# License: GPL v2 or later

# Gateway between Git and MediaWiki.
#   https://github.com/Bibzball/Git-Mediawiki/wiki
#
# Known limitations:
#
# - Only wiki pages are managed, no support for [[File:...]]
#   attachments.
#
# - Poor performance in the best case: it takes forever to check
#   whether we're up-to-date (on fetch or push) or to fetch a few
#   revisions from a large wiki, because we use exclusively a
#   page-based synchronization. We could switch to a wiki-wide
#   synchronization when the synchronization involves few revisions
#   but the wiki is large.
#
# - Git renames could be turned into MediaWiki renames (see TODO
#   below)
#
# - login/password support requires the user to write the password
#   cleartext in a file (see TODO below).
#
# - No way to import "one page, and all pages included in it"
#
# - Multiple remote MediaWikis have not been very well tested.

use strict;
use MediaWiki::API;
use DateTime::Format::ISO8601;
use encoding 'utf8';

# "use encoding 'utf8'" doesn't change the encoding of STDERR,
# but we're going to output UTF-8 filenames to STDERR
binmode STDERR, ":utf8";

use URI::Escape;
use IPC::Open2;

use warnings;

# MediaWiki page titles can contain forward slashes. This constant is
# the pattern they are replaced with in Git file names (and the
# pattern turned back into "/" when a file name becomes a title).
use constant SLASH_REPLACEMENT => "%2F";

# It's not always possible to delete pages (may require some
# privileges). Deleted pages are replaced with this content.
use constant DELETED_CONTENT => "[[Category:Deleted]]\n";

# It's not possible to create empty pages. New empty files in Git are
# sent with this content instead.
use constant EMPTY_CONTENT => "<!-- empty page -->\n";

# Used to reflect file creation or deletion in diff: an all-zero blob
# id on the "before" side means creation, on the "after" side deletion.
use constant NULL_SHA1 => "0000000000000000000000000000000000000000";

# Command-line arguments of the helper: the first one is used as the
# remote name (it selects the remote.<name>.* configuration keys), the
# second one as the base URL of the wiki.
my $remotename = $ARGV[0];
my $url = $ARGV[1];

# Accept both space-separated and multiple keys in config file.
# Spaces should be written as _ anyway because we'll use chomp.
my @tracked_pages = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".pages"));
chomp(@tracked_pages);

# Just like @tracked_pages, but for MediaWiki categories.
my @tracked_categories = split(/[ \n]/, run_git("config --get-all remote.". $remotename .".categories"));
chomp(@tracked_categories);

# Optional login name; mw_connect_maybe() only logs in when it is set.
my $wiki_login = run_git("config --get remote.". $remotename .".mwLogin");
# TODO: ideally, this should be able to read from keyboard, but we're
# inside a remote helper, so our stdin is connected to git, not to a
# terminal.
my $wiki_passwd = run_git("config --get remote.". $remotename .".mwPassword");
# Passed as "lgdomain" in the login request.
my $wiki_domain = run_git("config --get remote.". $remotename .".mwDomain");
chomp($wiki_login);
chomp($wiki_passwd);
chomp($wiki_domain);

# Import only last revisions (both for clone and fetch)
my $shallow_import = run_git("config --get --bool remote.". $remotename .".shallow");
chomp($shallow_import);
# Turn the textual answer into a Perl boolean.
$shallow_import = ($shallow_import eq "true");

# Dumb push: don't update notes and mediawiki ref to reflect the last push.
#
# Configurable with mediawiki.dumbPush, or per-remote with
# remote.<remotename>.dumbPush.
#
# This means the user will have to re-import the just-pushed
# revisions. On the other hand, this means that the Git revisions
# corresponding to MediaWiki revisions are all imported from the wiki,
# regardless of whether they were initially created in Git or from the
# web interface, hence all users will get the same history (i.e. if
# the push from Git to MediaWiki loses some information, everybody
# will get the history with information lost). If the import is
# deterministic, this means everybody gets the same sha1 for each
# MediaWiki revision.
my $dumb_push = run_git("config --get --bool remote.$remotename.dumbPush");
# The global key is only consulted when the per-remote query produced
# no output at all (an empty string is false in Perl).
unless ($dumb_push) {
	$dumb_push = run_git("config --get --bool mediawiki.dumbPush");
}
chomp($dumb_push);
$dumb_push = ($dumb_push eq "true");

# $wiki_name is the URL without its scheme; it is used as the host
# part of the committer address written by import_file_revision().
my $wiki_name = $url;
$wiki_name =~ s/[^\/]*:\/\///;
# If URL is like http://user:password@example.com/, we clearly don't
# want the password in $wiki_name. While we're there, also remove user
# and '@' sign, to avoid author like MWUser@HTTPUser@host.com
$wiki_name =~ s/^.*@//;

# Commands parser
#
# Read the remote-helper commands sent by Git on STDIN, one per line,
# and dispatch each of them to the matching mw_*() function. The loop
# stops on a blank line, on end of input, or on an unknown command.
#
# Arguments are checked with defined() before being compared, so that
# a command sent with too few arguments dies with the intended message
# instead of first emitting "uninitialized value" warnings.
my $entry;
my @cmd;
while (<STDIN>) {
	chomp;
	@cmd = split(/ /);
	if (!defined($cmd[0])) {
		# blank line: we should terminate
		last;
	}

	# Line not blank
	if ($cmd[0] eq "capabilities") {
		die("Too many arguments for capabilities") if (defined($cmd[1]));
		mw_capabilities();
	} elsif ($cmd[0] eq "list") {
		die("Too many arguments for list") if (defined($cmd[2]));
		mw_list($cmd[1]);
	} elsif ($cmd[0] eq "import") {
		die("Invalid arguments for import")
		    unless (defined($cmd[1]) && $cmd[1] ne "" &&
			    !defined($cmd[2]));
		mw_import($cmd[1]);
	} elsif ($cmd[0] eq "option") {
		# Exactly two non-empty arguments are expected; the
		# message covers both too few and too many of them.
		die("Invalid arguments for option")
		    unless (defined($cmd[1]) && $cmd[1] ne "" &&
			    defined($cmd[2]) && $cmd[2] ne "" &&
			    !defined($cmd[3]));
		mw_option($cmd[1],$cmd[2]);
	} elsif ($cmd[0] eq "push") {
		mw_push($cmd[1]);
	} else {
		print STDERR "Unknown command. Aborting...\n";
		last;
	}

	BEGIN { $| = 1 } # flush STDOUT, to make sure the previous
			 # command is fully processed.
}

########################## Functions ##############################

## credential API management (generic functions)

# Split a URL into the fields understood by "git credential".
#
# Takes the URL as a string and returns a hash (as a flat list) that
# may contain the keys protocol, host, path, username and password;
# a key is only present when the matching URL component is true.
sub credential_from_url {
	my ($address) = @_;
	my $uri = URI->new($address);
	my %fields;

	my $scheme = $uri->scheme;
	$fields{protocol} = $scheme if ($scheme);

	my $host = $uri->host;
	$fields{host} = $host if ($host);

	my $path = $uri->path;
	$fields{path} = $path if ($path);

	my $userinfo = $uri->userinfo;
	if ($userinfo) {
		# "user:password" is split on the first colon; without
		# a colon the whole userinfo is the user name.
		my ($user, $secret) = $userinfo =~ /([^:]*):(.*)/;
		if (defined($user)) {
			$fields{username} = $user;
			$fields{password} = $secret;
		} else {
			$fields{username} = $userinfo;
		}
	}

	return %fields;
}

# Parse the "key=value" lines written by "git credential <op>".
#
# Parameters:
#   $reader - filehandle to read the answer from
#   $op     - name of the credential operation, only used in the
#             error message
# Returns the parsed fields as a hash (flat list). Each line is split
# on its first "=", so values may themselves contain "=".
# Dies if a line does not contain any "=".
#
# The lines are read into a lexical variable: the caller's $_ (the
# command line being processed by the main loop) is left untouched.
sub credential_read {
	my %credential;
	my $reader = shift;
	my $op = shift;
	while (defined(my $line = <$reader>)) {
		my ($key, $value) = $line =~ /([^=]*)=(.*)/;
		if (not defined $key) {
			die "ERROR receiving response from git credential $op:\n$line\n";
		}
		$credential{$key} = $value;
	}
	return %credential;
}

# Send a credential description to "git credential" as "key=value"
# lines.
#
# Parameters:
#   $credential - hash reference holding the fields to send
#   $writer     - filehandle to print to
#
# Fields whose value is undefined or empty are skipped. The test is on
# definedness and length rather than on truth, so that a value such as
# "0" is still transmitted. Keys are written in sorted order to make
# the output deterministic.
sub credential_write {
	my $credential = shift;
	my $writer = shift;
	foreach my $key (sort keys %$credential) {
		my $value = $credential->{$key};
		if (defined($value) && length($value)) {
			print $writer "$key=$value\n";
		}
	}
}

# Run "git credential <op>" and exchange a credential with it.
#
# Parameters:
#   $op         - credential operation ("fill", "approve" or "reject"
#                 are the ones used in this file)
#   $credential - hash reference describing the credential; for "fill"
#                 it is overwritten with the answer of the command
#
# Dies if the command writes anything for an operation other than
# "fill", or if it exits with a non-zero status.
sub credential_run {
	my $op = shift;
	my $credential = shift;
	my $pid = open2(my $reader, my $writer, "git credential $op");
	credential_write($credential, $writer);
	# An empty line terminates the description.
	print $writer "\n";
	close($writer);

	if ($op eq "fill") {
		%$credential = credential_read($reader, $op);
	} else {
		# No output is expected here. The line is read into a
		# variable so that it can be shown in the message: a bare
		# <$reader> inside if() does not assign to $_.
		my $unexpected = <$reader>;
		if (defined($unexpected)) {
			die "ERROR while running git credential $op:\n$unexpected";
		}
	}
	close($reader);
	waitpid($pid, 0);
	my $child_exit_status = $? >> 8;
	if ($child_exit_status != 0) {
		die "'git credential $op' failed with code $child_exit_status.";
	}
}

# Handle on the MediaWiki API, shared by the functions of this file.
# It stays undefined until mw_connect_maybe() is called.
my $mediawiki;

# Create the API handle if it does not exist yet and, when a login
# name is configured, authenticate with credentials completed by
# "git credential fill". The helper exits with status 1 when the
# login is refused.
sub mw_connect_maybe {
	# Already connected: nothing to do.
	return if ($mediawiki);

	$mediawiki = MediaWiki::API->new;
	$mediawiki->{config}->{api_url} = "$url/api.php";

	# Anonymous access: no login to perform.
	return unless ($wiki_login);

	my %credential = credential_from_url($url);
	$credential{username} = $wiki_login;
	$credential{password} = $wiki_passwd;
	credential_run("fill", \%credential);

	my $logged_in = $mediawiki->login({
		lgname => $credential{username},
		lgpassword => $credential{password},
		lgdomain => $wiki_domain,
	});
	unless ($logged_in) {
		print STDERR "Failed to log in mediawiki user \"$credential{username}\" on $url\n";
		print STDERR "  (error " .
			$mediawiki->{error}->{code} . ': ' .
			$mediawiki->{error}->{details} . ")\n";
		# Tell the credential helpers these values did not work.
		credential_run("reject", \%credential);
		exit 1;
	}

	# Let the credential helpers remember what worked.
	credential_run("approve", \%credential);
	print STDERR "Logged in mediawiki user \"$credential{username}\".\n";
}

# Query the wiki for a batch of page titles and record the pages found.
#
# Parameters:
#   $some_pages - array reference of page titles to look up
#   $pages      - hash reference, filled with title => page record for
#                 every page of the answer whose id is not negative
#
# A warning is printed for each page reported with a negative id. The
# helper exits with status 1 when the query itself fails.
sub get_mw_first_pages {
	my ($some_pages, $pages) = @_;

	# The API wants the titles as 'page1|page2|...'
	my $answer = $mediawiki->api({
		action => 'query',
		titles => join('|', @{$some_pages}),
	});
	unless (defined($answer)) {
		print STDERR "fatal: could not query the list of wiki pages.\n";
		print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
		print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
		exit 1;
	}

	foreach my $id (keys %{$answer->{query}->{pages}}) {
		my $page = $answer->{query}->{pages}->{$id};
		if ($id < 0) {
			print STDERR "Warning: page $page->{title} not found on wiki\n";
			next;
		}
		$pages->{$page->{title}} = $page;
	}
}

# Build the list of wiki pages to synchronize.
#
# The pages come from the configured page titles (@tracked_pages) and
# categories (@tracked_categories); when neither is configured, every
# page listed by the wiki is used. Returns the list of page records
# (hash references as returned by the API), without duplicate titles.
# Exits or dies when a query to the wiki fails.
sub get_mw_pages {
	mw_connect_maybe();

	my %pages; # hash on page titles to avoid duplicates
	my $user_defined;
	if (@tracked_pages) {
		$user_defined = 1;
		# The user provided a list of pages titles, but we
		# still need to query the API to get the page IDs.
		#
		# The API accepts at most 50 titles per query for
		# ordinary accounts; titles beyond that limit are
		# ignored. Send the titles in batches of exactly 50.
		my $max_titles = 50;
		my @some_pages = @tracked_pages;
		while (@some_pages) {
			my @slice = splice(@some_pages, 0, $max_titles);
			get_mw_first_pages(\@slice, \%pages);
		}
	}
	if (@tracked_categories) {
		$user_defined = 1;
		# $category aliases the elements of @tracked_categories,
		# so the prefix added below is kept for later calls.
		foreach my $category (@tracked_categories) {
			if (index($category, ':') < 0) {
				# Mediawiki requires the Category
				# prefix, but let's not force the user
				# to specify it.
				$category = "Category:" . $category;
			}
			my $mw_pages = $mediawiki->list( {
				action => 'query',
				list => 'categorymembers',
				cmtitle => $category,
				cmlimit => 'max' } )
			    || die $mediawiki->{error}->{code} . ': ' . $mediawiki->{error}->{details};
			foreach my $page (@{$mw_pages}) {
				$pages{$page->{title}} = $page;
			}
		}
	}
	if (!$user_defined) {
		# No user-provided list, get the list of pages from
		# the API.
		my $mw_pages = $mediawiki->list({
			action => 'query',
			list => 'allpages',
			aplimit => 500,
		});
		if (!defined($mw_pages)) {
			print STDERR "fatal: could not get the list of wiki pages.\n";
			print STDERR "fatal: '$url' does not appear to be a mediawiki\n";
			print STDERR "fatal: make sure '$url/api.php' is a valid page.\n";
			exit 1;
		}
		foreach my $page (@{$mw_pages}) {
			$pages{$page->{title}} = $page;
		}
	}
	return values(%pages);
}

# Run "git <arguments>" and return everything the command wrote to its
# standard output, decoded as UTF-8, as one string.
#
# The argument is a command-line fragment appended to "git ", not a
# list of arguments; callers include shell redirections in it.
# Dies if the command cannot be started. The exit status of the
# command is not examined.
sub run_git {
	my $args = $_[0];
	open(my $git, "-|:encoding(UTF-8)", "git " . $args)
	    or die "Unable to fork: $!\n";
	# Undefine the input record separator to read the whole output at once.
	my $res = do { local $/; <$git> };
	close($git);

	return $res;
}


# Return the MediaWiki revision number recorded in the Git note of
# refs/mediawiki/<remote>/master, or 0 when no such note is found.
#
# The message printed on STDERR deliberately lacks its end: the
# callers complete the sentence themselves.
sub get_last_local_revision {
	# The note reads "mediawiki_revision: <number>"
	my $note = run_git("notes --ref=$remotename/mediawiki show refs/mediawiki/$remotename/master 2>/dev/null");
	my @fields = split(/ /, $note);

	unless (defined($fields[0]) && $fields[0] eq "mediawiki_revision:") {
		print STDERR "No previous mediawiki revision found";
		return 0;
	}

	my $number = $fields[1];
	chomp($number);
	print STDERR "Last local mediawiki revision found is $number";
	return $number;
}

# Timestamp of each revision seen by get_last_remote_revision(),
# indexed by revision id. mw_push_file() passes it as "basetimestamp".
my %basetimestamps;

# Ask the wiki for the revision returned for each tracked page, record
# its timestamp in %basetimestamps and return the highest revision id
# seen (0 when there is no page).
sub get_last_remote_revision {
	mw_connect_maybe();

	my $newest = 0;

	foreach my $page (get_mw_pages()) {
		my $pageid = $page->{pageid};

		my $answer = $mediawiki->api({
			action => 'query',
			prop => 'revisions',
			rvprop => 'ids|timestamp',
			pageids => $pageid,
		});

		my $lastrev = pop(@{$answer->{query}->{pages}->{$pageid}->{revisions}});

		$basetimestamps{$lastrev->{revid}} = $lastrev->{timestamp};

		if ($lastrev->{revid} > $newest) {
			$newest = $lastrev->{revid};
		}
	}

	print STDERR "Last remote revision found is $newest.\n";
	return $newest;
}

# Normalize file content before it is sent to MediaWiki.
#
# Parameters:
#   the content, as a string
#   a flag telling whether the page is being created
#
# Trailing whitespace is removed and a single "\n" is appended. When
# the page is being created and nothing is left after trimming, the
# EMPTY_CONTENT placeholder is sent, because empty pages cannot be
# created.
sub mediawiki_clean {
	my ($text, $page_created) = @_;

	# Right-trim: MediaWiki does not keep blank space at the end
	# of a page.
	$text =~ s/\s+$//;

	# Creating empty pages is forbidden.
	$text = EMPTY_CONTENT if ($text eq "" && $page_created);

	return $text."\n";
}

# Filter applied on MediaWiki data before adding them to Git.
#
# Takes the content of a revision and returns the content of the
# corresponding file: the same text followed by "\n", or just "\n"
# when the text is the placeholder standing for an empty page.
#
# The placeholder is recognized regardless of trailing whitespace:
# EMPTY_CONTENT ends with a newline, whereas the text received here
# needs one appended below, so an exact comparison would miss it.
# Missing (undefined) content is treated as an empty page.
sub mediawiki_smudge {
	my $string = shift;
	$string = "" unless defined($string);

	(my $trimmed = $string) =~ s/\s+$//;
	(my $placeholder = EMPTY_CONTENT) =~ s/\s+$//;
	if ($trimmed eq $placeholder) {
		$string = "";
	}
	# This \n is important. This is due to mediawiki's way to handle end of files.
	return $string."\n";
}

# Turn a Git file name into a name acceptable as a MediaWiki title.
#
# Takes the file name and returns it with every SLASH_REPLACEMENT
# turned back into "/" and every [, ], {, } and | encoded as "_%_"
# followed by the lowercase hexadecimal code of the character.
sub mediawiki_clean_filename {
	my $filename = shift;
	$filename =~ s/@{[SLASH_REPLACEMENT]}/\//g;
	# [, ], |, {, and } are forbidden by MediaWiki, even URL-encoded.
	# Do a variant of URL-encoding, i.e. looks like URL-encoding,
	# but with _ added to prevent MediaWiki from thinking this is
	# an actual special character.
	#
	# The character is captured explicitly instead of being read
	# from $&, whose mere presence in a program can slow down every
	# pattern match on older perls.
	$filename =~ s/([\[\]\{\}\|])/sprintf("_%%_%x", ord($1))/ge;
	# If we use the uri escape before
	# we should unescape here, before anything

	return $filename;
}

# Turn a MediaWiki page title into a Git file name: "/" becomes
# SLASH_REPLACEMENT, spaces become "_", and "_%_XX" sequences (XX being
# two hexadecimal digits) are replaced with the character of that code.
sub mediawiki_smudge_filename {
	my ($name) = @_;

	$name =~ s/\//@{[SLASH_REPLACEMENT]}/g;
	$name =~ tr/ /_/;

	# Undo the encoding of forbidden characters performed by
	# mediawiki_clean_filename
	$name =~ s/_%_([0-9a-fA-F][0-9a-fA-F])/sprintf("%c", hex($1))/ge;

	return $name;
}

# Print a "data" command of the fast-import stream on STDOUT: the
# header "data <length in bytes>" on its own line, then the content
# itself, with nothing added after it.
sub literal_data {
	my $payload = shift;
	my $size = bytes::length($payload);
	print STDOUT "data ", $size, "\n", $payload;
}

# Answer the "capabilities" command: print the refspec and the
# supported commands on STDOUT, ended by a blank line.
sub mw_capabilities {
	print STDOUT
	    # Revisions are imported to the private namespace
	    # refs/mediawiki/$remotename/ by the helper and fetched into
	    # refs/remotes/$remotename later by fetch.
	    "refspec refs/heads/*:refs/mediawiki/$remotename/*\n",
	    "import\n",
	    "list\n",
	    "push\n",
	    "\n";
}

# Answer the "list" command. A wiki has no branches, so a single
# branch, arbitrarily called master, is advertised, together with a
# HEAD pointing to it. The answer ends with a blank line.
sub mw_list {
	print STDOUT
	    "? refs/heads/master\n",
	    "\@refs/heads/master HEAD\n",
	    "\n";
}

# Answer the "option" command: no option is implemented, so a notice
# naming the option goes to STDERR and "unsupported" to STDOUT.
sub mw_option {
	my $name = $_[0];
	print STDERR "remote-helper command 'option $name' not yet implemented\n";
	print STDOUT "unsupported\n";
}

# List the revisions of one page, starting at a given revision id.
#
# Parameters:
#   $page       - page record; its pageid is copied in each result
#   $id         - page id used in the query
#   $fetch_from - revision id to start from (sent as rvstartid)
#
# Returns a list of hash references { pageid, revid }. With a shallow
# import, only the entry with the highest revid is returned.
sub fetch_mw_revisions_for_page {
	my ($page, $id, $fetch_from) = @_;

	my @found;
	my %request = (
		action => 'query',
		prop => 'revisions',
		rvprop => 'ids',
		rvdir => 'newer',
		rvstartid => $fetch_from,
		rvlimit => 500,
		pageids => $id,
	);

	my $count = 0;
	# Revisions come by batches of at most 500; keep asking as long
	# as the answer says there is more.
	for (;;) {
		my $result = $mediawiki->api(\%request);

		foreach my $revision (@{$result->{query}->{pages}->{$id}->{revisions}}) {
			push(@found, {
				pageid => $page->{pageid},
				revid => $revision->{revid},
			});
			$count++;
		}

		my $more = $result->{'query-continue'};
		last unless $more;
		$request{rvstartid} = $more->{revisions}->{rvstartid};
	}

	if ($shallow_import && @found) {
		print STDERR "  Found 1 revision (shallow import).\n";
		my ($latest) = sort {$b->{revid} <=> $a->{revid}} (@found);
		return $latest;
	}

	print STDERR "  Found ", $count, " revision(s).\n";
	return @found;
}

# Collect the revisions to import for a set of pages.
#
# Parameters:
#   $pages      - array reference of page records
#   $fetch_from - revision id to start from, for every page
#
# Returns a list whose first element is a counter (number of pages
# plus one) and whose remaining elements are the { pageid, revid }
# entries; the entries of each page are placed before those of the
# pages processed earlier.
sub fetch_mw_revisions {
	my ($pages, $fetch_from) = @_;

	my $total = scalar(@{$pages});
	my @revisions;
	my $n = 1;
	foreach my $page (@{$pages}) {
		print STDERR "page $n/", $total, ": ". $page->{title} ."\n";
		$n++;
		unshift(@revisions,
			fetch_mw_revisions_for_page($page, $page->{pageid}, $fetch_from));
	}

	return ($n, @revisions);
}

# Write one wiki revision to the fast-import stream on STDOUT: a commit
# on refs/mediawiki/<remote>/master that modifies or deletes the file
# of the page, then a commit on refs/notes/<remote>/mediawiki attaching
# the MediaWiki revision number to it as a note.
#
# Parameters:
#   $commit      - hash reference with the keys title, comment,
#                  content, author, date and mw_revision
#   $full_import - true when importing from the beginning
#   $n           - rank of the revision in this import, also used as
#                  the mark of the commit
sub import_file_revision {
	my ($commit, $full_import, $n) = @_;

	my $author = $commit->{author};
	my $path = $commit->{title} . ".mw";
	my $committer = "committer $author <$author\@$wiki_name> " .
	    $commit->{date}->epoch . " +0000\n";

	# The first commit of an incremental import continues the
	# existing history; the first one of a full import starts it.
	my $is_first = ($n == 1);
	my $continues_history = (!$full_import && $is_first);

	print STDOUT "commit refs/mediawiki/$remotename/master\n";
	print STDOUT "mark :$n\n";
	print STDOUT $committer;
	literal_data($commit->{comment});

	# If it's not a clone, we need to know where to start from
	print STDOUT "from refs/mediawiki/$remotename/master^0\n" if ($continues_history);

	if ($commit->{content} eq DELETED_CONTENT) {
		print STDOUT "D $path\n";
	} else {
		print STDOUT "M 644 inline $path\n";
		literal_data($commit->{content});
		print STDOUT "\n\n";
	}

	# mediawiki revision number in the git note
	print STDOUT "reset refs/notes/$remotename/mediawiki\n" if ($full_import && $is_first);
	print STDOUT "commit refs/notes/$remotename/mediawiki\n";
	print STDOUT $committer;
	literal_data("Note added by git-mediawiki during import");
	print STDOUT "from refs/notes/$remotename/mediawiki^0\n" if ($continues_history);
	print STDOUT "N inline :$n\n";
	literal_data("mediawiki_revision: " . $commit->{mw_revision});
	print STDOUT "\n\n";
}

# parse a sequence of
# <cmd> <arg1>
# <cmd> <arg2>
# \n
# (like batch sequence of import and sequence of push statements)
#
# Takes the command name and reads STDIN up to and including the blank
# line ending the batch. Returns the list of arguments, one per line
# read. Dies, quoting the offending line, when a line is neither
# "<cmd> <arg>" nor blank, and dies as well when the input ends before
# the blank line.
sub get_more_refs {
	my $cmd = shift;
	my @refs;
	while (1) {
		my $line = <STDIN>;
		if (!defined($line)) {
			die("Unexpected end of input in a '$cmd' batch");
		}
		# \Q...\E: the command name is matched literally.
		if ($line =~ m/^\Q$cmd\E (.*)$/) {
			push(@refs, $1);
		} elsif ($line eq "\n") {
			return @refs;
		} else {
			die("Invalid command in a '$cmd' batch: ". $line);
		}
	}
}

# Answer the "import" command. Takes the ref of the first import line;
# the refs of the import lines following it are read from STDIN before
# anything is imported. Prints "done" once every ref is processed.
sub mw_import {
	my $first_ref = shift;

	# multiple import commands can follow each other.
	foreach my $ref ($first_ref, get_more_refs("import")) {
		mw_import_ref($ref);
	}

	print STDOUT "done\n";
}

# Import the wiki revisions newer than the last one recorded locally
# and write them to STDOUT as a fast-import stream.
#
# Takes the ref to import. "HEAD" is ignored; for any other ref, the
# revisions of all tracked pages are listed, sorted by revision id and
# emitted one by one with import_file_revision().
sub mw_import_ref {
	my $ref = shift;
	# The remote helper will call "import HEAD" and
	# "import refs/heads/master".
	# Since HEAD is a symbolic ref to master (by convention,
	# followed by the output of the command "list" that we gave),
	# we don't need to do anything in this case.
	if ($ref eq "HEAD") {
		return;
	}

	mw_connect_maybe();

	my @pages = get_mw_pages();

	print STDERR "Searching revisions...\n";
	my $last_local = get_last_local_revision();
	# A value of 1 means nothing was imported before (full import).
	my $fetch_from = $last_local + 1;
	# Finish the sentence started by get_last_local_revision().
	if ($fetch_from == 1) {
		print STDERR ", fetching from beginning.\n";
	} else {
		print STDERR ", fetching from here.\n";
	}
	my ($n, @revisions) = fetch_mw_revisions(\@pages, $fetch_from);

	# Creation of the fast-import stream
	print STDERR "Fetching & writing export data...\n";

	# The counter returned by fetch_mw_revisions() is not used: $n
	# now counts the revisions written to the stream.
	$n = 0;
	my $last_timestamp = 0; # Placeholder in case $rev->timestamp is undefined

	# Revisions are written in increasing revision id order, whatever
	# the page they belong to.
	foreach my $pagerevid (sort {$a->{revid} <=> $b->{revid}} @revisions) {
		# fetch the content of the pages
		my $query = {
			action => 'query',
			prop => 'revisions',
			rvprop => 'content|timestamp|comment|user|ids',
			revids => $pagerevid->{revid},
		};

		my $result = $mediawiki->api($query);

		my $rev = pop(@{$result->{query}->{pages}->{$pagerevid->{pageid}}->{revisions}});

		$n++;

		my %commit;
		# Fallback values for revisions without user or comment.
		$commit{author} = $rev->{user} || 'Anonymous';
		$commit{comment} = $rev->{comment} || '*Empty MediaWiki Message*';
		$commit{title} = mediawiki_smudge_filename(
			$result->{query}->{pages}->{$pagerevid->{pageid}}->{title}
		    );
		$commit{mw_revision} = $pagerevid->{revid};
		# The content of the revision is found under the '*' key.
		$commit{content} = mediawiki_smudge($rev->{'*'});

		# Without a timestamp, the previous value is incremented
		# instead.
		# NOTE(review): the previous value is normally a timestamp
		# string taken from the API; confirm that incrementing it
		# yields something parse_datetime() accepts.
		if (!defined($rev->{timestamp})) {
			$last_timestamp++;
		} else {
			$last_timestamp = $rev->{timestamp};
		}
		$commit{date} = DateTime::Format::ISO8601->parse_datetime($last_timestamp);

		print STDERR "$n/", scalar(@revisions), ": Revision #$pagerevid->{revid} of $commit{title}\n";

		import_file_revision(\%commit, ($fetch_from == 1), $n);
	}

	if ($fetch_from == 1 && $n == 0) {
		print STDERR "You appear to have cloned an empty MediaWiki.\n";
		# Something has to be done remote-helper side. If nothing is done, an error is
		# thrown saying that HEAD is referring to unknown object 0000000000000000000
		# and the clone fails.
	}
}

# Report a rejected, non-fast-forward push.
#
# Takes the name of the remote ref. Unless advice.pushNonFastForward is
# set to false, an explanation is printed on STDERR; the error line for
# the ref is then sent to Git on STDOUT. Always returns 0.
sub error_non_fast_forward {
	my $advice = run_git("config --bool advice.pushNonFastForward");
	chomp($advice);

	unless ($advice eq "false") {
		# Native git-push would show this after the summary.
		# We can't ask it to display it cleanly, so print it
		# ourselves before.
		print STDERR
		    "To prevent you from losing history, non-fast-forward updates were rejected\n",
		    "Merge the remote changes (e.g. 'git pull') before pushing again. See the\n",
		    "'Note about fast-forwards' section of 'git push --help' for details.\n";
	}

	print STDOUT "error $_[0] \"non-fast-forward\"\n";
	return 0;
}

# Send the new state of one file to the wiki.
#
# Parameters:
#   $diff_info          - metadata of the change, in this format:
#       100644 100644 <sha1_of_blob_before_commit> <sha1_of_blob_now> <status>
#   $complete_file_name - file name, including the .mw extension
#   $summary            - commit message, used as edit summary
#   $newrevid           - MediaWiki revision number known so far; it
#                         is returned unchanged when no edit is made
#
# Returns (revision number, status), the status being "ok" or
# "non-fast-forward" (edit conflict). Dies on any other error
# reported by the wiki. Files without the .mw extension are skipped
# with a message on STDERR.
sub mw_push_file {
	my ($diff_info, $complete_file_name, $summary, $newrevid) = @_;

	my @fields = split(/[ \t]/, $diff_info);
	my ($old_sha1, $new_sha1) = @fields[2, 3];

	# An all-zero blob id marks a creation or a deletion.
	my $page_created = ($old_sha1 eq NULL_SHA1);
	my $page_deleted = ($new_sha1 eq NULL_SHA1);
	$complete_file_name = mediawiki_clean_filename($complete_file_name);

	if (substr($complete_file_name,-3) ne ".mw") {
		print STDERR "$complete_file_name not a mediawiki file (Not pushable on this version of git-remote-mediawiki).\n";
		return ($newrevid, "ok");
	}

	my $title = substr($complete_file_name,0,-3);

	# Deleting a page usually requires special privileges. A common
	# convention is to replace the page with a marker content
	# instead.
	my $file_content = $page_deleted
	    ? DELETED_CONTENT
	    : run_git("cat-file blob $new_sha1");

	mw_connect_maybe();

	my %edit = (
		action => 'edit',
		summary => $summary,
		title => $title,
		basetimestamp => $basetimestamps{$newrevid},
		text => mediawiki_clean($file_content, $page_created),
	);
	# skip_encoding helps with names with accentuated characters
	my $result = $mediawiki->edit(\%edit, { skip_encoding => 1 });

	unless ($result) {
		my $error = $mediawiki->{error};
		if ($error->{code} != 3) {
			# Other errors. Shouldn't happen => just die()
			die 'Fatal: Error ' .
			    $error->{code} .
			    ' from mediwiki: ' . $error->{details};
		}
		# edit conflicts, considered as non-fast-forward
		print STDERR 'Warning: Error ' .
		    $error->{code} .
		    ' from mediwiki: ' . $error->{details} .
		    ".\n";
		return ($newrevid, "non-fast-forward");
	}

	$newrevid = $result->{edit}->{newrevid};
	print STDERR "Pushed file: $new_sha1 - $title\n";
	return ($newrevid, "ok");
}

# Answer the "push" command. Takes the refspec of the first push line;
# the refspecs of the following push lines are read from STDIN. Each
# refspec is checked, then handed to mw_push_revision(). A blank line
# on STDOUT tells Git that the push is over.
sub mw_push {
	# multiple push statements can follow each other
	my @refsspecs = (shift, get_more_refs("push"));
	my $pushed;

	foreach my $refspec (@refsspecs) {
		my ($force, $local, $remote) = $refspec =~ /^(\+)?([^:]*):([^:]*)$/
		    or die("Invalid refspec for push. Expected <src>:<dst> or +<src>:<dst>");

		print STDERR "Warning: forced push not allowed on a MediaWiki.\n" if ($force);

		# An empty source side asks for the deletion of the branch.
		if ($local eq "") {
			print STDERR "Cannot delete remote branch on a MediaWiki\n";
			print STDOUT "error $remote cannot delete\n";
			next;
		}
		unless ($remote eq "refs/heads/master") {
			print STDERR "Only push to the branch 'master' is supported on a MediaWiki\n";
			print STDOUT "error $remote only master allowed\n";
			next;
		}

		$pushed = 1 if (mw_push_revision($local, $remote));
	}

	# Notify Git that the push is done
	print STDOUT "\n";

	# With dumb push, the local refs were not updated: explain how
	# to get the pushed revisions back.
	if ($pushed && $dumb_push) {
		print STDERR
		    "Just pushed some revisions to MediaWiki.\n",
		    "The pushed revisions now have to be re-imported, and your current branch\n",
		    "needs to be updated with these re-imported commits. You can do this with\n",
		    "\n",
		    "  git pull --rebase\n",
		    "\n";
	}
}

# Push the commits of a local ref to the wiki, one commit at a time.
#
# Parameters:
#   $local  - local ref (or any revision) to push
#   $remote - remote ref, only used in the messages sent to Git
#
# Returns 1 after printing "ok <remote>" when the commits were pushed,
# 0 when there was nothing to push or when the push was rejected as
# non-fast-forward. Dies on an unexpected status from mw_push_file().
sub mw_push_revision {
	my $local = shift;
	my $remote = shift; # actually, this has to be "refs/heads/master" at this point.
	my $last_local_revid = get_last_local_revision();
	print STDERR ".\n"; # Finish sentence started by get_last_local_revision()
	my $last_remote_revid = get_last_remote_revision();
	# Revision number attached to the pushed commits; updated after
	# each edit accepted by the wiki.
	my $mw_revision = $last_remote_revid;

	# Get sha1 of commit pointed by local HEAD
	my $HEAD_sha1 = run_git("rev-parse $local 2>/dev/null"); chomp($HEAD_sha1);
	# Get sha1 of commit pointed by remotes/$remotename/master
	my $remoteorigin_sha1 = run_git("rev-parse refs/remotes/$remotename/master 2>/dev/null");
	chomp($remoteorigin_sha1);

	# The wiki has revisions newer than the last one imported: the
	# user has to fetch them first.
	if ($last_local_revid > 0 &&
	    $last_local_revid < $last_remote_revid) {
		return error_non_fast_forward($remote);
	}

	if ($HEAD_sha1 eq $remoteorigin_sha1) {
		# nothing to push
		return 0;
	}

	# Get every commit in between HEAD and refs/remotes/origin/master,
	# including HEAD and refs/remotes/origin/master
	my @commit_pairs = ();
	if ($last_local_revid > 0) {
		my $parsed_sha1 = $remoteorigin_sha1;
		# Find a path from last MediaWiki commit to pushed commit
		while ($parsed_sha1 ne $HEAD_sha1) {
			# Select the "rev-list --children" line that starts
			# with the commit reached so far.
			my @commit_info =  grep(/^$parsed_sha1/, split(/\n/, run_git("rev-list --children $local")));
			if (!@commit_info) {
				return error_non_fast_forward($remote);
			}
			my @commit_info_split = split(/ |\n/, $commit_info[0]);
			# $commit_info_split[0] is the commit reached so far
			# (the line was selected because it starts with it).
			# $commit_info_split[1] is the next field of that
			# line, used as the next commit to export.
			# NOTE(review): with "rev-list --children", that
			# field should be a child of [0] -- confirm. The
			# names $sha1_child/$sha1_commit used further down
			# for [0]/[1] read the other way round.
			push(@commit_pairs, \@commit_info_split);
			$parsed_sha1 = $commit_info_split[1];
		}
	} else {
		# No remote mediawiki revision. Export the whole
		# history (linearized with --first-parent)
		print STDERR "Warning: no common ancestor, pushing complete history\n";
		my $history = run_git("rev-list --first-parent --children $local");
		my @history = split('\n', $history);
		# Skip the first line of the output.
		# NOTE(review): presumably the tip of $local, which has
		# no child to be paired with -- confirm.
		@history = @history[1..$#history];
		# Process the remaining lines in reverse order.
		foreach my $line (reverse @history) {
			my @commit_info_split = split(/ |\n/, $line);
			push(@commit_pairs, \@commit_info_split);
		}
	}

	foreach my $commit_info_split (@commit_pairs) {
		# The diff below goes from element [0] to element [1] of
		# the pair, and [1] is the commit whose message is used.
		my $sha1_child = @{$commit_info_split}[0];
		my $sha1_commit = @{$commit_info_split}[1];
		my $diff_infos = run_git("diff-tree -r --raw -z $sha1_child $sha1_commit");
		# TODO: we could detect rename, and encode them with a #redirect on the wiki.
		# TODO: for now, it's just a delete+add
		my @diff_info_list = split(/\0/, $diff_infos);
		# Keep the subject line of the commit message as mediawiki comment for the revision
		my $commit_msg = run_git("log --no-walk --format=\"%s\" $sha1_commit");
		chomp($commit_msg);
		# Push every blob
		while (@diff_info_list) {
			my $status;
			# git diff-tree -z gives an output like
			# <metadata>\0<filename1>\0
			# <metadata>\0<filename2>\0
			# and we've split on \0.
			my $info = shift(@diff_info_list);
			my $file = shift(@diff_info_list);
			($mw_revision, $status) = mw_push_file($info, $file, $commit_msg, $mw_revision);
			if ($status eq "non-fast-forward") {
				# we may already have sent part of the
				# commit to MediaWiki, but it's too
				# late to cancel it. Stop the push in
				# the middle, but still give an
				# accurate error message.
				return error_non_fast_forward($remote);
			}
			if ($status ne "ok") {
				die("Unknown error from mw_push_file()");
			}
		}
		# Unless dumb push is enabled, record the wiki revision
		# in a note on the pushed commit and move
		# refs/mediawiki/<remote>/master to that commit.
		unless ($dumb_push) {
			run_git("notes --ref=$remotename/mediawiki add -m \"mediawiki_revision: $mw_revision\" $sha1_commit");
			run_git("update-ref -m \"Git-MediaWiki push\" refs/mediawiki/$remotename/master $sha1_commit $sha1_child");
		}
	}

	print STDOUT "ok $remote\n";
	return 1;
}