aboutsummaryrefslogtreecommitdiff
path: root/git-filter-branch.sh
blob: b5fa44920d40d3144567509a6922a07bb87977ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
#!/bin/sh
#
# Rewrite revision history
# Copyright (c) Petr Baudis, 2006
# Minimal changes to "port" it to core-git (c) Johannes Schindelin, 2007
#
# Lets you rewrite the revision history of the current branch, creating
# a new branch. You can specify a number of filters to modify the commits,
# files and trees.

USAGE="git-filter-branch [-d TEMPDIR] [FILTERS] DESTBRANCH [REV-RANGE]"
. git-sh-setup

warn () {
        echo "$*" >&2
}

map()
{
	# if it was not rewritten, take the original
	if test -r "$workdir/../map/$1"
	then
		cat "$workdir/../map/$1"
	else
		echo "$1"
	fi
}

# override die(): this version puts in an extra line break, so that
# the progress is still visible

die()
{
	echo >&2
	echo "$*" >&2
	exit 1
}

# When piped a commit, output a script to set the ident of either
# "author" or "committer

set_ident () {
	lid="$(echo "$1" | tr "A-Z" "a-z")"
	uid="$(echo "$1" | tr "a-z" "A-Z")"
	pick_id_script='
		/^'$lid' /{
			s/'\''/'\''\\'\'\''/g
			h
			s/^'$lid' \([^<]*\) <[^>]*> .*$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/export GIT_'$uid'_NAME='\''&'\''/p

			g
			s/^'$lid' [^<]* <\([^>]*\)> .*$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/export GIT_'$uid'_EMAIL='\''&'\''/p

			g
			s/^'$lid' [^<]* <[^>]*> \(.*\)$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/export GIT_'$uid'_DATE='\''&'\''/p

			q
		}
	'

	LANG=C LC_ALL=C sed -ne "$pick_id_script"
	# Ensure non-empty id name.
	echo "[ -n \"\$GIT_${uid}_NAME\" ] || export GIT_${uid}_NAME=\"\${GIT_${uid}_EMAIL%%@*}\""
}

tempdir=.git-rewrite
filter_env=
filter_tree=
filter_index=
filter_parent=
filter_msg=cat
filter_commit='git commit-tree "$@"'
filter_tag_name=
filter_subdir=
orig_namespace=refs/original/
force=
while case "$#" in 0) usage;; esac
do
	case "$1" in
	--)
		shift
		break
		;;
	--force|-f)
		shift
		force=t
		continue
		;;
	-*)
		;;
	*)
		break;
	esac

	# all switches take one argument
	ARG="$1"
	case "$#" in 1) usage ;; esac
	shift
	OPTARG="$1"
	shift

	case "$ARG" in
	-d)
		tempdir="$OPTARG"
		;;
	--env-filter)
		filter_env="$OPTARG"
		;;
	--tree-filter)
		filter_tree="$OPTARG"
		;;
	--index-filter)
		filter_index="$OPTARG"
		;;
	--parent-filter)
		filter_parent="$OPTARG"
		;;
	--msg-filter)
		filter_msg="$OPTARG"
		;;
	--commit-filter)
		filter_commit="$OPTARG"
		;;
	--tag-name-filter)
		filter_tag_name="$OPTARG"
		;;
	--subdirectory-filter)
		filter_subdir="$OPTARG"
		;;
	--original)
		orig_namespace="$OPTARG"
		;;
	*)
		usage
		;;
	esac
done

case "$force" in
t)
	rm -rf "$tempdir"
;;
'')
	test -d "$tempdir" &&
		die "$tempdir already exists, please remove it"
esac
mkdir -p "$tempdir/t" &&
tempdir="$(cd "$tempdir"; pwd)" &&
cd "$tempdir/t" &&
workdir="$(pwd)" ||
die ""

# Make sure refs/original is empty
git for-each-ref > "$tempdir"/backup-refs
while read sha1 type name
do
	case "$force,$name" in
	,$orig_namespace*)
		die "Namespace $orig_namespace not empty"
	;;
	t,$orig_namespace*)
		git update-ref -d "$name" $sha1
	;;
	esac
done < "$tempdir"/backup-refs

case "$GIT_DIR" in
/*)
	;;
*)
	GIT_DIR="$(pwd)/../../$GIT_DIR"
	;;
esac
export GIT_DIR GIT_WORK_TREE=.

# These refs should be updated if their heads were rewritten

git rev-parse --revs-only --symbolic "$@" |
while read ref
do
	# normalize ref
	case "$ref" in
	HEAD)
		ref="$(git symbolic-ref "$ref")"
	;;
	refs/*)
	;;
	*)
		ref="$(git for-each-ref --format='%(refname)' |
			grep /"$ref")"
	esac

	git check-ref-format "$ref" && echo "$ref"
done > "$tempdir"/heads

test -s "$tempdir"/heads ||
	die "Which ref do you want to rewrite?"

export GIT_INDEX_FILE="$(pwd)/../index"
git read-tree || die "Could not seed the index"

ret=0

# map old->new commit ids for rewriting parents
mkdir ../map || die "Could not create map/ directory"

case "$filter_subdir" in
"")
	git rev-list --reverse --topo-order --default HEAD \
		--parents "$@"
	;;
*)
	git rev-list --reverse --topo-order --default HEAD \
		--parents --full-history "$@" -- "$filter_subdir"
esac > ../revs || die "Could not get the commits"
commits=$(wc -l <../revs | tr -d " ")

test $commits -eq 0 && die "Found nothing to rewrite"

# Rewrite the commits

i=0
while read commit parents; do
	i=$(($i+1))
	printf "\rRewrite $commit ($i/$commits)"

	case "$filter_subdir" in
	"")
		git read-tree -i -m $commit
		;;
	*)
		git read-tree -i -m $commit:"$filter_subdir"
	esac || die "Could not initialize the index"

	export GIT_COMMIT=$commit
	git cat-file commit "$commit" >../commit ||
		die "Cannot read commit $commit"

	eval "$(set_ident AUTHOR <../commit)" ||
		die "setting author failed for commit $commit"
	eval "$(set_ident COMMITTER <../commit)" ||
		die "setting committer failed for commit $commit"
	eval "$filter_env" < /dev/null ||
		die "env filter failed: $filter_env"

	if [ "$filter_tree" ]; then
		git checkout-index -f -u -a ||
			die "Could not checkout the index"
		# files that $commit removed are now still in the working tree;
		# remove them, else they would be added again
		git ls-files -z --others | xargs -0 rm -f
		eval "$filter_tree" < /dev/null ||
			die "tree filter failed: $filter_tree"

		git diff-index -r $commit | cut -f 2- | tr '\n' '\0' | \
			xargs -0 git update-index --add --replace --remove
		git ls-files -z --others | \
			xargs -0 git update-index --add --replace --remove
	fi

	eval "$filter_index" < /dev/null ||
		die "index filter failed: $filter_index"

	parentstr=
	for parent in $parents; do
		for reparent in $(map "$parent"); do
			parentstr="$parentstr -p $reparent"
		done
	done
	if [ "$filter_parent" ]; then
		parentstr="$(echo "$parentstr" | eval "$filter_parent")" ||
				die "parent filter failed: $filter_parent"
	fi

	sed -e '1,/^$/d' <../commit | \
		eval "$filter_msg" > ../message ||
			die "msg filter failed: $filter_msg"
	sh -c "$filter_commit" "git commit-tree" \
		$(git write-tree) $parentstr < ../message > ../map/$commit
done <../revs

# In case of a subdirectory filter, it is possible that a specified head
# is not in the set of rewritten commits, because it was pruned by the
# revision walker.  Fix it by mapping these heads to the next rewritten
# ancestor(s), i.e. the boundaries in the set of rewritten commits.

# NEEDSWORK: we should sort the unmapped refs topologically first
while read ref
do
	sha1=$(git rev-parse "$ref"^0)
	test -f "$workdir"/../map/$sha1 && continue
	# Assign the boundarie(s) in the set of rewritten commits
	# as the replacement commit(s).
	# (This would look a bit nicer if --not --stdin worked.)
	for p in $( (cd "$workdir"/../map; ls | sed "s/^/^/") |
		git rev-list $ref --boundary --stdin |
		sed -n "s/^-//p")
	do
		map $p >> "$workdir"/../map/$sha1
	done
done < "$tempdir"/heads

# Finally update the refs

_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
_x40="$_x40$_x40$_x40$_x40$_x40$_x40$_x40$_x40"
count=0
echo
while read ref
do
	# avoid rewriting a ref twice
	test -f "$orig_namespace$ref" && continue

	sha1=$(git rev-parse "$ref"^0)
	rewritten=$(map $sha1)

	test $sha1 = "$rewritten" &&
		warn "WARNING: Ref '$ref' is unchanged" &&
		continue

	case "$rewritten" in
	'')
		echo "Ref '$ref' was deleted"
		git update-ref -m "filter-branch: delete" -d "$ref" $sha1 ||
			die "Could not delete $ref"
	;;
	$_x40)
		echo "Ref '$ref' was rewritten"
		git update-ref -m "filter-branch: rewrite" \
				"$ref" $rewritten $sha1 ||
			die "Could not rewrite $ref"
	;;
	*)
		# NEEDSWORK: possibly add -Werror, making this an error
		warn "WARNING: '$ref' was rewritten into multiple commits:"
		warn "$rewritten"
		warn "WARNING: Ref '$ref' points to the first one now."
		rewritten=$(echo "$rewritten" | head -n 1)
		git update-ref -m "filter-branch: rewrite to first" \
				"$ref" $rewritten $sha1 ||
			die "Could not rewrite $ref"
	;;
	esac
	git update-ref -m "filter-branch: backup" "$orig_namespace$ref" $sha1
	count=$(($count+1))
done < "$tempdir"/heads

# TODO: This should possibly go, with the semantics that all positive given
#       refs are updated, and their original heads stored in refs/original/
# Filter tags

if [ "$filter_tag_name" ]; then
	git for-each-ref --format='%(objectname) %(objecttype) %(refname)' refs/tags |
	while read sha1 type ref; do
		ref="${ref#refs/tags/}"
		# XXX: Rewrite tagged trees as well?
		if [ "$type" != "commit" -a "$type" != "tag" ]; then
			continue;
		fi

		if [ "$type" = "tag" ]; then
			# Dereference to a commit
			sha1t="$sha1"
			sha1="$(git rev-parse "$sha1"^{commit} 2>/dev/null)" || continue
		fi

		[ -f "../map/$sha1" ] || continue
		new_sha1="$(cat "../map/$sha1")"
		export GIT_COMMIT="$sha1"
		new_ref="$(echo "$ref" | eval "$filter_tag_name")" ||
			die "tag name filter failed: $filter_tag_name"

		echo "$ref -> $new_ref ($sha1 -> $new_sha1)"

		if [ "$type" = "tag" ]; then
			# Warn that we are not rewriting the tag object itself.
			warn "unreferencing tag object $sha1t"
		fi

		git update-ref "refs/tags/$new_ref" "$new_sha1" ||
			die "Could not write tag $new_ref"
	done
fi

cd ../..
rm -rf "$tempdir"
echo
test $count -gt 0 && echo "These refs were rewritten:"
git show-ref | grep ^"$orig_namespace"

exit $ret