aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff King <peff@peff.net>2012-03-22 18:52:13 -0400
committerJunio C Hamano <gitster@pobox.com>2012-03-23 13:52:49 -0700
commit90d43b07687fdc51d1f2fc14948df538dc45584b (patch)
treec829369711eb89ebe5b11c164684269f721838e4
parentf8582cad8d11cae15e73fe5213b6180ee4b9fcce (diff)
downloadgit-90d43b07687fdc51d1f2fc14948df538dc45584b.tar.gz
git-90d43b07687fdc51d1f2fc14948df538dc45584b.tar.xz
teach diffcore-rename to optionally ignore empty content
Our rename detection is a heuristic, matching pairs of removed and added files with similar or identical content. It's unlikely to be wrong when there is actual content to compare, and we already take care not to do inexact rename detection when there is not enough content to produce good results. However, we always do exact rename detection, even when the blob is tiny or empty. It's easy to get false positives with an empty blob, simply because it is an obvious content to use as a boilerplate (e.g., when telling git that an empty directory is worth tracking via an empty .gitignore). This patch lets callers specify whether or not they are interested in using empty files as rename sources and destinations. The default is "yes", keeping the original behavior. It works by detecting the empty-blob sha1 for rename sources and destinations. One more flexible alternative would be to allow the caller to specify a minimum size for a blob to be "interesting" for rename detection. But that would catch small boilerplate files, not large ones (e.g., if you had the GPL COPYING file in many directories). A better alternative would be to allow a "-rename" gitattribute to allow boilerplate files to be marked as such. I'll leave the complexity of that solution until such time as somebody actually wants it. The complaints we've seen so far revolve around empty files, so let's start with the simple thing. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--diff.c5
-rw-r--r--diff.h2
-rw-r--r--diffcore-rename.c6
3 files changed, 12 insertions, 1 deletions
diff --git a/diff.c b/diff.c
index 377ec1ea4..0b70aadc4 100644
--- a/diff.c
+++ b/diff.c
@@ -3136,6 +3136,7 @@ void diff_setup(struct diff_options *options)
options->rename_limit = -1;
options->dirstat_permille = diff_dirstat_permille_default;
options->context = 3;
+ DIFF_OPT_SET(options, RENAME_EMPTY);
options->change = diff_change;
options->add_remove = diff_addremove;
@@ -3506,6 +3507,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
}
else if (!strcmp(arg, "--no-renames"))
options->detect_rename = 0;
+ else if (!strcmp(arg, "--rename-empty"))
+ DIFF_OPT_SET(options, RENAME_EMPTY);
+ else if (!strcmp(arg, "--no-rename-empty"))
+ DIFF_OPT_CLR(options, RENAME_EMPTY);
else if (!strcmp(arg, "--relative"))
DIFF_OPT_SET(options, RELATIVE_NAME);
else if (!prefixcmp(arg, "--relative=")) {
diff --git a/diff.h b/diff.h
index cb687436a..dd48eca71 100644
--- a/diff.h
+++ b/diff.h
@@ -60,7 +60,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
#define DIFF_OPT_SILENT_ON_REMOVE (1 << 5)
#define DIFF_OPT_FIND_COPIES_HARDER (1 << 6)
#define DIFF_OPT_FOLLOW_RENAMES (1 << 7)
-/* (1 << 8) unused */
+#define DIFF_OPT_RENAME_EMPTY (1 << 8)
/* (1 << 9) unused */
#define DIFF_OPT_HAS_CHANGES (1 << 10)
#define DIFF_OPT_QUICK (1 << 11)
diff --git a/diffcore-rename.c b/diffcore-rename.c
index f639601c7..216a7a4bb 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -512,9 +512,15 @@ void diffcore_rename(struct diff_options *options)
else if (options->single_follow &&
strcmp(options->single_follow, p->two->path))
continue; /* not interested */
+ else if (!DIFF_OPT_TST(options, RENAME_EMPTY) &&
+ is_empty_blob_sha1(p->two->sha1))
+ continue;
else
locate_rename_dst(p->two, 1);
}
+ else if (!DIFF_OPT_TST(options, RENAME_EMPTY) &&
+ is_empty_blob_sha1(p->one->sha1))
+ continue;
else if (!DIFF_PAIR_UNMERGED(p) && !DIFF_FILE_VALID(p->two)) {
/*
* If the source is a broken "delete", and