aboutsummaryrefslogtreecommitdiff
path: root/builtin-grep.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2007-09-14 00:31:00 -0700
committerJunio C Hamano <gitster@pobox.com>2007-09-14 15:16:43 -0700
commitd99ebf081797dbb43ff618ff59f4c607b0acf045 (patch)
tree58a480bcd263f0c49606916b5a3286ae38bc1c74 /builtin-grep.c
parent7c8b5eaf225db0b6b502126fc16f8f6814c38d24 (diff)
downloadgit-d99ebf081797dbb43ff618ff59f4c607b0acf045.tar.gz
git-d99ebf081797dbb43ff618ff59f4c607b0acf045.tar.xz
Split grep arguments in a way that does not requires to add /dev/null.
In order to (almost) always show the name of the file without relying on "-H" option of GNU grep, we used to add /dev/null to the argument list unless we are doing -l or -L. This caused "/dev/null:0" to show up when -c is given in the output. It is not enough to add -c to the set of options we do not pass /dev/null for. When we have too many files, we invoke grep multiple times and we need to avoid giving a widow filename to the last invocation -- otherwise we will not see the name. This keeps two filenames when the argv[] buffer is about to overflow and we have not finished iterating over the index, so that the last round will always have at least two paths to work with (and not require /dev/null). An obvious and the only exception is when there is only 1 file that is given to the underlying grep, and in that case we avoid passing /dev/null and let the external "grep -c" report only the number of matches. Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin-grep.c')
-rw-r--r--builtin-grep.c90
1 files changed, 76 insertions, 14 deletions
diff --git a/builtin-grep.c b/builtin-grep.c
index e13cb31f2..c7b45c4d5 100644
--- a/builtin-grep.c
+++ b/builtin-grep.c
@@ -187,6 +187,78 @@ static int exec_grep(int argc, const char **argv)
else die("maximum number of args exceeded"); \
} while (0)
+/*
+ * If you send a singleton filename to grep, it does not give
+ * the name of the file. GNU grep has "-H" but we would want
+ * that behaviour in a portable way.
+ *
+ * So we keep two pathnames in argv buffer unsent to grep in
+ * the main loop if we need to do more than one grep.
+ */
+static int flush_grep(struct grep_opt *opt,
+ int argc, int arg0, const char **argv, int *kept)
+{
+ int status;
+ int count = argc - arg0;
+ const char *kept_0 = NULL;
+
+ if (count <= 2) {
+ /*
+ * Because we keep at least 2 paths in the call from
+ * the main loop (i.e. kept != NULL), and MAXARGS is
+ * far greater than 2, this usually is a call to
+ * conclude the grep. However, the user could attempt
+ * to overflow the argv buffer by giving too many
+ * options to leave very small number of real
+ * arguments even for the call in the main loop.
+ */
+ if (kept)
+ die("insanely many options to grep");
+
+ /*
+ * If we have two or more paths, we do not have to do
+ * anything special, but we need to push /dev/null to
+ * get "-H" behaviour of GNU grep portably but when we
+ * are not doing "-l" nor "-L" nor "-c".
+ */
+ if (count == 1 &&
+ !opt->name_only &&
+ !opt->unmatch_name_only &&
+ !opt->count) {
+ argv[argc++] = "/dev/null";
+ argv[argc] = NULL;
+ }
+ }
+
+ else if (kept) {
+ /*
+ * Called because we found many paths and haven't finished
+ * iterating over the cache yet. We keep two paths
+ * for the concluding call. argv[argc-2] and argv[argc-1]
+ * has the last two paths, so save the first one away,
+ * replace it with NULL while sending the list to grep,
+ * and recover them after we are done.
+ */
+ *kept = 2;
+ kept_0 = argv[argc-2];
+ argv[argc-2] = NULL;
+ argc -= 2;
+ }
+
+ status = exec_grep(argc, argv);
+
+ if (kept_0) {
+ /*
+ * Then recover them. Now the last arg is beyond the
+ * terminating NULL which is at argc, and the second
+ * from the last is what we saved away in kept_0
+ */
+ argv[arg0++] = kept_0;
+ argv[arg0] = argv[argc+1];
+ }
+ return status;
+}
+
static int external_grep(struct grep_opt *opt, const char **paths, int cached)
{
int i, nr, argc, hit, len, status;
@@ -253,22 +325,12 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached)
push_arg(p->pattern);
}
- /*
- * To make sure we get the header printed out when we want it,
- * add /dev/null to the paths to grep. This is unnecessary
- * (and wrong) with "-l" or "-L", which always print out the
- * name anyway.
- *
- * GNU grep has "-H", but this is portable.
- */
- if (!opt->name_only && !opt->unmatch_name_only)
- push_arg("/dev/null");
-
hit = 0;
argc = nr;
for (i = 0; i < active_nr; i++) {
struct cache_entry *ce = active_cache[i];
char *name;
+ int kept;
if (!S_ISREG(ntohl(ce->ce_mode)))
continue;
if (!pathspec_matches(paths, ce->name))
@@ -283,10 +345,10 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached)
argv[argc++] = name;
if (argc < MAXARGS && !ce_stage(ce))
continue;
- status = exec_grep(argc, argv);
+ status = flush_grep(opt, argc, nr, argv, &kept);
if (0 < status)
hit = 1;
- argc = nr;
+ argc = nr + kept;
if (ce_stage(ce)) {
do {
i++;
@@ -296,7 +358,7 @@ static int external_grep(struct grep_opt *opt, const char **paths, int cached)
}
}
if (argc > nr) {
- status = exec_grep(argc, argv);
+ status = flush_grep(opt, argc, nr, argv, NULL);
if (0 < status)
hit = 1;
}