4 * Copyright (c) 2006 Junio C Hamano
11 #include "tree-walk.h"
/*
 * Fallback for builds that did not define NO_EXTERNAL_GREP: the macro is
 * given a value here so that the later #if !NO_EXTERNAL_GREP tests are
 * well defined.  Both a 0 and a 1 definition appear below.
 * NOTE(review): the condition that selects between the two values is not
 * visible here; presumably a platform test -- confirm.
 */
16 #ifndef NO_EXTERNAL_GREP
18 #define NO_EXTERNAL_GREP 0
20 #define NO_EXTERNAL_GREP 1
/*
 * File-scope flag read by grep_cache(): the external grep path is taken
 * only when this flag is zero and the index (cached) mode was not asked for.
 * NOTE(review): the place where the flag is set is not visible here;
 * presumably the --no-ext-grep option in cmd_grep() -- confirm.
 */
24 static int builtin_grep;
/*
 * Configuration callback for git grep; cmd_grep() registers it through
 * git_config() with a struct grep_opt as the callback data CB.
 *
 * Keys handled here:
 *   color.grep          - parsed by git_config_colorbool() into opt->color
 *   color.grep.external - stored as a string in opt->color_external; the
 *                         result of git_config_string() is returned
 *   color.grep.match    - parsed by color_parse() into opt->color_match;
 *                         config_error_nonbool() is returned on the error
 *                         path (NOTE(review): presumably when VALUE is
 *                         missing -- the guarding test is not shown)
 *
 * Every other key is handed to git_color_default_config() and its result
 * is returned.
 */
26 static int grep_config(const char *var, const char *value, void *cb)
28 struct grep_opt *opt = cb;
30 if (!strcmp(var, "color.grep")) {
31 opt->color = git_config_colorbool(var, value, -1);
34 if (!strcmp(var, "color.grep.external"))
35 return git_config_string(&(opt->color_external), var, value);
36 if (!strcmp(var, "color.grep.match")) {
38 return config_error_nonbool(var);
39 color_parse(value, var, opt->color_match);
42 return git_color_default_config(var, value, cb);
46 * git grep pathspecs are somewhat different from diff-tree pathspecs;
47 * pathname wildcards are allowed.
/*
 * Decide whether NAME is selected by the NULL-terminated pathspec array
 * PATHS.
 *
 * paths - array of pathspec strings ended by a NULL entry; may itself be
 *         NULL or empty, which is tested first
 * name  - path to test; a trailing slash marks a directory that the caller
 *         is considering descending into
 *
 * For each pathspec the visible tests are, in order:
 *   1. leading-path match: the pathspec is a prefix of NAME that stops at
 *      a path component boundary (the pathspec ends in a slash, or NAME
 *      continues with NUL or a slash right after it);
 *   2. wildcard match through fnmatch(match, name, 0);
 *   3. only when NAME ends in a slash: NAME is compared with the literal
 *      part of the pathspec that precedes its first metacharacter (one of
 *      * [ ?), over the shorter of the two lengths, so that a directory is
 *      kept when it could still lead to a matching path.
 *
 * NOTE(review): the return statements are not visible here.  Callers in
 * this file skip an entry when the result is zero, so presumably nonzero
 * means selected and a NULL or empty PATHS selects everything -- confirm.
 */
49 static int pathspec_matches(const char **paths, const char *name)
52 if (!paths || !*paths)
54 namelen = strlen(name);
55 for (i = 0; paths[i]; i++) {
56 const char *match = paths[i];
57 int matchlen = strlen(match);
58 const char *cp, *meta;
61 ((matchlen <= namelen) &&
62 !strncmp(name, match, matchlen) &&
63 (match[matchlen-1] == '/' ||
64 name[matchlen] == '\0' || name[matchlen] == '/')))
66 if (!fnmatch(match, name, 0))
68 if (name[namelen-1] != '/')
71 /* We are being asked if the directory ("name") is worth
74 * Find the longest leading directory name that does
75 * not have metacharacter in the pathspec; the name
76 * we are looking at must overlap with that directory.
78 for (cp = match, meta = NULL; cp - match < matchlen; cp++) {
80 if (ch == '*' || ch == '[' || ch == '?') {
86 meta = cp; /* fully literal */
88 if (namelen <= meta - match) {
89 /* Looking at "Documentation/" and
90 * the pattern says "Documentation/howto/", or
91 * "Documentation/diff*.txt". The name we
92 * have should match prefix.
94 if (!memcmp(match, name, namelen))
99 if (meta - match < namelen) {
100 /* Looking at "Documentation/howto/" and
101 * the pattern says "Documentation/h*";
102 * match up to "Do.../h"; this avoids descending
103 * into "Documentation/technical/".
105 if (!memcmp(match, name, meta - match))
/*
 * Grep the contents of the object SHA1 and report hits under NAME.
 *
 * opt           - grep options; relative, prefix_length and prefix control
 *                 the path rewriting described below
 * sha1          - object whose contents are loaded with read_sha1_file()
 * name          - label used for output; its first TREE_NAME_LEN bytes are
 *                 a tree-name prefix that is preserved verbatim
 * tree_name_len - length of that prefix; callers in this file pass 0 or
 *                 the length of the tree name plus its colon separator
 *
 * A failed read is reported through error().  When relative output is
 * requested and a prefix is set, the part of NAME after the tree-name
 * prefix is converted with quote_path_relative() and the tree-name prefix
 * is inserted back in front of the result.  The buffer is then searched
 * with grep_buffer() and the temporary path buffer is released.
 * NOTE(review): the return statement is not shown; presumably the value
 * stored in hit is returned -- confirm.
 */
113 static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
117 enum object_type type;
119 struct strbuf pathbuf = STRBUF_INIT;
121 data = read_sha1_file(sha1, &type, &size);
123 error("'%s': unable to read %s", name, sha1_to_hex(sha1));
126 if (opt->relative && opt->prefix_length) {
127 quote_path_relative(name + tree_name_len, -1, &pathbuf, opt->prefix);
128 strbuf_insert(&pathbuf, 0, name, tree_name_len);
131 hit = grep_buffer(opt, name, data, size);
132 strbuf_release(&pathbuf);
/*
 * Grep a file from the working tree.
 *
 * opt      - grep options; relative, prefix_length and prefix control how
 *            the name is shown
 * filename - path of the file to read
 *
 * Visible steps: lstat() the path (a failure is reported with error()),
 * treat an empty file as no hit by returning 0, test that the entry is a
 * regular file, read the whole file into a buffer of st_size + 1 bytes
 * with read_in_full() (a short read is reported with error()), optionally
 * rewrite FILENAME relative to the prefix with quote_path_relative(), and
 * search the buffer with grep_buffer().
 * NOTE(review): the actions taken on the error paths and the final return
 * are not visible here; presumably the grep_buffer() result is returned
 * -- confirm.
 */
137 static int grep_file(struct grep_opt *opt, const char *filename)
143 struct strbuf buf = STRBUF_INIT;
145 if (lstat(filename, &st) < 0) {
148 error("'%s': %s", filename, strerror(errno));
152 return 0; /* empty file -- no grep hit */
153 if (!S_ISREG(st.st_mode))
155 sz = xsize_t(st.st_size);
156 i = open(filename, O_RDONLY);
159 data = xmalloc(sz + 1);
160 if (st.st_size != read_in_full(i, data, sz)) {
161 error("'%s': short read %s", filename, strerror(errno));
167 if (opt->relative && opt->prefix_length)
168 filename = quote_path_relative(filename, -1, &buf, opt->prefix);
169 i = grep_buffer(opt, filename, data, sz);
170 strbuf_release(&buf);
175 #if !NO_EXTERNAL_GREP
/*
 * Run the external grep program with the argument vector ARGV and wait
 * for it to finish.
 *
 * The program is started with execvp() under the name grep, the wait is
 * done in a waitpid() loop that repeats while waitpid() fails, and the
 * exit status is then inspected with WIFEXITED and WEXITSTATUS.
 * NOTE(review): process creation and all return values are not visible
 * here; presumably a child is forked for the execvp() call and the result
 * distinguishes a zero exit status from the rest -- confirm.
 */
176 static int exec_grep(int argc, const char **argv)
186 execvp("grep", (char **) argv);
189 while (waitpid(pid, &status, 0) < 0) {
194 if (WIFEXITED(status)) {
195 if (!WEXITSTATUS(status))
/*
 * Append A to the argument vector being built.  The macro uses the
 * variables nr and argv of the scope where it is expanded: A is stored at
 * argv[nr] and nr is advanced while nr is below MAXARGS; otherwise the
 * program dies with an error message.
 */
204 #define push_arg(a) do { \
205 if (nr < MAXARGS) argv[nr++] = (a); \
206 else die("maximum number of args exceeded"); \
210 * If you send a singleton filename to grep, it does not give
211 * the name of the file. GNU grep has "-H" but we would want
212 * that behaviour in a portable way.
214 * So we keep two pathnames in argv buffer unsent to grep in
215 * the main loop if we need to do more than one grep.
/*
 * Hand the argument vector collected so far to exec_grep().
 *
 * opt  - grep options; unmatch_name_only is one of the flags that
 *        suppresses the extra /dev/null argument (the other conditions of
 *        that test are not visible here)
 * argc - number of ARGV slots in use
 * arg0 - index in ARGV where the pathnames start; external_grep() passes
 *        its count of option and pattern arguments
 * argv - argument vector under construction
 * kept - external_grep() passes a non-NULL pointer for calls made from
 *        inside its path loop and NULL for the concluding call
 *
 * In a mid-loop call the second-to-last path is saved and its slot
 * replaced while the list is sent to grep, so that two paths stay behind
 * for the concluding call; afterwards the saved path and the last path
 * are moved to ARGV[ARG0] and ARGV[ARG0 + 1].
 * NOTE(review): the updates made through KEPT and the return value are
 * not visible; presumably the exec_grep() status is returned -- confirm.
 */
217 static int flush_grep(struct grep_opt *opt,
218 int argc, int arg0, const char **argv, int *kept)
221 int count = argc - arg0;
222 const char *kept_0 = NULL;
226 * Because we keep at least 2 paths in the call from
227 * the main loop (i.e. kept != NULL), and MAXARGS is
228 * far greater than 2, this usually is a call to
229 * conclude the grep. However, the user could attempt
230 * to overflow the argv buffer by giving too many
231 * options to leave very small number of real
232 * arguments even for the call in the main loop.
235 die("insanely many options to grep");
238 * If we have two or more paths, we do not have to do
239 * anything special, but we need to push /dev/null to
240 * get "-H" behaviour of GNU grep portably but when we
241 * are not doing "-l" nor "-L" nor "-c".
245 !opt->unmatch_name_only &&
247 argv[argc++] = "/dev/null";
254 * Called because we found many paths and haven't finished
255 * iterating over the cache yet. We keep two paths
256 * for the concluding call. argv[argc-2] and argv[argc-1]
257 * has the last two paths, so save the first one away,
258 * replace it with NULL while sending the list to grep,
259 * and recover them after we are done.
262 kept_0 = argv[argc-2];
267 status = exec_grep(argc, argv);
271 * Then recover them. Now the last arg is beyond the
272 * terminating NULL which is at argc, and the second
273 * from the last is what we saved away in kept_0
275 argv[arg0++] = kept_0;
276 argv[arg0] = argv[argc+1];
/*
 * Append the parameter part of the terminal escape sequence ESCAPE_SEQ
 * to SB; external_grep() uses the result as the value of the GREP_COLOR
 * and GREP_COLORS environment variables.
 *
 * sb         - destination buffer; existing contents are kept
 * escape_seq - NUL-terminated escape sequence to convert
 *
 * Each m byte is replaced by a semicolon, ESC (octal 033) and [ bytes are
 * dropped, and every other byte is copied unchanged.  If this call
 * appended anything and the buffer now ends in a semicolon, that last
 * semicolon is removed.
 */
281 static void grep_add_color(struct strbuf *sb, const char *escape_seq)
283 size_t orig_len = sb->len;
285 while (*escape_seq) {
286 if (*escape_seq == 'm')
287 strbuf_addch(sb, ';');
288 else if (*escape_seq != '\033' && *escape_seq != '[')
289 strbuf_addch(sb, *escape_seq);
292 if (sb->len > orig_len && sb->buf[sb->len - 1] == ';')
293 strbuf_setlen(sb, sb->len - 1);
/*
 * Search the working tree files listed in the index by running the
 * external grep program instead of the built-in matcher.
 *
 * opt    - grep options to translate into external grep arguments
 * paths  - pathspec used to select index entries (see pathspec_matches())
 * cached - passed through by grep_cache(); its use is not visible here
 *
 * Visible outline:
 *   - an early test on opt->extended, or on relative output combined with
 *     a prefix (NOTE(review): presumably these cases are refused so the
 *     caller falls back to the built-in path -- confirm);
 *   - option flags in OPT are examined one by one to build the argument
 *     list; context line counts are formatted with snprintf() into the
 *     fixed randarg buffer, and the program dies if that buffer would
 *     overflow;
 *   - every pattern in opt->pattern_list is pushed as an argument;
 *   - match colouring is exported through the GREP_COLOR and GREP_COLORS
 *     environment variables, and a non-empty opt->color_external is
 *     pushed as an extra argument;
 *   - the index is walked: only regular files that match PATHS are used,
 *     a name that begins with a dash is copied with a leading ./ so it is
 *     not taken for an option, flush_grep() is called once the argument
 *     vector reaches MAXARGS, and following entries with the same name are
 *     skipped;
 *   - a last flush_grep() call with a NULL kept pointer concludes the run.
 * NOTE(review): the return value is not visible in this excerpt.
 */
296 static int external_grep(struct grep_opt *opt, const char **paths, int cached)
298 int i, nr, argc, hit, len, status;
299 const char *argv[MAXARGS+1];
300 char randarg[ARGBUF];
301 char *argptr = randarg;
304 if (opt->extended || (opt->relative && opt->prefix_length))
314 if (opt->regflags & REG_EXTENDED)
316 if (opt->regflags & REG_ICASE)
318 if (opt->binary == GREP_BINARY_NOMATCH)
320 if (opt->word_regexp)
324 if (opt->unmatch_name_only)
326 if (opt->null_following_name)
327 /* in GNU grep git's "-z" translates to "-Z" */
331 if (opt->post_context || opt->pre_context) {
332 if (opt->post_context != opt->pre_context) {
333 if (opt->pre_context) {
335 len += snprintf(argptr, sizeof(randarg)-len,
336 "%u", opt->pre_context) + 1;
337 if (sizeof(randarg) <= len)
338 die("maximum length of args exceeded");
342 if (opt->post_context) {
344 len += snprintf(argptr, sizeof(randarg)-len,
345 "%u", opt->post_context) + 1;
346 if (sizeof(randarg) <= len)
347 die("maximum length of args exceeded");
354 len += snprintf(argptr, sizeof(randarg)-len,
355 "%u", opt->post_context) + 1;
356 if (sizeof(randarg) <= len)
357 die("maximum length of args exceeded");
362 for (p = opt->pattern_list; p; p = p->next) {
364 push_arg(p->pattern);
367 struct strbuf sb = STRBUF_INIT;
369 grep_add_color(&sb, opt->color_match);
370 setenv("GREP_COLOR", sb.buf, 1);
373 strbuf_addstr(&sb, "mt=");
374 grep_add_color(&sb, opt->color_match);
375 strbuf_addstr(&sb, ":sl=:cx=:fn=:ln=:bn=:se=");
376 setenv("GREP_COLORS", sb.buf, 1);
380 if (opt->color_external && strlen(opt->color_external) > 0)
381 push_arg(opt->color_external);
386 for (i = 0; i < active_nr; i++) {
387 struct cache_entry *ce = active_cache[i];
390 if (!S_ISREG(ce->ce_mode))
392 if (!pathspec_matches(paths, ce->name))
395 if (name[0] == '-') {
396 int len = ce_namelen(ce);
397 name = xmalloc(len + 3);
398 memcpy(name, "./", 2);
399 memcpy(name + 2, ce->name, len + 1);
402 if (MAXARGS <= argc) {
403 status = flush_grep(opt, argc, nr, argv, &kept);
411 } while (i < active_nr &&
412 !strcmp(ce->name, active_cache[i]->name));
413 i--; /* compensate for loop control */
417 status = flush_grep(opt, argc, nr, argv, NULL);
/*
 * Grep the paths recorded in the index.
 *
 * opt    - grep options; the pattern list is freed before returning
 * paths  - pathspec limiting which index entries are searched
 * cached - nonzero to search the blobs recorded in the index, zero to
 *          search the working tree files
 *
 * When external grep support is compiled in, and neither CACHED nor the
 * builtin_grep flag is set, external_grep() is tried first.  Otherwise
 * each regular-file entry that matches PATHS is searched: through
 * grep_sha1() when CACHED is set or the entry carries CE_VALID, through
 * grep_file() otherwise.  Following entries with the same name are
 * skipped, and hits are OR-ed together.
 * NOTE(review): how the external_grep() result is used and the final
 * return are not visible here -- confirm before relying on them.
 */
425 static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
431 #if !NO_EXTERNAL_GREP
433 * Use the external "grep" command for the case where
434 * we grep through the checked-out files. It tends to
435 * be a lot more optimized
437 if (!cached && !builtin_grep) {
438 hit = external_grep(opt, paths, cached);
445 for (nr = 0; nr < active_nr; nr++) {
446 struct cache_entry *ce = active_cache[nr];
447 if (!S_ISREG(ce->ce_mode))
449 if (!pathspec_matches(paths, ce->name))
452 * If CE_VALID is on, we assume worktree file and its cache entry
453 * are identical, even if worktree file has been modified, so use
454 * cache version instead
456 if (cached || (ce->ce_flags & CE_VALID)) {
459 hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
462 hit |= grep_file(opt, ce->name);
466 } while (nr < active_nr &&
467 !strcmp(ce->name, active_cache[nr]->name));
468 nr--; /* compensate for loop control */
471 free_grep_patterns(opt);
/*
 * Recursively grep the blobs reachable from a tree object.
 *
 * opt       - grep options
 * paths     - pathspec used to prune entries and subdirectories
 * tree      - descriptor positioned on the tree to walk
 * tree_name - name of the top-level tree as given by the caller; it is
 *             placed in front of every reported path, followed by a colon
 * base      - path of this tree relative to the top-level tree; the empty
 *             string for the top level
 *
 * A path buffer is built as tree name, colon, BASE and entry name, with a
 * slash appended for directory entries so that pathspec_matches() can
 * decide whether descending is worthwhile.  The pathspec test sees only
 * the part after the tree-name prefix.  Regular files are searched with
 * grep_sha1(); for directories the subtree is loaded with
 * read_sha1_file() (the program dies if that fails) and walked by a
 * recursive call.  Hits are OR-ed together and the path buffer is
 * released at the end.
 * NOTE(review): the condition guarding the tree-name prefix, the
 * per-entry reset of the buffer and the return are not visible here.
 */
475 static int grep_tree(struct grep_opt *opt, const char **paths,
476 struct tree_desc *tree,
477 const char *tree_name, const char *base)
481 struct name_entry entry;
483 int tn_len = strlen(tree_name);
484 struct strbuf pathbuf;
486 strbuf_init(&pathbuf, PATH_MAX + tn_len);
489 strbuf_add(&pathbuf, tree_name, tn_len);
490 strbuf_addch(&pathbuf, ':');
491 tn_len = pathbuf.len;
493 strbuf_addstr(&pathbuf, base);
496 while (tree_entry(tree, &entry)) {
497 int te_len = tree_entry_len(entry.path, entry.sha1);
499 strbuf_add(&pathbuf, entry.path, te_len);
501 if (S_ISDIR(entry.mode))
502 /* Match "abc/" against pathspec to
503 * decide if we want to descend into "abc"
506 strbuf_addch(&pathbuf, '/');
508 down = pathbuf.buf + tn_len;
509 if (!pathspec_matches(paths, down))
511 else if (S_ISREG(entry.mode))
512 hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
513 else if (S_ISDIR(entry.mode)) {
514 enum object_type type;
515 struct tree_desc sub;
519 data = read_sha1_file(entry.sha1, &type, &size);
521 die("unable to read tree (%s)",
522 sha1_to_hex(entry.sha1));
523 init_tree_desc(&sub, data, size);
524 hit |= grep_tree(opt, paths, &sub, tree_name, down);
528 strbuf_release(&pathbuf);
/*
 * Grep one object named on the command line.
 *
 * opt   - grep options
 * paths - pathspec applied when the object is walked as a tree
 * obj   - object to search
 * name  - name the user gave for the object; used to label the output
 *
 * A blob is searched directly with grep_sha1() and that result is
 * returned.  A commit or tree is resolved to tree data with
 * read_object_with_reference() (the program dies if that fails) and
 * walked with grep_tree() starting from an empty base path.  Any other
 * object type makes the program die.
 * NOTE(review): the return for the commit and tree case is not visible;
 * presumably the grep_tree() result -- confirm.
 */
532 static int grep_object(struct grep_opt *opt, const char **paths,
533 struct object *obj, const char *name)
535 if (obj->type == OBJ_BLOB)
536 return grep_sha1(opt, obj->sha1, name, 0);
537 if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
538 struct tree_desc tree;
542 data = read_object_with_reference(obj->sha1, tree_type,
545 die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
546 init_tree_desc(&tree, data, size);
547 hit = grep_tree(opt, paths, &tree, name, "");
551 die("unable to grep from object of type %s", typename(obj->type));
/*
 * Message texts used by cmd_grep(): the usage line shown by usage(), and
 * the die() formats for a malformed context length, a missing context
 * length and an option that lacks its argument.  The formats that contain
 * a conversion take the offending text as their single string argument.
 */
554 static const char builtin_grep_usage[] =
555 "git grep <option>* [-e] <pattern> <rev>* [[--] <path>...]";
557 static const char emsg_invalid_context_len[] =
558 "%s: invalid context length argument";
559 static const char emsg_missing_context_len[] =
560 "missing context length argument";
561 static const char emsg_missing_argument[] =
562 "option requires an argument -%s";
/*
 * Entry point of the git grep command.
 *
 * argc, argv - command line; options are examined one at a time at argv[1]
 * prefix     - path of the current directory relative to the top of the
 *              working tree, or NULL or empty at the top; its length is
 *              stored in opt.prefix_length
 *
 * Visible outline:
 *   1. The grep_opt structure is zeroed and given defaults (REG_NEWLINE
 *      as the regex flags, bold red as the match colour), then the
 *      configuration is read through grep_config().
 *   2. Options are parsed.  Recognised spellings include --cached,
 *      --no-ext-grep, -a, -i, -I, -v, -E, -F, -G, -n, -h, -H, -l, -L, -z,
 *      -c, -w, the context options -A, -B, -C and -NUM, -f FILE (one
 *      pattern per line, empty lines ignored), -e PATTERN, the boolean
 *      operators --not, --and, --or and parentheses, --all-match,
 *      --full-name, --color, --no-color and the -- separator.  When no
 *      pattern was given explicitly, the first non-option argument becomes
 *      the pattern.
 *   3. The program dies when no pattern was given or when fixed-string
 *      matching is combined with regexp flags; otherwise the patterns are
 *      compiled.
 *   4. Remaining arguments that resolve through get_sha1() are collected
 *      as objects to search; what follows is treated as paths, which are
 *      checked with verify_filename() unless -- was seen, and turned into
 *      a pathspec with get_pathspec().  A two-slot zeroed pathspec array
 *      is allocated on the alternative branch.
 *   5. The index is searched through grep_cache() and the negated result
 *      is returned, or each listed object is dereferenced with deref_tag()
 *      and searched with grep_object(); combining --cached with trees is
 *      an error.
 * NOTE(review): the conditions choosing between the two branches of step
 * 5, the assignments made by most options and the final return value are
 * not visible here -- confirm before relying on them.
 */
564 int cmd_grep(int argc, const char **argv, const char *prefix)
568 int seen_dashdash = 0;
570 struct object_array list = { 0, 0, NULL };
571 const char **paths = NULL;
574 memset(&opt, 0, sizeof(opt));
576 opt.prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
579 opt.pattern_tail = &opt.pattern_list;
580 opt.regflags = REG_NEWLINE;
582 strcpy(opt.color_match, GIT_COLOR_RED GIT_COLOR_BOLD);
584 git_config(grep_config, &opt);
586 opt.color = git_use_color_default;
589 * If there is no -- then the paths must exist in the working
590 * tree. If there is no explicit pattern specified with -e or
591 * -f, we take the first unrecognized non option to be the
592 * pattern, but then what follows it must be zero or more
593 * valid refs up to the -- (if exists), and then existing
594 * paths. If there is an explicit pattern, then the first
595 * unrecognized non option is the beginning of the refs list
596 * that continues up to the -- (if exists), and then paths.
600 const char *arg = argv[1];
602 if (!strcmp("--cached", arg)) {
606 if (!strcmp("--no-ext-grep", arg)) {
610 if (!strcmp("-a", arg) ||
611 !strcmp("--text", arg)) {
612 opt.binary = GREP_BINARY_TEXT;
615 if (!strcmp("-i", arg) ||
616 !strcmp("--ignore-case", arg)) {
617 opt.regflags |= REG_ICASE;
620 if (!strcmp("-I", arg)) {
621 opt.binary = GREP_BINARY_NOMATCH;
624 if (!strcmp("-v", arg) ||
625 !strcmp("--invert-match", arg)) {
629 if (!strcmp("-E", arg) ||
630 !strcmp("--extended-regexp", arg)) {
631 opt.regflags |= REG_EXTENDED;
634 if (!strcmp("-F", arg) ||
635 !strcmp("--fixed-strings", arg)) {
639 if (!strcmp("-G", arg) ||
640 !strcmp("--basic-regexp", arg)) {
641 opt.regflags &= ~REG_EXTENDED;
644 if (!strcmp("-n", arg)) {
648 if (!strcmp("-h", arg)) {
652 if (!strcmp("-H", arg)) {
656 if (!strcmp("-l", arg) ||
657 !strcmp("--name-only", arg) ||
658 !strcmp("--files-with-matches", arg)) {
662 if (!strcmp("-L", arg) ||
663 !strcmp("--files-without-match", arg)) {
664 opt.unmatch_name_only = 1;
667 if (!strcmp("-z", arg) ||
668 !strcmp("--null", arg)) {
669 opt.null_following_name = 1;
672 if (!strcmp("-c", arg) ||
673 !strcmp("--count", arg)) {
677 if (!strcmp("-w", arg) ||
678 !strcmp("--word-regexp", arg)) {
682 if (!prefixcmp(arg, "-A") ||
683 !prefixcmp(arg, "-B") ||
684 !prefixcmp(arg, "-C") ||
685 (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) {
689 case 'A': case 'B': case 'C':
692 die(emsg_missing_context_len);
703 if (strtoul_ui(scan, 10, &num))
704 die(emsg_invalid_context_len, scan);
707 opt.post_context = num;
711 opt.post_context = num;
713 opt.pre_context = num;
718 if (!strcmp("-f", arg)) {
723 die(emsg_missing_argument, arg);
724 patterns = fopen(argv[1], "r");
726 die("'%s': %s", argv[1], strerror(errno));
727 while (fgets(buf, sizeof(buf), patterns)) {
728 int len = strlen(buf);
729 if (len && buf[len-1] == '\n')
731 /* ignore empty line like grep does */
734 append_grep_pattern(&opt, xstrdup(buf),
743 if (!strcmp("--not", arg)) {
744 append_grep_pattern(&opt, arg, "command line", 0,
748 if (!strcmp("--and", arg)) {
749 append_grep_pattern(&opt, arg, "command line", 0,
753 if (!strcmp("--or", arg))
754 continue; /* no-op */
755 if (!strcmp("(", arg)) {
756 append_grep_pattern(&opt, arg, "command line", 0,
760 if (!strcmp(")", arg)) {
761 append_grep_pattern(&opt, arg, "command line", 0,
765 if (!strcmp("--all-match", arg)) {
769 if (!strcmp("-e", arg)) {
771 append_grep_pattern(&opt, argv[1],
778 die(emsg_missing_argument, arg);
780 if (!strcmp("--full-name", arg)) {
784 if (!strcmp("--color", arg)) {
788 if (!strcmp("--no-color", arg)) {
792 if (!strcmp("--", arg)) {
793 /* later processing wants to have this at argv[1] */
799 usage(builtin_grep_usage);
801 /* First unrecognized non-option token */
802 if (!opt.pattern_list) {
803 append_grep_pattern(&opt, arg, "command line", 0,
808 /* We are looking at the first path or rev;
809 * it is found at argv[1] after leaving the
817 if (opt.color && !opt.color_external)
819 if (!opt.pattern_list)
820 die("no pattern given.");
821 if ((opt.regflags != REG_NEWLINE) && opt.fixed)
822 die("cannot mix --fixed-strings and regexp");
823 compile_grep_patterns(&opt);
825 /* Check revs and then paths */
826 for (i = 1; i < argc; i++) {
827 const char *arg = argv[i];
828 unsigned char sha1[20];
830 if (!get_sha1(arg, sha1)) {
831 struct object *object = parse_object(sha1);
833 die("bad object %s", arg);
834 add_object_array(object, arg, &list);
837 if (!strcmp(arg, "--")) {
844 /* The rest are paths */
845 if (!seen_dashdash) {
847 for (j = i; j < argc; j++)
848 verify_filename(prefix, argv[j]);
852 paths = get_pathspec(prefix, argv + i);
854 paths = xcalloc(2, sizeof(const char *));
862 return !grep_cache(&opt, paths, cached);
866 die("both --cached and trees are given.");
868 for (i = 0; i < list.nr; i++) {
869 struct object *real_obj;
870 real_obj = deref_tag(list.objects[i].item, NULL, 0);
871 if (grep_object(&opt, paths, real_obj, list.objects[i].name))
874 free_grep_patterns(&opt);