locking/atomic: Correct (cmp)xchg() instrumentation
[linux-block.git] / scripts / list-gitignored.c
CommitLineData
5c3d1d0a
MY
1// SPDX-License-Identifier: GPL-2.0-only
2//
3// Traverse the source tree, parsing all .gitignore files, and print file paths
4// that are ignored by git.
// The output is suitable for the --exclude-from option of tar.
// This is useful until tar's --exclude-vcs-ignores option works correctly.
7//
8// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
9// (a lot of code imported from GIT)
10
11#include <assert.h>
12#include <dirent.h>
13#include <errno.h>
14#include <fcntl.h>
15#include <getopt.h>
16#include <stdarg.h>
17#include <stdbool.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <sys/stat.h>
22#include <sys/types.h>
23#include <unistd.h>
24
25// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
26//
27//---------------------------(IMPORT FROM GIT BEGIN)---------------------------
28
29// Copied from environment.c
30
// When true, matching is case-insensitive: wildmatch() adds WM_CASEFOLD and
// fspathncmp() compares with strncasecmp(). Set by the -i/--ignore-case
// command-line option in main().
static bool ignore_case;
32
33// Copied from git-compat-util.h
34
/*
 * Sane ctype - no locale, and works with signed chars
 *
 * The <ctype.h> names are undefined and replaced by table lookups into
 * sane_ctype[] (and hexval_table[] for isxdigit), both defined further down
 * in this file. Every lookup casts its argument to unsigned char first.
 */
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit

/* Forward declaration; the table itself is defined after the helpers. */
static const unsigned char sane_ctype[256];
/* Character-class bits stored in sane_ctype[]. */
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
/* Non-zero if character x has any of the class bits in mask. */
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
/* Printable means the range 0x20 (space) through 0x7e ('~'). */
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
/* Punctuation is the union of the four "special character" classes. */
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
/* hexval_table[] holds -1 for every byte that is not a hex digit. */
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
/* Bit 0x20 distinguishes lower case (set) from upper case (clear). */
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
75
76static inline int sane_case(int x, int high)
77{
78 if (sane_istest(x, GIT_ALPHA))
79 x = (x & ~0x20) | high;
80 return x;
81}
82
83static inline int sane_iscase(int x, int is_lower)
84{
85 if (!sane_istest(x, GIT_ALPHA))
86 return 0;
87
88 if (is_lower)
89 return (x & 0x20) != 0;
90 else
91 return (x & 0x20) == 0;
92}
93
94// Copied from ctype.c
95
/*
 * One-letter aliases for the GIT_* class bits, so that the sane_ctype[]
 * initializer below fits sixteen entries per row.
 */
enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
	P = GIT_PATHSPEC_MAGIC,	/* other non-alnum, except for ] and } */
	X = GIT_CNTRL,
	U = GIT_PUNCT,
	Z = GIT_CNTRL | GIT_SPACE	/* control characters that are also whitespace */
};
107
/*
 * Character-class table indexed by unsigned char value. Only 0..127 are
 * listed; the remaining entries are left out of the initializer and are
 * therefore zero, i.e. bytes >= 128 belong to no class.
 */
static const unsigned char sane_ctype[256] = {
	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
	/* Nothing in the 128.. range */
};
119
120// Copied from hex.c
121
/*
 * Maps a byte to the value of the hexadecimal digit it represents
 * ('0'-'9', 'A'-'F', 'a'-'f'), or -1 if the byte is not a hex digit.
 * In this file it is only consulted through the isxdigit() macro.
 */
static const signed char hexval_table[256] = {
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-6f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
};
156
157// Copied from wildmatch.h
158
/* Flag bits accepted by wildmatch()/dowild(). */
#define WM_CASEFOLD 1	/* fold upper case to lower case before comparing */
#define WM_PATHNAME 2	/* '/' is only matched by a literal '/' or by "**" */

/* Result codes of wildmatch()/dowild(); WM_MATCH is 0, like strcmp(). */
#define WM_NOMATCH 1
#define WM_MATCH 0
/* The text ended early or the pattern is malformed: give up entirely. */
#define WM_ABORT_ALL -1
/* A '*' that may not cross '/' failed at a slash; only "**" may retry. */
#define WM_ABORT_TO_STARSTAR -2
166
167// Copied from wildmatch.c
168
/* Patterns and text are walked as unsigned bytes. */
typedef unsigned char uchar;

// local modification: remove NEGATE_CLASS(2)

/*
 * True if the "len" bytes at "class" spell exactly the string literal
 * "litmatch". Used to recognize names such as "alpha" inside [:...:].
 * The first-byte comparison is a cheap reject before calling strncmp().
 */
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((char*)class, litmatch, len) == 0)

// local modification: simplify macros
// These map the names used by dowild() onto the sane ctype macros above.
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define ISGRAPH(c) (isprint(c) && !isspace(c))
#define ISPRINT(c) isprint(c)
#define ISDIGIT(c) isdigit(c)
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISCNTRL(c) iscntrl(c)
#define ISLOWER(c) islower(c)
#define ISPUNCT(c) ispunct(c)
#define ISSPACE(c) isspace(c)
#define ISUPPER(c) isupper(c)
#define ISXDIGIT(c) isxdigit(c)
190
/*
 * Match pattern "p" against "text".
 *
 * The pattern and the text are walked in lockstep, one pattern element per
 * loop iteration; '*' and "**" are handled by recursing on the rest of the
 * pattern. "flags" is a mask of WM_CASEFOLD and WM_PATHNAME.
 *
 * Returns WM_MATCH when the whole text matches the whole pattern and
 * WM_NOMATCH when it does not. WM_ABORT_ALL is returned when the text runs
 * out before the pattern does or a bracket expression is malformed, and
 * WM_ABORT_TO_STARSTAR when a '*' that must not cross '/' fails at a slash.
 * The '*' loop propagates these abort codes instead of retrying further.
 */
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
{
	uchar p_ch;
	/* Start of the pattern, needed to tell whether "**" is at the front. */
	const uchar *pattern = p;

	for ( ; (p_ch = *p) != '\0'; text++, p++) {
		int matched, match_slash, negated;
		uchar t_ch, prev_ch;
		/* Only '*' can match an empty remainder of the text. */
		if ((t_ch = *text) == '\0' && p_ch != '*')
			return WM_ABORT_ALL;
		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
			t_ch = tolower(t_ch);
		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
			p_ch = tolower(p_ch);
		switch (p_ch) {
		case '\\':
			/* Literal match with following character. Note that the test
			 * in "default" handles the p[1] == '\0' failure case. */
			p_ch = *++p;
			/* FALLTHROUGH */
		default:
			if (t_ch != p_ch)
				return WM_NOMATCH;
			continue;
		case '?':
			/* Match anything but '/'. */
			if ((flags & WM_PATHNAME) && t_ch == '/')
				return WM_NOMATCH;
			continue;
		case '*':
			if (*++p == '*') {
				/* Character just before the run of asterisks. */
				const uchar *prev_p = p - 2;
				/* Collapse any run of three or more asterisks. */
				while (*++p == '*') {}
				if (!(flags & WM_PATHNAME))
					/* without WM_PATHNAME, '*' == '**' */
					match_slash = 1;
				else if ((prev_p < pattern || *prev_p == '/') &&
				    (*p == '\0' || *p == '/' ||
				     (p[0] == '\\' && p[1] == '/'))) {
					/*
					 * Assuming we already match 'foo/' and are at
					 * <star star slash>, just assume it matches
					 * nothing and go ahead match the rest of the
					 * pattern with the remaining string. This
					 * helps make foo/<*><*>/bar (<> because
					 * otherwise it breaks C comment syntax) match
					 * both foo/bar and foo/a/bar.
					 */
					if (p[0] == '/' &&
					    dowild(p + 1, text, flags) == WM_MATCH)
						return WM_MATCH;
					match_slash = 1;
				} else /* WM_PATHNAME is set */
					match_slash = 0;
			} else
				/* without WM_PATHNAME, '*' == '**' */
				match_slash = flags & WM_PATHNAME ? 0 : 1;
			if (*p == '\0') {
				/* Trailing "**" matches everything. Trailing "*" matches
				 * only if there are no more slash characters. */
				if (!match_slash) {
					if (strchr((char *)text, '/'))
						return WM_NOMATCH;
				}
				return WM_MATCH;
			} else if (!match_slash && *p == '/') {
				/*
				 * _one_ asterisk followed by a slash
				 * with WM_PATHNAME matches the next
				 * directory
				 */
				const char *slash = strchr((char*)text, '/');
				if (!slash)
					return WM_NOMATCH;
				text = (const uchar*)slash;
				/* the slash is consumed by the top-level for loop */
				break;
			}
			/* Try the rest of the pattern at each candidate text position. */
			while (1) {
				if (t_ch == '\0')
					break;
				/*
				 * Try to advance faster when an asterisk is
				 * followed by a literal. We know in this case
				 * that the string before the literal
				 * must belong to "*".
				 * If match_slash is false, do not look past
				 * the first slash as it cannot belong to '*'.
				 */
				if (!is_glob_special(*p)) {
					p_ch = *p;
					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
						p_ch = tolower(p_ch);
					while ((t_ch = *text) != '\0' &&
					       (match_slash || t_ch != '/')) {
						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
							t_ch = tolower(t_ch);
						if (t_ch == p_ch)
							break;
						text++;
					}
					if (t_ch != p_ch)
						return WM_NOMATCH;
				}
				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
					/* A "**" absorbs WM_ABORT_TO_STARSTAR and keeps trying. */
					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
						return matched;
				} else if (!match_slash && t_ch == '/')
					return WM_ABORT_TO_STARSTAR;
				t_ch = *++text;
			}
			return WM_ABORT_ALL;
		case '[':
			p_ch = *++p;
			/* '^' is accepted as a synonym for '!'. */
			if (p_ch == '^')
				p_ch = '!';
			/* Assign literal 1/0 because of "matched" comparison. */
			negated = p_ch == '!' ? 1 : 0;
			if (negated) {
				/* Inverted character class. */
				p_ch = *++p;
			}
			prev_ch = 0;
			matched = 0;
			do {
				/* The pattern ended before the closing ']'. */
				if (!p_ch)
					return WM_ABORT_ALL;
				if (p_ch == '\\') {
					p_ch = *++p;
					if (!p_ch)
						return WM_ABORT_ALL;
					if (t_ch == p_ch)
						matched = 1;
				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
					/* Range prev_ch-p_ch; the upper bound may be escaped. */
					p_ch = *++p;
					if (p_ch == '\\') {
						p_ch = *++p;
						if (!p_ch)
							return WM_ABORT_ALL;
					}
					if (t_ch <= p_ch && t_ch >= prev_ch)
						matched = 1;
					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
						uchar t_ch_upper = toupper(t_ch);
						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
							matched = 1;
					}
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (p_ch == '[' && p[1] == ':') {
					const uchar *s;
					int i;
					/* s marks the class name; p scans forward to the next ']'. */
					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
					if (!p_ch)
						return WM_ABORT_ALL;
					/* Length of the name between "[:" and ":]". */
					i = p - s - 1;
					if (i < 0 || p[-1] != ':') {
						/* Didn't find ":]", so treat like a normal set. */
						p = s - 2;
						p_ch = '[';
						if (t_ch == p_ch)
							matched = 1;
						continue;
					}
					if (CC_EQ(s,i, "alnum")) {
						if (ISALNUM(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "alpha")) {
						if (ISALPHA(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "blank")) {
						if (ISBLANK(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "cntrl")) {
						if (ISCNTRL(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "digit")) {
						if (ISDIGIT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "graph")) {
						if (ISGRAPH(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "lower")) {
						if (ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "print")) {
						if (ISPRINT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "punct")) {
						if (ISPUNCT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "space")) {
						if (ISSPACE(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "upper")) {
						if (ISUPPER(t_ch))
							matched = 1;
						/* t_ch was already lower-cased when folding. */
						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "xdigit")) {
						if (ISXDIGIT(t_ch))
							matched = 1;
					} else /* malformed [:class:] string */
						return WM_ABORT_ALL;
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (t_ch == p_ch)
					matched = 1;
			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
			/* With WM_PATHNAME a bracket expression never matches '/'. */
			if (matched == negated ||
			    ((flags & WM_PATHNAME) && t_ch == '/'))
				return WM_NOMATCH;
			continue;
		}
	}

	/* Pattern exhausted: it is a match only if the text is exhausted too. */
	return *text ? WM_NOMATCH : WM_MATCH;
}
408
409/* Match the "pattern" against the "text" string. */
410static int wildmatch(const char *pattern, const char *text, unsigned int flags)
411{
412 // local modification: move WM_CASEFOLD here
413 if (ignore_case)
414 flags |= WM_CASEFOLD;
415
416 return dowild((const uchar*)pattern, (const uchar*)text, flags);
417}
418
419// Copied from dir.h
420
/* Flag bits computed for each pattern by parse_path_pattern(). */
#define PATTERN_FLAG_NODIR 1		/* no '/' inside the pattern: match the basename only */
#define PATTERN_FLAG_ENDSWITH 4		/* '*' followed by a wildcard-free string */
#define PATTERN_FLAG_MUSTBEDIR 8	/* pattern had a trailing '/': directories only */
#define PATTERN_FLAG_NEGATIVE 16	/* pattern began with '!': a match un-ignores */
425
426// Copied from dir.c
427
428static int fspathncmp(const char *a, const char *b, size_t count)
429{
430 return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
431}
432
/*
 * Return the length of the leading part of "match" that contains no glob
 * special character, i.e. the index of the first such character or of the
 * terminating NUL, whichever comes first.
 */
static int simple_length(const char *match)
{
	int len;

	for (len = 0; ; len++) {
		unsigned char c = match[len];

		if (c == '\0' || is_glob_special(c))
			return len;
	}
}
444
/* Return non-zero if "string" contains no glob special character at all. */
static int no_wildcard(const char *string)
{
	const char *stop = string + simple_length(string);

	/* The literal prefix reaches the NUL only if nothing special precedes it. */
	return *stop == '\0';
}
449
450static void parse_path_pattern(const char **pattern,
451 int *patternlen,
452 unsigned *flags,
453 int *nowildcardlen)
454{
455 const char *p = *pattern;
456 size_t i, len;
457
458 *flags = 0;
459 if (*p == '!') {
460 *flags |= PATTERN_FLAG_NEGATIVE;
461 p++;
462 }
463 len = strlen(p);
464 if (len && p[len - 1] == '/') {
465 len--;
466 *flags |= PATTERN_FLAG_MUSTBEDIR;
467 }
468 for (i = 0; i < len; i++) {
469 if (p[i] == '/')
470 break;
471 }
472 if (i == len)
473 *flags |= PATTERN_FLAG_NODIR;
474 *nowildcardlen = simple_length(p);
475 /*
476 * we should have excluded the trailing slash from 'p' too,
477 * but that's one more allocation. Instead just make sure
478 * nowildcardlen does not exceed real patternlen
479 */
480 if (*nowildcardlen > len)
481 *nowildcardlen = len;
482 if (*p == '*' && no_wildcard(p + 1))
483 *flags |= PATTERN_FLAG_ENDSWITH;
484 *pattern = p;
485 *patternlen = len;
486}
487
/*
 * Cut off the run of unescaped spaces at the end of "buf", in place.
 *
 * A space preceded by a backslash is kept, and so is everything before it.
 * If the string ends with a lone backslash, nothing is trimmed at all.
 */
static void trim_trailing_spaces(char *buf)
{
	char *run_start = NULL;	/* first space of the current run, if any */
	char *cur = buf;

	while (*cur) {
		if (*cur == ' ') {
			if (!run_start)
				run_start = cur;
		} else {
			if (*cur == '\\') {
				/* Skip the escaped character; stop on a dangling '\'. */
				cur++;
				if (*cur == '\0')
					return;
			}
			run_start = NULL;
		}
		cur++;
	}

	if (run_start)
		*run_start = '\0';
}
510
511static int match_basename(const char *basename, int basenamelen,
512 const char *pattern, int prefix, int patternlen,
513 unsigned flags)
514{
515 if (prefix == patternlen) {
516 if (patternlen == basenamelen &&
517 !fspathncmp(pattern, basename, basenamelen))
518 return 1;
519 } else if (flags & PATTERN_FLAG_ENDSWITH) {
520 /* "*literal" matching against "fooliteral" */
521 if (patternlen - 1 <= basenamelen &&
522 !fspathncmp(pattern + 1,
523 basename + basenamelen - (patternlen - 1),
524 patternlen - 1))
525 return 1;
526 } else {
527 // local modification: call wildmatch() directly
528 if (!wildmatch(pattern, basename, flags))
529 return 1;
530 }
531 return 0;
532}
533
534static int match_pathname(const char *pathname, int pathlen,
535 const char *base, int baselen,
536 const char *pattern, int prefix, int patternlen)
537{
538 // local modification: remove local variables
539
540 /*
541 * match with FNM_PATHNAME; the pattern has base implicitly
542 * in front of it.
543 */
544 if (*pattern == '/') {
545 pattern++;
546 patternlen--;
547 prefix--;
548 }
549
550 /*
551 * baselen does not count the trailing slash. base[] may or
552 * may not end with a trailing slash though.
553 */
554 if (pathlen < baselen + 1 ||
555 (baselen && pathname[baselen] != '/') ||
556 fspathncmp(pathname, base, baselen))
557 return 0;
558
559 // local modification: simplified because always baselen > 0
560 pathname += baselen + 1;
561 pathlen -= baselen + 1;
562
563 if (prefix) {
564 /*
565 * if the non-wildcard part is longer than the
566 * remaining pathname, surely it cannot match.
567 */
568 if (prefix > pathlen)
569 return 0;
570
571 if (fspathncmp(pattern, pathname, prefix))
572 return 0;
573 pattern += prefix;
574 patternlen -= prefix;
575 pathname += prefix;
576 pathlen -= prefix;
577
578 /*
579 * If the whole pattern did not have a wildcard,
580 * then our prefix match is all we need; we
581 * do not need to call fnmatch at all.
582 */
583 if (!patternlen && !pathlen)
584 return 1;
585 }
586
587 // local modification: call wildmatch() directly
588 return !wildmatch(pattern, pathname, WM_PATHNAME);
589}
590
591// Copied from git/utf8.c
592
// UTF-8 byte order mark (EF BB BF); skipped when found at the start of a
// .gitignore file.
static const char utf8_bom[] = "\357\273\277";
594
595//----------------------------(IMPORT FROM GIT END)----------------------------
596
// One parsed exclude pattern together with the directory it belongs to.
// Instances are allocated and filled in by add_pattern().
struct pattern {
	unsigned int flags;	// mask of PATTERN_FLAG_* bits
	int nowildcardlen;	// length of the wildcard-free start of the pattern
	int patternlen;		// length of the pattern text, without its NUL
	int dirlen;		// length of the directory string stored below
	// NUL-terminated pattern text, immediately followed by the
	// NUL-terminated path of the directory the pattern came from.
	char pattern[];
};
604
// Stack of active patterns; is_ignored() scans it from the end so that the
// most recently added pattern wins.
static struct pattern **pattern_list;
// Number of used and of allocated slots in pattern_list
static int nr_patterns, alloced_patterns;

// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current directory level and the allocated size of nr_patterns_at
static int depth, max_depth;
// Print debug() messages when true (-d/--debug)
static bool debug_on;
// Destination of the ignored-path list, and of the optional stat listing
// (stat_fp stays NULL unless -s/--stat is given)
static FILE *out_fp, *stat_fp;
// String printed in front of every path (-p/--prefix)
static char *prefix = "";
// Basename of argv[0], used in error messages
static char *progname;
616
// Report the current errno through perror(), labelled with 's', and
// terminate the program with a failure status. Never returns.
static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);
	exit(EXIT_FAILURE);
}
623
624static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
625{
626 va_list args;
627
628 fprintf(stderr, "%s: error: ", progname);
629
630 va_start(args, fmt);
631 vfprintf(stderr, fmt, args);
632 va_end(args);
633
634 exit(EXIT_FAILURE);
635}
636
637static void debug(const char *fmt, ...)
638{
639 va_list args;
640 int i;
641
642 if (!debug_on)
643 return;
644
645 fprintf(stderr, "[DEBUG] ");
646
647 for (i = 0; i < depth * 2; i++)
648 fputc(' ', stderr);
649
650 va_start(args, fmt);
651 vfprintf(stderr, fmt, args);
652 va_end(args);
653}
654
655static void *xrealloc(void *ptr, size_t size)
656{
657 ptr = realloc(ptr, size);
658 if (!ptr)
659 perror_exit(progname);
660
661 return ptr;
662}
663
// Allocate 'size' bytes through xrealloc(), so a failed allocation
// terminates the program instead of returning NULL.
static void *xmalloc(size_t size)
{
	void *fresh = xrealloc(NULL, size);

	return fresh;
}
668
669// similar to last_matching_pattern_from_list() in GIT
670static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
671{
672 int i;
673
674 // Search in the reverse order because the last matching pattern wins.
675 for (i = nr_patterns - 1; i >= 0; i--) {
676 struct pattern *p = pattern_list[i];
677 unsigned int flags = p->flags;
678 const char *gitignore_dir = p->pattern + p->patternlen + 1;
679 bool ignored;
680
681 if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
682 continue;
683
684 if (flags & PATTERN_FLAG_NODIR) {
685 if (!match_basename(path + dirlen + 1,
686 pathlen - dirlen - 1,
687 p->pattern,
688 p->nowildcardlen,
689 p->patternlen,
690 p->flags))
691 continue;
692 } else {
693 if (!match_pathname(path, pathlen,
694 gitignore_dir, p->dirlen,
695 p->pattern,
696 p->nowildcardlen,
697 p->patternlen))
698 continue;
699 }
700
701 debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
702 flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
703 flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
704 gitignore_dir);
705
706 ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
707 if (ignored)
708 debug("Ignore: %s\n", path);
709
710 return ignored;
711 }
712
713 debug("%s: no match\n", path);
714
715 return false;
716}
717
718static void add_pattern(const char *string, const char *dir, int dirlen)
719{
720 struct pattern *p;
721 int patternlen, nowildcardlen;
722 unsigned int flags;
723
724 parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
725
726 if (patternlen == 0)
727 return;
728
729 p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);
730
731 memcpy(p->pattern, string, patternlen);
732 p->pattern[patternlen] = 0;
733 memcpy(p->pattern + patternlen + 1, dir, dirlen);
734 p->pattern[patternlen + 1 + dirlen] = 0;
735
736 p->patternlen = patternlen;
737 p->nowildcardlen = nowildcardlen;
738 p->dirlen = dirlen;
739 p->flags = flags;
740
741 debug("Add pattern: %s%s%s\n",
742 flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
743 flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");
744
745 if (nr_patterns >= alloced_patterns) {
746 alloced_patterns += 128;
747 pattern_list = xrealloc(pattern_list,
748 sizeof(*pattern_list) * alloced_patterns);
749 }
750
751 pattern_list[nr_patterns++] = p;
752}
753
754// similar to add_patterns_from_buffer() in GIT
755static void add_patterns_from_gitignore(const char *dir, int dirlen)
756{
757 struct stat st;
758 char path[PATH_MAX], *buf, *entry;
759 size_t size;
760 int fd, pathlen, i;
761
762 pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
763 if (pathlen >= sizeof(path))
764 error_exit("%s: too long path was truncated\n", path);
765
766 fd = open(path, O_RDONLY | O_NOFOLLOW);
767 if (fd < 0) {
768 if (errno != ENOENT)
769 return perror_exit(path);
770 return;
771 }
772
773 if (fstat(fd, &st) < 0)
774 perror_exit(path);
775
776 size = st.st_size;
777
778 buf = xmalloc(size + 1);
779 if (read(fd, buf, st.st_size) != st.st_size)
780 perror_exit(path);
781
782 buf[st.st_size] = '\n';
783 if (close(fd))
784 perror_exit(path);
785
786 debug("Parse %s\n", path);
787
788 entry = buf;
789
790 // skip utf8 bom
791 if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
792 entry += strlen(utf8_bom);
793
794 for (i = entry - buf; i < size; i++) {
795 if (buf[i] == '\n') {
796 if (entry != buf + i && entry[0] != '#') {
797 buf[i - (i && buf[i-1] == '\r')] = 0;
798 trim_trailing_spaces(entry);
799 add_pattern(entry, dir, dirlen);
800 }
801 entry = buf + i + 1;
802 }
803 }
804
805 free(buf);
806}
807
808// Save the current number of patterns and increment the depth
809static void increment_depth(void)
810{
811 if (depth >= max_depth) {
812 max_depth += 1;
813 nr_patterns_at = xrealloc(nr_patterns_at,
814 sizeof(*nr_patterns_at) * max_depth);
815 }
816
817 nr_patterns_at[depth] = nr_patterns;
818 depth++;
819}
820
821// Decrement the depth, and free up the patterns of this directory level.
822static void decrement_depth(void)
823{
824 depth--;
825 assert(depth >= 0);
826
827 while (nr_patterns > nr_patterns_at[depth])
828 free(pattern_list[--nr_patterns]);
829}
830
831static void print_path(const char *path)
832{
833 // The path always starts with "./"
834 assert(strlen(path) >= 2);
835
836 // Replace the root directory with a preferred prefix.
837 // This is useful for the tar command.
838 fprintf(out_fp, "%s%s\n", prefix, path + 2);
839}
840
841static void print_stat(const char *path, struct stat *st)
842{
843 if (!stat_fp)
844 return;
845
846 if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
847 return;
848
849 assert(strlen(path) >= 2);
850
851 fprintf(stat_fp, "%c %9ld %10ld %s\n",
852 S_ISLNK(st->st_mode) ? 'l' : '-',
853 st->st_size, st->st_mtim.tv_sec, path + 2);
854}
855
856// Traverse the entire directory tree, parsing .gitignore files.
857// Print file paths that are not tracked by git.
858//
859// Return true if all files under the directory are ignored, false otherwise.
860static bool traverse_directory(const char *dir, int dirlen)
861{
862 bool all_ignored = true;
863 DIR *dirp;
864
865 debug("Enter[%d]: %s\n", depth, dir);
866 increment_depth();
867
868 add_patterns_from_gitignore(dir, dirlen);
869
870 dirp = opendir(dir);
871 if (!dirp)
872 perror_exit(dir);
873
874 while (1) {
875 struct dirent *d;
876 struct stat st;
877 char path[PATH_MAX];
878 int pathlen;
879 bool ignored;
880
881 errno = 0;
882 d = readdir(dirp);
883 if (!d) {
884 if (errno)
885 perror_exit(dir);
886 break;
887 }
888
889 if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, "."))
890 continue;
891
892 pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
893 if (pathlen >= sizeof(path))
894 error_exit("%s: too long path was truncated\n", path);
895
896 if (lstat(path, &st) < 0)
897 perror_exit(path);
898
899 if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) ||
900 is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) {
901 ignored = true;
902 } else {
903 if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode))
904 // If all the files in a directory are ignored,
905 // let's ignore that directory as well. This
906 // will avoid empty directories in the tarball.
907 ignored = traverse_directory(path, pathlen);
908 else
909 ignored = false;
910 }
911
912 if (ignored) {
913 print_path(path);
914 } else {
915 print_stat(path, &st);
916 all_ignored = false;
917 }
918 }
919
920 if (closedir(dirp))
921 perror_exit(dir);
922
923 decrement_depth();
924 debug("Leave[%d]: %s\n", depth, dir);
925
926 return all_ignored;
927}
928
929static void usage(void)
930{
931 fprintf(stderr,
932 "usage: %s [options]\n"
933 "\n"
934 "Show files that are ignored by git\n"
935 "\n"
936 "options:\n"
937 " -d, --debug print debug messages to stderr\n"
938 " -e, --exclude PATTERN add the given exclude pattern\n"
939 " -h, --help show this help message and exit\n"
940 " -i, --ignore-case Ignore case differences between the patterns and the files\n"
941 " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n"
942 " -p, --prefix PREFIX prefix added to each path (default: empty string)\n"
943 " -r, --rootdir DIR root of the source tree (default: current working directory)\n"
944 " -s, --stat FILE output the file stat of non-ignored files to a file\n",
945 progname);
946}
947
// Open an output stream and store it in *fp.
//
// The special name "-" selects stdout; any other name is opened for
// writing. A file that cannot be opened terminates the program.
static void open_output(const char *pathname, FILE **fp)
{
	FILE *stream;

	if (!strcmp(pathname, "-")) {
		*fp = stdout;
		return;
	}

	stream = fopen(pathname, "w");
	if (!stream)
		perror_exit(pathname);

	*fp = stream;
}
958
// Flush and close an output stream opened by open_output().
//
// The program terminates with an error if the stream has its error
// indicator set after flushing, or if closing it fails.
static void close_output(const char *pathname, FILE *fp)
{
	fflush(fp);

	// A set error indicator means some earlier write was lost.
	if (ferror(fp) != 0)
		error_exit("not all data was written to the output\n");

	if (fclose(fp) != 0)
		perror_exit(pathname);
}
969
970int main(int argc, char *argv[])
971{
972 const char *output = "-";
973 const char *rootdir = ".";
974 const char *stat = NULL;
975
976 progname = strrchr(argv[0], '/');
977 if (progname)
978 progname++;
979 else
980 progname = argv[0];
981
982 while (1) {
983 static struct option long_options[] = {
984 {"debug", no_argument, NULL, 'd'},
985 {"help", no_argument, NULL, 'h'},
986 {"ignore-case", no_argument, NULL, 'i'},
987 {"output", required_argument, NULL, 'o'},
988 {"prefix", required_argument, NULL, 'p'},
989 {"rootdir", required_argument, NULL, 'r'},
990 {"stat", required_argument, NULL, 's'},
991 {"exclude", required_argument, NULL, 'x'},
992 {},
993 };
994
995 int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);
996
997 if (c == -1)
998 break;
999
1000 switch (c) {
1001 case 'd':
1002 debug_on = true;
1003 break;
1004 case 'h':
1005 usage();
1006 exit(0);
1007 case 'i':
1008 ignore_case = true;
1009 break;
1010 case 'o':
1011 output = optarg;
1012 break;
1013 case 'p':
1014 prefix = optarg;
1015 break;
1016 case 'r':
1017 rootdir = optarg;
1018 break;
1019 case 's':
1020 stat = optarg;
1021 break;
1022 case 'x':
1023 add_pattern(optarg, ".", strlen("."));
1024 break;
1025 case '?':
1026 usage();
1027 /* fallthrough */
1028 default:
1029 exit(EXIT_FAILURE);
1030 }
1031 }
1032
1033 open_output(output, &out_fp);
1034 if (stat && stat[0])
1035 open_output(stat, &stat_fp);
1036
1037 if (chdir(rootdir))
1038 perror_exit(rootdir);
1039
1040 add_pattern(".git/", ".", strlen("."));
1041
1042 if (traverse_directory(".", strlen(".")))
1043 print_path("./");
1044
1045 assert(depth == 0);
1046
1047 while (nr_patterns > 0)
1048 free(pattern_list[--nr_patterns]);
1049 free(pattern_list);
1050 free(nr_patterns_at);
1051
1052 close_output(output, out_fp);
1053 if (stat_fp)
1054 close_output(stat, stat_fp);
1055
1056 return 0;
1057}