Commit | Line | Data |
---|---|---|
5c3d1d0a MY |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | // | |
3 | // Traverse the source tree, parsing all .gitignore files, and print file paths | |
4 | // that are ignored by git. | |
// The output is suitable for the --exclude-from option of tar.
// This is useful until the --exclude-vcs-ignores option of tar works correctly.
7 | // | |
8 | // Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org> | |
9 | // (a lot of code imported from GIT) | |
10 | ||
11 | #include <assert.h> | |
12 | #include <dirent.h> | |
13 | #include <errno.h> | |
14 | #include <fcntl.h> | |
15 | #include <getopt.h> | |
16 | #include <stdarg.h> | |
17 | #include <stdbool.h> | |
18 | #include <stdio.h> | |
19 | #include <stdlib.h> | |
20 | #include <string.h> | |
21 | #include <sys/stat.h> | |
22 | #include <sys/types.h> | |
23 | #include <unistd.h> | |
24 | ||
25 | // Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT | |
26 | // | |
27 | //---------------------------(IMPORT FROM GIT BEGIN)--------------------------- | |
28 | ||
29 | // Copied from environment.c | |
30 | ||
// When true, patterns and paths are compared case-insensitively:
// wildmatch() adds WM_CASEFOLD and fspathncmp() uses strncasecmp().
// Set from the -i/--ignore-case command line option.
static bool ignore_case;
32 | ||
33 | // Copied from git-compat-util.h | |
34 | ||
/* Sane ctype - no locale, and works with signed chars */
/*
 * The standard <ctype.h> names are undefined first so that the table-driven
 * replacements below take over for the rest of this file.
 */
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit

/* Tentative definition; the initialized table follows further down. */
static const unsigned char sane_ctype[256];
/* Bit masks stored in sane_ctype[]; one byte may carry several of them. */
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
/* The cast to unsigned char keeps negative (signed) chars inside the table. */
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
/* hexval_table[] holds -1 for every byte that is not a hexadecimal digit. */
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
/* 0x20 is the bit that separates 'A'..'Z' from 'a'..'z' in ASCII. */
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
75 | ||
76 | static inline int sane_case(int x, int high) | |
77 | { | |
78 | if (sane_istest(x, GIT_ALPHA)) | |
79 | x = (x & ~0x20) | high; | |
80 | return x; | |
81 | } | |
82 | ||
83 | static inline int sane_iscase(int x, int is_lower) | |
84 | { | |
85 | if (!sane_istest(x, GIT_ALPHA)) | |
86 | return 0; | |
87 | ||
88 | if (is_lower) | |
89 | return (x & 0x20) != 0; | |
90 | else | |
91 | return (x & 0x20) == 0; | |
92 | } | |
93 | ||
94 | // Copied from ctype.c | |
95 | ||
/* One-letter aliases of the GIT_* class bits, used to keep sane_ctype[] compact. */
enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
	X = GIT_CNTRL,
	U = GIT_PUNCT,
	Z = GIT_CNTRL | GIT_SPACE	/* both a control character and white space */
};
107 | ||
/*
 * Character class table indexed by byte value (see sane_istest()).
 * Only the first 128 entries are spelled out; the remaining ones are
 * implicitly zero, i.e. bytes >= 128 belong to no class.
 */
static const unsigned char sane_ctype[256] = {
	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
	/* Nothing in the 128.. range */
};
119 | ||
120 | // Copied from hex.c | |
121 | ||
/*
 * Value of each byte when read as a hexadecimal digit, or -1 when the byte
 * is not one of 0-9, A-F, a-f.  Used by isxdigit() above.
 */
static const signed char hexval_table[256] = {
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-6f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
};
156 | ||
157 | // Copied from wildmatch.h | |
158 | ||
/* Flags accepted by wildmatch() / dowild(). */
#define WM_CASEFOLD 1	/* upper-case letters are lowered before comparing */
#define WM_PATHNAME 2	/* '/' is not matched by '?', a single '*' or a bracket expression */

/* Results of dowild(); WM_MATCH is 0, so callers test "!wildmatch(...)". */
#define WM_NOMATCH 1
#define WM_MATCH 0
/* Hard failure, e.g. the text ran out or a bracket expression is unterminated. */
#define WM_ABORT_ALL -1
/* A '*' that must not cross '/' hit one; only an enclosing "**" keeps retrying. */
#define WM_ABORT_TO_STARSTAR -2
166 | ||
167 | // Copied from wildmatch.c | |
168 | ||
/* Patterns and text are walked as unsigned bytes so that they can index tables. */
typedef unsigned char uchar;

// local modification: remove NEGATE_CLASS(2)

/*
 * True when the "len" bytes starting at "class" spell exactly the string
 * literal "litmatch" (used for the names inside [:name:]).
 */
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((char*)class, litmatch, len) == 0)

// local modification: simplify macros
/* Character class predicates used by dowild(); all map to the sane ctype macros. */
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define ISGRAPH(c) (isprint(c) && !isspace(c))
#define ISPRINT(c) isprint(c)
#define ISDIGIT(c) isdigit(c)
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISCNTRL(c) iscntrl(c)
#define ISLOWER(c) islower(c)
#define ISPUNCT(c) ispunct(c)
#define ISSPACE(c) isspace(c)
#define ISUPPER(c) isupper(c)
#define ISXDIGIT(c) isxdigit(c)
190 | ||
191 | /* Match pattern "p" against "text" */ | |
192 | static int dowild(const uchar *p, const uchar *text, unsigned int flags) | |
193 | { | |
194 | uchar p_ch; | |
195 | const uchar *pattern = p; | |
196 | ||
197 | for ( ; (p_ch = *p) != '\0'; text++, p++) { | |
198 | int matched, match_slash, negated; | |
199 | uchar t_ch, prev_ch; | |
200 | if ((t_ch = *text) == '\0' && p_ch != '*') | |
201 | return WM_ABORT_ALL; | |
202 | if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) | |
203 | t_ch = tolower(t_ch); | |
204 | if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) | |
205 | p_ch = tolower(p_ch); | |
206 | switch (p_ch) { | |
207 | case '\\': | |
208 | /* Literal match with following character. Note that the test | |
209 | * in "default" handles the p[1] == '\0' failure case. */ | |
210 | p_ch = *++p; | |
211 | /* FALLTHROUGH */ | |
212 | default: | |
213 | if (t_ch != p_ch) | |
214 | return WM_NOMATCH; | |
215 | continue; | |
216 | case '?': | |
217 | /* Match anything but '/'. */ | |
218 | if ((flags & WM_PATHNAME) && t_ch == '/') | |
219 | return WM_NOMATCH; | |
220 | continue; | |
221 | case '*': | |
222 | if (*++p == '*') { | |
223 | const uchar *prev_p = p - 2; | |
224 | while (*++p == '*') {} | |
225 | if (!(flags & WM_PATHNAME)) | |
226 | /* without WM_PATHNAME, '*' == '**' */ | |
227 | match_slash = 1; | |
228 | else if ((prev_p < pattern || *prev_p == '/') && | |
229 | (*p == '\0' || *p == '/' || | |
230 | (p[0] == '\\' && p[1] == '/'))) { | |
231 | /* | |
232 | * Assuming we already match 'foo/' and are at | |
233 | * <star star slash>, just assume it matches | |
234 | * nothing and go ahead match the rest of the | |
235 | * pattern with the remaining string. This | |
236 | * helps make foo/<*><*>/bar (<> because | |
237 | * otherwise it breaks C comment syntax) match | |
238 | * both foo/bar and foo/a/bar. | |
239 | */ | |
240 | if (p[0] == '/' && | |
241 | dowild(p + 1, text, flags) == WM_MATCH) | |
242 | return WM_MATCH; | |
243 | match_slash = 1; | |
244 | } else /* WM_PATHNAME is set */ | |
245 | match_slash = 0; | |
246 | } else | |
247 | /* without WM_PATHNAME, '*' == '**' */ | |
248 | match_slash = flags & WM_PATHNAME ? 0 : 1; | |
249 | if (*p == '\0') { | |
250 | /* Trailing "**" matches everything. Trailing "*" matches | |
251 | * only if there are no more slash characters. */ | |
252 | if (!match_slash) { | |
253 | if (strchr((char *)text, '/')) | |
254 | return WM_NOMATCH; | |
255 | } | |
256 | return WM_MATCH; | |
257 | } else if (!match_slash && *p == '/') { | |
258 | /* | |
259 | * _one_ asterisk followed by a slash | |
260 | * with WM_PATHNAME matches the next | |
261 | * directory | |
262 | */ | |
263 | const char *slash = strchr((char*)text, '/'); | |
264 | if (!slash) | |
265 | return WM_NOMATCH; | |
266 | text = (const uchar*)slash; | |
267 | /* the slash is consumed by the top-level for loop */ | |
268 | break; | |
269 | } | |
270 | while (1) { | |
271 | if (t_ch == '\0') | |
272 | break; | |
273 | /* | |
274 | * Try to advance faster when an asterisk is | |
275 | * followed by a literal. We know in this case | |
276 | * that the string before the literal | |
277 | * must belong to "*". | |
278 | * If match_slash is false, do not look past | |
279 | * the first slash as it cannot belong to '*'. | |
280 | */ | |
281 | if (!is_glob_special(*p)) { | |
282 | p_ch = *p; | |
283 | if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) | |
284 | p_ch = tolower(p_ch); | |
285 | while ((t_ch = *text) != '\0' && | |
286 | (match_slash || t_ch != '/')) { | |
287 | if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) | |
288 | t_ch = tolower(t_ch); | |
289 | if (t_ch == p_ch) | |
290 | break; | |
291 | text++; | |
292 | } | |
293 | if (t_ch != p_ch) | |
294 | return WM_NOMATCH; | |
295 | } | |
296 | if ((matched = dowild(p, text, flags)) != WM_NOMATCH) { | |
297 | if (!match_slash || matched != WM_ABORT_TO_STARSTAR) | |
298 | return matched; | |
299 | } else if (!match_slash && t_ch == '/') | |
300 | return WM_ABORT_TO_STARSTAR; | |
301 | t_ch = *++text; | |
302 | } | |
303 | return WM_ABORT_ALL; | |
304 | case '[': | |
305 | p_ch = *++p; | |
306 | if (p_ch == '^') | |
307 | p_ch = '!'; | |
308 | /* Assign literal 1/0 because of "matched" comparison. */ | |
309 | negated = p_ch == '!' ? 1 : 0; | |
310 | if (negated) { | |
311 | /* Inverted character class. */ | |
312 | p_ch = *++p; | |
313 | } | |
314 | prev_ch = 0; | |
315 | matched = 0; | |
316 | do { | |
317 | if (!p_ch) | |
318 | return WM_ABORT_ALL; | |
319 | if (p_ch == '\\') { | |
320 | p_ch = *++p; | |
321 | if (!p_ch) | |
322 | return WM_ABORT_ALL; | |
323 | if (t_ch == p_ch) | |
324 | matched = 1; | |
325 | } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') { | |
326 | p_ch = *++p; | |
327 | if (p_ch == '\\') { | |
328 | p_ch = *++p; | |
329 | if (!p_ch) | |
330 | return WM_ABORT_ALL; | |
331 | } | |
332 | if (t_ch <= p_ch && t_ch >= prev_ch) | |
333 | matched = 1; | |
334 | else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) { | |
335 | uchar t_ch_upper = toupper(t_ch); | |
336 | if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch) | |
337 | matched = 1; | |
338 | } | |
339 | p_ch = 0; /* This makes "prev_ch" get set to 0. */ | |
340 | } else if (p_ch == '[' && p[1] == ':') { | |
341 | const uchar *s; | |
342 | int i; | |
343 | for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/ | |
344 | if (!p_ch) | |
345 | return WM_ABORT_ALL; | |
346 | i = p - s - 1; | |
347 | if (i < 0 || p[-1] != ':') { | |
348 | /* Didn't find ":]", so treat like a normal set. */ | |
349 | p = s - 2; | |
350 | p_ch = '['; | |
351 | if (t_ch == p_ch) | |
352 | matched = 1; | |
353 | continue; | |
354 | } | |
355 | if (CC_EQ(s,i, "alnum")) { | |
356 | if (ISALNUM(t_ch)) | |
357 | matched = 1; | |
358 | } else if (CC_EQ(s,i, "alpha")) { | |
359 | if (ISALPHA(t_ch)) | |
360 | matched = 1; | |
361 | } else if (CC_EQ(s,i, "blank")) { | |
362 | if (ISBLANK(t_ch)) | |
363 | matched = 1; | |
364 | } else if (CC_EQ(s,i, "cntrl")) { | |
365 | if (ISCNTRL(t_ch)) | |
366 | matched = 1; | |
367 | } else if (CC_EQ(s,i, "digit")) { | |
368 | if (ISDIGIT(t_ch)) | |
369 | matched = 1; | |
370 | } else if (CC_EQ(s,i, "graph")) { | |
371 | if (ISGRAPH(t_ch)) | |
372 | matched = 1; | |
373 | } else if (CC_EQ(s,i, "lower")) { | |
374 | if (ISLOWER(t_ch)) | |
375 | matched = 1; | |
376 | } else if (CC_EQ(s,i, "print")) { | |
377 | if (ISPRINT(t_ch)) | |
378 | matched = 1; | |
379 | } else if (CC_EQ(s,i, "punct")) { | |
380 | if (ISPUNCT(t_ch)) | |
381 | matched = 1; | |
382 | } else if (CC_EQ(s,i, "space")) { | |
383 | if (ISSPACE(t_ch)) | |
384 | matched = 1; | |
385 | } else if (CC_EQ(s,i, "upper")) { | |
386 | if (ISUPPER(t_ch)) | |
387 | matched = 1; | |
388 | else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) | |
389 | matched = 1; | |
390 | } else if (CC_EQ(s,i, "xdigit")) { | |
391 | if (ISXDIGIT(t_ch)) | |
392 | matched = 1; | |
393 | } else /* malformed [:class:] string */ | |
394 | return WM_ABORT_ALL; | |
395 | p_ch = 0; /* This makes "prev_ch" get set to 0. */ | |
396 | } else if (t_ch == p_ch) | |
397 | matched = 1; | |
398 | } while (prev_ch = p_ch, (p_ch = *++p) != ']'); | |
399 | if (matched == negated || | |
400 | ((flags & WM_PATHNAME) && t_ch == '/')) | |
401 | return WM_NOMATCH; | |
402 | continue; | |
403 | } | |
404 | } | |
405 | ||
406 | return *text ? WM_NOMATCH : WM_MATCH; | |
407 | } | |
408 | ||
409 | /* Match the "pattern" against the "text" string. */ | |
410 | static int wildmatch(const char *pattern, const char *text, unsigned int flags) | |
411 | { | |
412 | // local modification: move WM_CASEFOLD here | |
413 | if (ignore_case) | |
414 | flags |= WM_CASEFOLD; | |
415 | ||
416 | return dowild((const uchar*)pattern, (const uchar*)text, flags); | |
417 | } | |
418 | ||
419 | // Copied from dir.h | |
420 | ||
/* Properties of one ignore pattern, computed by parse_path_pattern(). */
#define PATTERN_FLAG_NODIR 1		/* no '/' inside the pattern: match the basename only */
#define PATTERN_FLAG_ENDSWITH 4		/* '*' followed by a wildcard-free string */
#define PATTERN_FLAG_MUSTBEDIR 8	/* pattern ended with '/': applies to directories only */
#define PATTERN_FLAG_NEGATIVE 16	/* pattern started with '!': a match un-ignores */
425 | ||
426 | // Copied from dir.c | |
427 | ||
428 | static int fspathncmp(const char *a, const char *b, size_t count) | |
429 | { | |
430 | return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); | |
431 | } | |
432 | ||
/*
 * Length of the leading part of "match" that contains no glob special
 * character, i.e. the offset of the first such character or of the
 * terminating NUL.
 */
static int simple_length(const char *match)
{
	int len = 0;

	while (match[len] != '\0' && !is_glob_special(match[len]))
		len++;

	return len;
}
444 | ||
/* Return non-zero when "string" contains no glob special character at all. */
static int no_wildcard(const char *string)
{
	const char *stop = string + simple_length(string);

	return *stop == '\0';
}
449 | ||
450 | static void parse_path_pattern(const char **pattern, | |
451 | int *patternlen, | |
452 | unsigned *flags, | |
453 | int *nowildcardlen) | |
454 | { | |
455 | const char *p = *pattern; | |
456 | size_t i, len; | |
457 | ||
458 | *flags = 0; | |
459 | if (*p == '!') { | |
460 | *flags |= PATTERN_FLAG_NEGATIVE; | |
461 | p++; | |
462 | } | |
463 | len = strlen(p); | |
464 | if (len && p[len - 1] == '/') { | |
465 | len--; | |
466 | *flags |= PATTERN_FLAG_MUSTBEDIR; | |
467 | } | |
468 | for (i = 0; i < len; i++) { | |
469 | if (p[i] == '/') | |
470 | break; | |
471 | } | |
472 | if (i == len) | |
473 | *flags |= PATTERN_FLAG_NODIR; | |
474 | *nowildcardlen = simple_length(p); | |
475 | /* | |
476 | * we should have excluded the trailing slash from 'p' too, | |
477 | * but that's one more allocation. Instead just make sure | |
478 | * nowildcardlen does not exceed real patternlen | |
479 | */ | |
480 | if (*nowildcardlen > len) | |
481 | *nowildcardlen = len; | |
482 | if (*p == '*' && no_wildcard(p + 1)) | |
483 | *flags |= PATTERN_FLAG_ENDSWITH; | |
484 | *pattern = p; | |
485 | *patternlen = len; | |
486 | } | |
487 | ||
/*
 * Cut unescaped trailing spaces off "buf" in place.
 *
 * A backslash protects the character after it, so a space written as "\ "
 * is kept.  If the string ends with a lone backslash, nothing is trimmed.
 */
static void trim_trailing_spaces(char *buf)
{
	char *run_start = NULL;	/* first space of the current run of spaces */
	char *cur = buf;

	while (*cur != '\0') {
		if (*cur == ' ') {
			if (run_start == NULL)
				run_start = cur;
		} else {
			if (*cur == '\\') {
				/* skip the escaped character as well */
				cur++;
				if (*cur == '\0')
					return;
			}
			run_start = NULL;
		}
		cur++;
	}

	if (run_start != NULL)
		*run_start = '\0';
}
510 | ||
511 | static int match_basename(const char *basename, int basenamelen, | |
512 | const char *pattern, int prefix, int patternlen, | |
513 | unsigned flags) | |
514 | { | |
515 | if (prefix == patternlen) { | |
516 | if (patternlen == basenamelen && | |
517 | !fspathncmp(pattern, basename, basenamelen)) | |
518 | return 1; | |
519 | } else if (flags & PATTERN_FLAG_ENDSWITH) { | |
520 | /* "*literal" matching against "fooliteral" */ | |
521 | if (patternlen - 1 <= basenamelen && | |
522 | !fspathncmp(pattern + 1, | |
523 | basename + basenamelen - (patternlen - 1), | |
524 | patternlen - 1)) | |
525 | return 1; | |
526 | } else { | |
527 | // local modification: call wildmatch() directly | |
528 | if (!wildmatch(pattern, basename, flags)) | |
529 | return 1; | |
530 | } | |
531 | return 0; | |
532 | } | |
533 | ||
534 | static int match_pathname(const char *pathname, int pathlen, | |
535 | const char *base, int baselen, | |
536 | const char *pattern, int prefix, int patternlen) | |
537 | { | |
538 | // local modification: remove local variables | |
539 | ||
540 | /* | |
541 | * match with FNM_PATHNAME; the pattern has base implicitly | |
542 | * in front of it. | |
543 | */ | |
544 | if (*pattern == '/') { | |
545 | pattern++; | |
546 | patternlen--; | |
547 | prefix--; | |
548 | } | |
549 | ||
550 | /* | |
551 | * baselen does not count the trailing slash. base[] may or | |
552 | * may not end with a trailing slash though. | |
553 | */ | |
554 | if (pathlen < baselen + 1 || | |
555 | (baselen && pathname[baselen] != '/') || | |
556 | fspathncmp(pathname, base, baselen)) | |
557 | return 0; | |
558 | ||
559 | // local modification: simplified because always baselen > 0 | |
560 | pathname += baselen + 1; | |
561 | pathlen -= baselen + 1; | |
562 | ||
563 | if (prefix) { | |
564 | /* | |
565 | * if the non-wildcard part is longer than the | |
566 | * remaining pathname, surely it cannot match. | |
567 | */ | |
568 | if (prefix > pathlen) | |
569 | return 0; | |
570 | ||
571 | if (fspathncmp(pattern, pathname, prefix)) | |
572 | return 0; | |
573 | pattern += prefix; | |
574 | patternlen -= prefix; | |
575 | pathname += prefix; | |
576 | pathlen -= prefix; | |
577 | ||
578 | /* | |
579 | * If the whole pattern did not have a wildcard, | |
580 | * then our prefix match is all we need; we | |
581 | * do not need to call fnmatch at all. | |
582 | */ | |
583 | if (!patternlen && !pathlen) | |
584 | return 1; | |
585 | } | |
586 | ||
587 | // local modification: call wildmatch() directly | |
588 | return !wildmatch(pattern, pathname, WM_PATHNAME); | |
589 | } | |
590 | ||
591 | // Copied from git/utf8.c | |
592 | ||
/* UTF-8 byte order mark (bytes EF BB BF); skipped at the start of a .gitignore. */
static const char utf8_bom[] = "\357\273\277";
594 | ||
595 | //----------------------------(IMPORT FROM GIT END)---------------------------- | |
596 | ||
/* One parsed ignore pattern, as stored in pattern_list[] by add_pattern(). */
struct pattern {
	unsigned int flags;	/* PATTERN_FLAG_* bits */
	int nowildcardlen;	/* length of the wildcard-free start of the pattern */
	int patternlen;		/* length of the pattern string */
	int dirlen;		/* length of the directory string */
	/*
	 * Two NUL-terminated strings back to back: the pattern, then the
	 * directory the pattern belongs to (at pattern + patternlen + 1).
	 */
	char pattern[];
};
604 | ||
/* Stack of active patterns; grown by add_pattern(), shrunk by decrement_depth(). */
static struct pattern **pattern_list;
/* Number of used / allocated slots in pattern_list. */
static int nr_patterns, alloced_patterns;

// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current/max directory level;
static int depth, max_depth;
/* Enables the output of debug(); set by -d/--debug. */
static bool debug_on;
/* Destination of the ignored paths, and of the optional stat listing (may be NULL). */
static FILE *out_fp, *stat_fp;
/* String printed in front of every path by print_path(); set by -p/--prefix. */
static char *prefix = "";
/* Basename of argv[0], used in error messages. */
static char *progname;
616 | ||
/*
 * Report the current errno through perror(), with "s" as the message
 * prefix, and terminate the program with EXIT_FAILURE.  Never returns.
 */
static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);
	exit(EXIT_FAILURE);
}
623 | ||
624 | static void __attribute__((noreturn)) error_exit(const char *fmt, ...) | |
625 | { | |
626 | va_list args; | |
627 | ||
628 | fprintf(stderr, "%s: error: ", progname); | |
629 | ||
630 | va_start(args, fmt); | |
631 | vfprintf(stderr, fmt, args); | |
632 | va_end(args); | |
633 | ||
634 | exit(EXIT_FAILURE); | |
635 | } | |
636 | ||
637 | static void debug(const char *fmt, ...) | |
638 | { | |
639 | va_list args; | |
640 | int i; | |
641 | ||
642 | if (!debug_on) | |
643 | return; | |
644 | ||
645 | fprintf(stderr, "[DEBUG] "); | |
646 | ||
647 | for (i = 0; i < depth * 2; i++) | |
648 | fputc(' ', stderr); | |
649 | ||
650 | va_start(args, fmt); | |
651 | vfprintf(stderr, fmt, args); | |
652 | va_end(args); | |
653 | } | |
654 | ||
655 | static void *xrealloc(void *ptr, size_t size) | |
656 | { | |
657 | ptr = realloc(ptr, size); | |
658 | if (!ptr) | |
659 | perror_exit(progname); | |
660 | ||
661 | return ptr; | |
662 | } | |
663 | ||
/* Allocate "size" bytes; exits the program instead of returning NULL. */
static void *xmalloc(size_t size)
{
	void *fresh = xrealloc(NULL, size);

	return fresh;
}
668 | ||
669 | // similar to last_matching_pattern_from_list() in GIT | |
670 | static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir) | |
671 | { | |
672 | int i; | |
673 | ||
674 | // Search in the reverse order because the last matching pattern wins. | |
675 | for (i = nr_patterns - 1; i >= 0; i--) { | |
676 | struct pattern *p = pattern_list[i]; | |
677 | unsigned int flags = p->flags; | |
678 | const char *gitignore_dir = p->pattern + p->patternlen + 1; | |
679 | bool ignored; | |
680 | ||
681 | if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir) | |
682 | continue; | |
683 | ||
684 | if (flags & PATTERN_FLAG_NODIR) { | |
685 | if (!match_basename(path + dirlen + 1, | |
686 | pathlen - dirlen - 1, | |
687 | p->pattern, | |
688 | p->nowildcardlen, | |
689 | p->patternlen, | |
690 | p->flags)) | |
691 | continue; | |
692 | } else { | |
693 | if (!match_pathname(path, pathlen, | |
694 | gitignore_dir, p->dirlen, | |
695 | p->pattern, | |
696 | p->nowildcardlen, | |
697 | p->patternlen)) | |
698 | continue; | |
699 | } | |
700 | ||
701 | debug("%s: matches %s%s%s (%s/.gitignore)\n", path, | |
702 | flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, | |
703 | flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "", | |
704 | gitignore_dir); | |
705 | ||
706 | ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0; | |
707 | if (ignored) | |
708 | debug("Ignore: %s\n", path); | |
709 | ||
710 | return ignored; | |
711 | } | |
712 | ||
713 | debug("%s: no match\n", path); | |
714 | ||
715 | return false; | |
716 | } | |
717 | ||
718 | static void add_pattern(const char *string, const char *dir, int dirlen) | |
719 | { | |
720 | struct pattern *p; | |
721 | int patternlen, nowildcardlen; | |
722 | unsigned int flags; | |
723 | ||
724 | parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen); | |
725 | ||
726 | if (patternlen == 0) | |
727 | return; | |
728 | ||
729 | p = xmalloc(sizeof(*p) + patternlen + dirlen + 2); | |
730 | ||
731 | memcpy(p->pattern, string, patternlen); | |
732 | p->pattern[patternlen] = 0; | |
733 | memcpy(p->pattern + patternlen + 1, dir, dirlen); | |
734 | p->pattern[patternlen + 1 + dirlen] = 0; | |
735 | ||
736 | p->patternlen = patternlen; | |
737 | p->nowildcardlen = nowildcardlen; | |
738 | p->dirlen = dirlen; | |
739 | p->flags = flags; | |
740 | ||
741 | debug("Add pattern: %s%s%s\n", | |
742 | flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern, | |
743 | flags & PATTERN_FLAG_MUSTBEDIR ? "/" : ""); | |
744 | ||
745 | if (nr_patterns >= alloced_patterns) { | |
746 | alloced_patterns += 128; | |
747 | pattern_list = xrealloc(pattern_list, | |
748 | sizeof(*pattern_list) * alloced_patterns); | |
749 | } | |
750 | ||
751 | pattern_list[nr_patterns++] = p; | |
752 | } | |
753 | ||
754 | // similar to add_patterns_from_buffer() in GIT | |
755 | static void add_patterns_from_gitignore(const char *dir, int dirlen) | |
756 | { | |
757 | struct stat st; | |
758 | char path[PATH_MAX], *buf, *entry; | |
759 | size_t size; | |
760 | int fd, pathlen, i; | |
761 | ||
762 | pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir); | |
763 | if (pathlen >= sizeof(path)) | |
764 | error_exit("%s: too long path was truncated\n", path); | |
765 | ||
766 | fd = open(path, O_RDONLY | O_NOFOLLOW); | |
767 | if (fd < 0) { | |
768 | if (errno != ENOENT) | |
769 | return perror_exit(path); | |
770 | return; | |
771 | } | |
772 | ||
773 | if (fstat(fd, &st) < 0) | |
774 | perror_exit(path); | |
775 | ||
776 | size = st.st_size; | |
777 | ||
778 | buf = xmalloc(size + 1); | |
779 | if (read(fd, buf, st.st_size) != st.st_size) | |
780 | perror_exit(path); | |
781 | ||
782 | buf[st.st_size] = '\n'; | |
783 | if (close(fd)) | |
784 | perror_exit(path); | |
785 | ||
786 | debug("Parse %s\n", path); | |
787 | ||
788 | entry = buf; | |
789 | ||
790 | // skip utf8 bom | |
791 | if (!strncmp(entry, utf8_bom, strlen(utf8_bom))) | |
792 | entry += strlen(utf8_bom); | |
793 | ||
794 | for (i = entry - buf; i < size; i++) { | |
795 | if (buf[i] == '\n') { | |
796 | if (entry != buf + i && entry[0] != '#') { | |
797 | buf[i - (i && buf[i-1] == '\r')] = 0; | |
798 | trim_trailing_spaces(entry); | |
799 | add_pattern(entry, dir, dirlen); | |
800 | } | |
801 | entry = buf + i + 1; | |
802 | } | |
803 | } | |
804 | ||
805 | free(buf); | |
806 | } | |
807 | ||
808 | // Save the current number of patterns and increment the depth | |
809 | static void increment_depth(void) | |
810 | { | |
811 | if (depth >= max_depth) { | |
812 | max_depth += 1; | |
813 | nr_patterns_at = xrealloc(nr_patterns_at, | |
814 | sizeof(*nr_patterns_at) * max_depth); | |
815 | } | |
816 | ||
817 | nr_patterns_at[depth] = nr_patterns; | |
818 | depth++; | |
819 | } | |
820 | ||
821 | // Decrement the depth, and free up the patterns of this directory level. | |
822 | static void decrement_depth(void) | |
823 | { | |
824 | depth--; | |
825 | assert(depth >= 0); | |
826 | ||
827 | while (nr_patterns > nr_patterns_at[depth]) | |
828 | free(pattern_list[--nr_patterns]); | |
829 | } | |
830 | ||
831 | static void print_path(const char *path) | |
832 | { | |
833 | // The path always starts with "./" | |
834 | assert(strlen(path) >= 2); | |
835 | ||
836 | // Replace the root directory with a preferred prefix. | |
837 | // This is useful for the tar command. | |
838 | fprintf(out_fp, "%s%s\n", prefix, path + 2); | |
839 | } | |
840 | ||
841 | static void print_stat(const char *path, struct stat *st) | |
842 | { | |
843 | if (!stat_fp) | |
844 | return; | |
845 | ||
846 | if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode)) | |
847 | return; | |
848 | ||
849 | assert(strlen(path) >= 2); | |
850 | ||
851 | fprintf(stat_fp, "%c %9ld %10ld %s\n", | |
852 | S_ISLNK(st->st_mode) ? 'l' : '-', | |
853 | st->st_size, st->st_mtim.tv_sec, path + 2); | |
854 | } | |
855 | ||
856 | // Traverse the entire directory tree, parsing .gitignore files. | |
857 | // Print file paths that are not tracked by git. | |
858 | // | |
859 | // Return true if all files under the directory are ignored, false otherwise. | |
860 | static bool traverse_directory(const char *dir, int dirlen) | |
861 | { | |
862 | bool all_ignored = true; | |
863 | DIR *dirp; | |
864 | ||
865 | debug("Enter[%d]: %s\n", depth, dir); | |
866 | increment_depth(); | |
867 | ||
868 | add_patterns_from_gitignore(dir, dirlen); | |
869 | ||
870 | dirp = opendir(dir); | |
871 | if (!dirp) | |
872 | perror_exit(dir); | |
873 | ||
874 | while (1) { | |
875 | struct dirent *d; | |
876 | struct stat st; | |
877 | char path[PATH_MAX]; | |
878 | int pathlen; | |
879 | bool ignored; | |
880 | ||
881 | errno = 0; | |
882 | d = readdir(dirp); | |
883 | if (!d) { | |
884 | if (errno) | |
885 | perror_exit(dir); | |
886 | break; | |
887 | } | |
888 | ||
889 | if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) | |
890 | continue; | |
891 | ||
892 | pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name); | |
893 | if (pathlen >= sizeof(path)) | |
894 | error_exit("%s: too long path was truncated\n", path); | |
895 | ||
896 | if (lstat(path, &st) < 0) | |
897 | perror_exit(path); | |
898 | ||
899 | if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) || | |
900 | is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) { | |
901 | ignored = true; | |
902 | } else { | |
903 | if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) | |
904 | // If all the files in a directory are ignored, | |
905 | // let's ignore that directory as well. This | |
906 | // will avoid empty directories in the tarball. | |
907 | ignored = traverse_directory(path, pathlen); | |
908 | else | |
909 | ignored = false; | |
910 | } | |
911 | ||
912 | if (ignored) { | |
913 | print_path(path); | |
914 | } else { | |
915 | print_stat(path, &st); | |
916 | all_ignored = false; | |
917 | } | |
918 | } | |
919 | ||
920 | if (closedir(dirp)) | |
921 | perror_exit(dir); | |
922 | ||
923 | decrement_depth(); | |
924 | debug("Leave[%d]: %s\n", depth, dir); | |
925 | ||
926 | return all_ignored; | |
927 | } | |
928 | ||
929 | static void usage(void) | |
930 | { | |
931 | fprintf(stderr, | |
932 | "usage: %s [options]\n" | |
933 | "\n" | |
934 | "Show files that are ignored by git\n" | |
935 | "\n" | |
936 | "options:\n" | |
937 | " -d, --debug print debug messages to stderr\n" | |
938 | " -e, --exclude PATTERN add the given exclude pattern\n" | |
939 | " -h, --help show this help message and exit\n" | |
940 | " -i, --ignore-case Ignore case differences between the patterns and the files\n" | |
941 | " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n" | |
942 | " -p, --prefix PREFIX prefix added to each path (default: empty string)\n" | |
943 | " -r, --rootdir DIR root of the source tree (default: current working directory)\n" | |
944 | " -s, --stat FILE output the file stat of non-ignored files to a file\n", | |
945 | progname); | |
946 | } | |
947 | ||
/*
 * Open an output stream and store it in *fp.
 *
 * The name "-" selects stdout; any other name is opened for writing, and
 * the program exits if that fails.
 */
static void open_output(const char *pathname, FILE **fp)
{
	if (!strcmp(pathname, "-")) {
		*fp = stdout;
		return;
	}

	*fp = fopen(pathname, "w");
	if (*fp == NULL)
		perror_exit(pathname);
}
958 | ||
/*
 * Flush and close an output stream.
 *
 * The program exits with an error when the stream has its error indicator
 * set after flushing, or when closing it fails.  "pathname" is only used
 * for the error message.
 */
static void close_output(const char *pathname, FILE *fp)
{
	fflush(fp);

	// A write error at any earlier point leaves the indicator set.
	if (ferror(fp) != 0)
		error_exit("not all data was written to the output\n");

	if (fclose(fp) != 0)
		perror_exit(pathname);
}
969 | ||
970 | int main(int argc, char *argv[]) | |
971 | { | |
972 | const char *output = "-"; | |
973 | const char *rootdir = "."; | |
974 | const char *stat = NULL; | |
975 | ||
976 | progname = strrchr(argv[0], '/'); | |
977 | if (progname) | |
978 | progname++; | |
979 | else | |
980 | progname = argv[0]; | |
981 | ||
982 | while (1) { | |
983 | static struct option long_options[] = { | |
984 | {"debug", no_argument, NULL, 'd'}, | |
985 | {"help", no_argument, NULL, 'h'}, | |
986 | {"ignore-case", no_argument, NULL, 'i'}, | |
987 | {"output", required_argument, NULL, 'o'}, | |
988 | {"prefix", required_argument, NULL, 'p'}, | |
989 | {"rootdir", required_argument, NULL, 'r'}, | |
990 | {"stat", required_argument, NULL, 's'}, | |
991 | {"exclude", required_argument, NULL, 'x'}, | |
992 | {}, | |
993 | }; | |
994 | ||
995 | int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL); | |
996 | ||
997 | if (c == -1) | |
998 | break; | |
999 | ||
1000 | switch (c) { | |
1001 | case 'd': | |
1002 | debug_on = true; | |
1003 | break; | |
1004 | case 'h': | |
1005 | usage(); | |
1006 | exit(0); | |
1007 | case 'i': | |
1008 | ignore_case = true; | |
1009 | break; | |
1010 | case 'o': | |
1011 | output = optarg; | |
1012 | break; | |
1013 | case 'p': | |
1014 | prefix = optarg; | |
1015 | break; | |
1016 | case 'r': | |
1017 | rootdir = optarg; | |
1018 | break; | |
1019 | case 's': | |
1020 | stat = optarg; | |
1021 | break; | |
1022 | case 'x': | |
1023 | add_pattern(optarg, ".", strlen(".")); | |
1024 | break; | |
1025 | case '?': | |
1026 | usage(); | |
1027 | /* fallthrough */ | |
1028 | default: | |
1029 | exit(EXIT_FAILURE); | |
1030 | } | |
1031 | } | |
1032 | ||
1033 | open_output(output, &out_fp); | |
1034 | if (stat && stat[0]) | |
1035 | open_output(stat, &stat_fp); | |
1036 | ||
1037 | if (chdir(rootdir)) | |
1038 | perror_exit(rootdir); | |
1039 | ||
1040 | add_pattern(".git/", ".", strlen(".")); | |
1041 | ||
1042 | if (traverse_directory(".", strlen("."))) | |
1043 | print_path("./"); | |
1044 | ||
1045 | assert(depth == 0); | |
1046 | ||
1047 | while (nr_patterns > 0) | |
1048 | free(pattern_list[--nr_patterns]); | |
1049 | free(pattern_list); | |
1050 | free(nr_patterns_at); | |
1051 | ||
1052 | close_output(output, out_fp); | |
1053 | if (stat_fp) | |
1054 | close_output(stat, stat_fp); | |
1055 | ||
1056 | return 0; | |
1057 | } |