Commit | Line | Data |
---|---|---|
7c5227af | 1 | #!/usr/bin/env python3 |
4f19048f | 2 | # SPDX-License-Identifier: GPL-2.0-only |
24fe1f03 | 3 | |
b1a3f243 | 4 | """Find Kconfig symbols that are referenced but not defined.""" |
24fe1f03 | 5 | |
8e8e3331 | 6 | # (c) 2014-2017 Valentin Rothberg <valentinrothberg@gmail.com> |
cc641d55 | 7 | # (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de> |
24fe1f03 | 8 | # |
24fe1f03 VR |
9 | |
10 | ||
14390e31 | 11 | import argparse |
1b2c8414 | 12 | import difflib |
24fe1f03 VR |
13 | import os |
14 | import re | |
e2042a8a | 15 | import signal |
f175ba17 | 16 | import subprocess |
b1a3f243 | 17 | import sys |
e2042a8a | 18 | from multiprocessing import Pool, cpu_count |
24fe1f03 | 19 | |
cc641d55 VR |
20 | |
# regex expressions
# Operator characters allowed in an expression: & ( ) | !
OPERATORS = r"&|\(|\)|\||\!"
# A symbol: word characters containing at least two upper-case letters/digits.
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
# A "config"/"menuconfig" line; the symbol name is the only capture group.
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
# A run of operators, whitespace and symbols.
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
# A "default" statement with an optional trailing "if" clause.
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# Lines starting with if/select/imply/depends on/default followed by EXPR.
STMT = r"^\s*(?:if|select|imply|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
# A CONFIG_<symbol> reference, optionally prefixed with "D" (presumably for
# -DCONFIG_... compiler flags -- confirm); captures the symbol name only.
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
# Paths ending in "Kconfig" plus an optional suffix of word chars, '.', '+', '-'.
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
# Matches strings whose last character is alphanumeric.
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
# Hexadecimal (0x...) or decimal literals.
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
# Double-quoted strings (non-greedy).
REGEX_QUOTES = re.compile("(\"(.*?)\")")
24fe1f03 VR |
40 | |
41 | ||
b1a3f243 VR |
def parse_options():
    """The user interface of this module.

    Parse the command line and return the resulting argparse namespace.
    Exit with an error message if --commit and --diff are combined, if
    --diff is not of the form 'commit1..commit2', if the Git tree is dirty
    while --commit or --diff is used without --force, if --commit starts
    with 'HEAD', or if --ignore is not a valid Python regex.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig. If no option is specified, " \
            "checkkconfigsymbols defaults to check your current tree. " \
            "Please note that specifying commits will 'git reset --hard\' " \
            "your current tree! You may save uncommitted changes to avoid " \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commmit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.\^]+\.\.[\w\-\.\^]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    if args.commit or args.diff:
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status'). "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        if args.commit.startswith('HEAD'):
            sys.exit("The --commit option can't use the HEAD ref")

        # --find only makes sense together with --diff
        args.find = False

    if args.ignore:
        # Only a regex compilation error means "invalid pattern"; a bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        try:
            re.compile(args.ignore)
        except re.error:
            sys.exit("Please specify a valid Python regex.")

    return args
b1a3f243 VR |
116 | |
117 | ||
24fe1f03 VR |
def main():
    """Main function of this module.

    Depending on the options, either print symbols similar to --sim, or
    report undefined Kconfig symbols for the current tree, for a single
    commit (--commit) or for a commit range (--diff).  With --commit or
    --diff the tree is hard-reset to the commits under inspection and reset
    back to the original HEAD afterwards.
    """
    args = parse_options()

    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        commit_a = None
        commit_b = None
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        elif args.diff:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        try:
            # get undefined items before the commit
            reset(commit_a)
            undefined_a, _ = check_symbols(args.ignore)

            # get undefined items for the commit
            reset(commit_b)
            undefined_b, defined = check_symbols(args.ignore)
        finally:
            # reset to head, even if a check was interrupted or failed, so
            # the tree is not left on one of the inspected commits
            reset(head)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            # symbol has not been undefined before
            if symbol not in undefined_a:
                files = sorted(undefined_b.get(symbol))
                undefined[symbol] = files
            # check if there are new files that reference the undefined symbol
            else:
                files = sorted(undefined_b.get(symbol) -
                               undefined_a.get(symbol))
                if files:
                    undefined[symbol] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    # "<abbreviated hash> <subject>"
                    commit = commit.split(" ", 1)
                    print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
            else:
                print("\t- no commit found")
        print()  # new line
c7455663 VR |
205 | |
206 | ||
2f9cc12b VR |
def reset(commit):
    """Hard-reset the current Git tree to @commit ('git reset --hard')."""
    command = ["git", "reset", "--hard"]
    command.append(commit)
    execute(command)
210 | ||
211 | ||
c7455663 VR |
def yel(string):
    """
    Return %string wrapped in the ANSI escape codes for yellow when the
    global COLOR flag is set; otherwise return %string unchanged.
    """
    if not COLOR:
        return string
    return "\033[33m%s\033[0m" % string
c7455663 VR |
217 | |
218 | ||
def red(string):
    """
    Return %string wrapped in the ANSI escape codes for red when the
    global COLOR flag is set; otherwise return %string unchanged.
    """
    if not COLOR:
        return string
    return "\033[31m%s\033[0m" % string
b1a3f243 VR |
224 | |
225 | ||
def execute(cmd):
    """Execute %cmd and return stdout.  Exit in case of error.

    %cmd is an argument list that is run without a shell.  stderr is merged
    into stdout and undecodable bytes are replaced.  If the command cannot
    be started or returns a non-zero status, the script exits with a
    message that includes whatever the command printed.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        # Use sys.exit(): the bare exit() builtin is injected by the site
        # module for interactive use and is not guaranteed to exist.
        output = (fail.output or b"").decode(errors='replace').strip()
        if output:
            sys.exit("%s\n%s" % (fail, output))
        sys.exit(fail)
    except OSError as fail:
        # e.g. the executable does not exist or is not executable
        sys.exit("Failed to run '%s': %s" % (" ".join(cmd), fail))
    return stdout.decode(errors='replace')
234 | ||
235 | ||
a42fa92c VR |
def find_commits(symbol, diff):
    """Return the one-line 'git log' entries of the commits in the range
    %diff whose changes match %symbol (git log -G)."""
    log_cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit",
               "-G", symbol, diff]
    output = execute(log_cmd)
    # drop the empty strings produced by trailing newlines
    return list(filter(None, output.split("\n")))
a42fa92c VR |
242 | |
243 | ||
b1a3f243 VR |
def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed)."""
    stdout = execute(["git", "status", "--porcelain"])
    # Inspect the two-character status field of every *line*.  Iterating
    # over the string itself would walk single characters, so any of the
    # letters below inside a file name (e.g. an untracked "Makefile") would
    # wrongly mark the tree as dirty.
    return any(re.search(r"[URMADC]", line[:2])
               for line in stdout.splitlines())
252 | ||
253 | ||
def get_head():
    """Return the commit hash that HEAD currently points to."""
    rev_parse = ["git", "rev-parse", "HEAD"]
    # git terminates the hash with a newline
    return execute(rev_parse).strip('\n')
258 | ||
259 | ||
e2042a8a VR |
def partition(lst, size):
    """Split list @lst into @size lists by dealing out its elements
    round-robin (element i ends up in list i % @size)."""
    buckets = []
    for offset in range(size):
        buckets.append(lst[offset::size])
    return buckets
e2042a8a VR |
263 | |
264 | ||
def init_worker():
    """Make the calling process ignore SIGINT."""
    ignore_action = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_action)
268 | ||
269 | ||
def find_sims(symbol, ignore, defined=None):
    """Return a list of max. ten Kconfig symbols that are string-similar to
    @symbol.

    If @defined is a non-empty collection of symbols, candidates are taken
    from it.  Otherwise all Kconfig files of the tree are parsed in parallel
    to collect the defined symbols; @ignore is passed on to the parser.
    @defined is never modified.
    """
    if defined:
        return difflib.get_close_matches(symbol, set(defined), 10)

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # Collect into a local set: a mutable default argument would keep
    # growing across calls, and a caller-supplied set has no extend().
    candidates = set()
    # The context manager tears the worker processes down again.
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            candidates.update(res[0])

    return difflib.get_close_matches(symbol, candidates, 10)
1b2c8414 VR |
290 | |
291 | ||
def get_files():
    """Return the files listed by 'git ls-files', minus paths that contain
    ".git", "ChangeLog" or ".log", that are directories, or that start with
    "tools/"."""
    listing = execute(["git", "ls-files"])
    # drop a single trailing newline so that no empty entry is produced
    if listing.endswith("\n"):
        listing = listing[:-1]

    def skipped(path):
        """Tell whether @path is excluded from the worklist."""
        return (".git" in path or "ChangeLog" in path or ".log" in path
                or os.path.isdir(path) or path.startswith("tools/"))

    return [path for path in listing.rsplit("\n") if not skipped(path)]
307 | ||
308 | ||
def check_symbols(ignore):
    """Find undefined Kconfig symbols.

    Return a tuple (undefined, defined): undefined maps each undefined
    symbol to the files referencing it, defined is the collection of all
    defined symbols.  Files matching %ignore are not checked for undefined
    symbols.  A keyboard interrupt terminates the worker processes and
    exits with status 1.
    """
    pool = Pool(cpu_count(), init_worker)
    try:
        result = check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        sys.exit(1)
    # Shut the workers down on the success path as well; otherwise every
    # call leaves a pool of idle processes behind.
    pool.close()
    pool.join()
    return result
320 | ||
321 | ||
def check_symbols_helper(pool, ignore):
    """Do the actual work of check_symbols() using the workers of @pool.

    Return a tuple (undefined, defined_symbols): undefined maps each
    referenced-but-undefined symbol to the set of files referencing it,
    defined_symbols is the set of all symbols defined in Kconfig files.
    Source files matching @ignore are skipped.
    """
    kconfig_files = []
    source_files = []
    for path in get_files():
        if REGEX_FILE_KCONFIG.match(path):
            kconfig_files.append(path)
        elif not (ignore and re.match(ignore, path)):
            # only source files that do not match the ignore pattern
            source_files.append(path)

    # parse source files
    references_by_file = {}  # {file: [symbols]}
    source_jobs = partition(source_files, cpu_count())
    for chunk_result in pool.map(parse_source_files, source_jobs):
        references_by_file.update(chunk_result)

    # parse kconfig files
    kconfig_jobs = [(chunk, ignore)
                    for chunk in partition(kconfig_files, cpu_count())]
    collected = []
    for definitions, references in pool.map(parse_kconfig_files,
                                            kconfig_jobs):
        collected.extend(definitions)
        references_by_file.update(references)
    defined_symbols = set(collected)

    # invert {file: [symbols]} into {symbol: {files}}
    files_by_symbol = {}
    for path, symbols in references_by_file.items():
        for symbol in symbols:
            files_by_symbol.setdefault(symbol, set()).add(path)

    placeholders = ("FOO", "BAR", "FOO_BAR", "XXX")
    undefined = {}  # {symbol: {files}}
    for symbol in sorted(files_by_symbol):
        # filter some false positives
        if symbol in placeholders:
            continue
        if symbol in defined_symbols:
            continue
        # avoid false positives for kernel modules
        if symbol.endswith("_MODULE") and \
           symbol[:-len("_MODULE")] in defined_symbols:
            continue
        undefined[symbol] = files_by_symbol.get(symbol)
    return undefined, defined_symbols
24fe1f03 VR |
374 | |
375 | ||
e2042a8a VR |
def parse_source_files(source_files):
    """Map every file in @source_files to the list of Kconfig symbols that
    parse_source_file() finds in it."""
    return {path: parse_source_file(path) for path in source_files}
e2042a8a VR |
383 | |
384 | ||
def parse_source_file(sfile):
    """Return the list of Kconfig symbols referenced in @sfile; the list is
    empty if @sfile does not exist."""
    if not os.path.exists(sfile):
        return []

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        content = stream.readlines()

    found = []
    for text in content:
        # cheap substring test before running the regex
        if "CONFIG_" in text:
            found.extend(name
                         for name in REGEX_SOURCE_SYMBOL.findall(text)
                         if REGEX_FILTER_SYMBOLS.search(name))
    return found
24fe1f03 VR |
406 | |
407 | ||
ef3f5543 VR |
def get_symbols_in_line(line):
    """Return all matches of REGEX_SYMBOL in @line as a list."""
    mentioned = REGEX_SYMBOL.findall(line)
    return mentioned
24fe1f03 VR |
411 | |
412 | ||
e2042a8a VR |
def parse_kconfig_files(args):
    """Parse a batch of Kconfig files.

    @args is a tuple of a list of files and the @ignore pattern.  Return a
    tuple (defined symbols, {file: referenced symbols}).  Definitions are
    collected from every file; references are not collected for files that
    match the @ignore pattern.
    """
    kconfig_files, ignore = args[0], args[1]
    all_defined = []
    references_by_file = {}

    for kfile in kconfig_files:
        file_defined, file_references = parse_kconfig_file(kfile)
        all_defined += file_defined
        if not (ignore and re.match(ignore, kfile)):
            references_by_file[kfile] = file_references
    return (all_defined, references_by_file)
e2042a8a VR |
430 | |
431 | ||
def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple (defined, references).

    defined lists the symbols introduced by "config"/"menuconfig" lines,
    references lists the symbols mentioned in statements matched by
    REGEX_KCONFIG_STMT, including their backslash-continued lines.  Both
    lists are empty if @kfile does not exist.
    """
    defined = []
    references = []

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    total = len(lines)
    for index in range(total):
        line = lines[index].strip('\n')
        line = line.split("#")[0]  # ignore comments

        definition = REGEX_KCONFIG_DEF.match(line)
        if definition:
            defined.append(definition.group(1))
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements; stop at the end of the file so that a
            # trailing backslash on the last line cannot raise IndexError
            cursor = index
            while line.endswith("\\") and cursor + 1 < total:
                cursor += 1
                line = lines[cursor].strip('\n')
                symbols.extend(get_symbols_in_line(line))
            for symbol in set(symbols):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references
24fe1f03 VR |
468 | |
469 | ||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()