| 1 | #!/usr/bin/env python3 |
| 2 | # SPDX-License-Identifier: GPL-2.0-only |
| 3 | |
| 4 | """Find Kconfig symbols that are referenced but not defined.""" |
| 5 | |
| 6 | # (c) 2014-2017 Valentin Rothberg <valentinrothberg@gmail.com> |
| 7 | # (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de> |
| 8 | # |
| 9 | |
| 10 | |
| 11 | import argparse |
| 12 | import difflib |
| 13 | import os |
| 14 | import re |
| 15 | import signal |
| 16 | import subprocess |
| 17 | import sys |
| 18 | from multiprocessing import Pool, cpu_count |
| 19 | |
| 20 | |
# Regular-expression fragments.  The composite patterns below are assembled
# from these pieces, so a change to SYMBOL or OPERATORS propagates everywhere.
OPERATORS = r"&|\(|\)|\||\!"
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
# A "config FOO" / "menuconfig FOO" line; group 1 captures the symbol name.
DEF = "".join((r"^\s*(?:menu){,1}config\s+(", SYMBOL, r")\s*"))
# A run of operators, whitespace and symbols.
EXPR = "".join((r"(?:", OPERATORS, r"|\s|", SYMBOL, r")+"))
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# A Kconfig statement that may reference symbols, followed by an expression.
STMT = "".join((
    r"^\s*(?:if|select|imply|depends\s+on|(?:",
    DEFAULT,
    r"))\s+",
    EXPR,
))
# A CONFIG_FOO (or DCONFIG_FOO) reference; group 1 captures FOO.
SOURCE_SYMBOL = "".join((r"(?:\W|\b)+[D]{,1}CONFIG_(", SYMBOL, r")"))

# Compiled pattern objects used by the parsing functions.
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
REGEX_SYMBOL = re.compile("".join((r'(?!\B)', SYMBOL, r'(?!\B)')))
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
REGEX_QUOTES = re.compile("(\"(.*?)\")")
| 40 | |
| 41 | |
def parse_options():
    """Parse and validate the command-line options of this tool.

    Reads the arguments from sys.argv via argparse and returns the resulting
    namespace with the attributes commit, diff, find, ignore, sim, force and
    color.

    The function terminates the program through sys.exit() when
      - both --commit and --diff are given,
      - --diff is not of the form 'commit1..commit2',
      - --commit or --diff is used on a dirty tree without --force,
      - --commit starts with 'HEAD', or
      - --ignore is not a valid Python regular expression.
    """
    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
            "not defined in Kconfig. If no option is specified, " \
            "checkkconfigsymbols defaults to check your current tree. " \
            "Please note that specifying commits will 'git reset --hard\' " \
            "your current tree! You may save uncommitted changes to avoid " \
            "losing data."

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.\^]+\.\.[\w\-\.\^]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "\'commit1..commit2\'")

    if args.commit or args.diff:
        # Both modes run 'git reset --hard', which would discard local changes.
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status'). "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        if args.commit.startswith('HEAD'):
            sys.exit("The --commit option can't use the HEAD ref")

        # --find needs a commit range, which only --diff provides.
        args.find = False

    if args.ignore:
        try:
            re.compile(args.ignore)
        except (re.error, OverflowError, RecursionError):
            # Catch only what the regex compiler raises for a bad pattern; a
            # bare 'except' would also swallow KeyboardInterrupt/SystemExit.
            sys.exit("Please specify a valid Python regex.")

    return args
| 116 | |
| 117 | |
def print_undefined_symbols():
    """Main function of this module.

    Parses the command-line options, collects the undefined Kconfig symbols
    (for the current tree, for a single commit, or for a commit range) and
    prints each one with its referencing files, string-similar symbols and,
    with --find, the commits that touched the symbol.

    With --sim alone, only the similar symbols are printed and the program
    exits with status 0.
    """
    args = parse_options()

    global COLOR
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        commit_a = None
        commit_b = None
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        elif args.diff:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]

        # get undefined items before the commit
        reset(commit_a)
        try:
            undefined_a, _ = check_symbols(args.ignore)

            # get undefined items for the commit
            reset(commit_b)
            undefined_b, defined = check_symbols(args.ignore)
        finally:
            # The tree has been moved away from HEAD at this point; always
            # move it back, even if a check was interrupted or git failed.
            reset(head)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            # symbol has not been undefined before
            if symbol not in undefined_a:
                files = sorted(undefined_b.get(symbol))
                undefined[symbol] = files
            # check if there are new files that reference the undefined symbol
            else:
                files = sorted(undefined_b.get(symbol) -
                               undefined_a.get(symbol))
                if files:
                    undefined[symbol] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    # each entry is "<abbreviated hash> <subject>"
                    commit = commit.split(" ", 1)
                    print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
            else:
                print("\t- no commit found")
        print()  # new line
| 205 | |
| 206 | |
def reset(commit):
    """Hard-reset the current git tree to %commit via 'git reset --hard'."""
    git_cmd = ["git", "reset", "--hard", commit]
    execute(git_cmd)
| 210 | |
| 211 | |
def yel(string):
    """
    Return %string wrapped in the ANSI escape codes for yellow when the
    module-level COLOR flag is set; otherwise return %string unchanged.
    """
    if not COLOR:
        return string
    return "\033[33m%s\033[0m" % string
| 217 | |
| 218 | |
def red(string):
    """
    Return %string wrapped in the ANSI escape codes for red when the
    module-level COLOR flag is set; otherwise return %string unchanged.
    """
    if not COLOR:
        return string
    return "\033[31m%s\033[0m" % string
| 224 | |
| 225 | |
def execute(cmd):
    """Execute %cmd and return its output as a string.

    %cmd is an argument list that is run without a shell.  stderr is merged
    into stdout, and the captured bytes are decoded with undecodable bytes
    replaced.  If the command returns a non-zero exit status, the program is
    terminated through sys.exit() with the CalledProcessError as message.
    """
    try:
        output = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        # sys.exit() rather than the site-provided exit() helper, which is
        # meant for the interactive shell and is missing under 'python -S'.
        sys.exit(fail)
    return output.decode(errors='replace')
| 234 | |
| 235 | |
def find_commits(symbol, diff):
    """Return the 'git log --pretty=oneline' entries within the range %diff
    whose changes match %symbol (git's -G option).  Empty lines are dropped."""
    log_cmd = ["git", "log", "--pretty=oneline", "--abbrev-commit",
               "-G", symbol, diff]
    log_lines = execute(log_cmd).split("\n")
    return list(filter(None, log_lines))
| 242 | |
| 243 | |
def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed).

    The decision is based on the two status columns that start each line of
    'git status --porcelain'; entries whose columns contain none of the
    letters U, R, M, A, D or C (e.g., untracked '??' files) do not count.
    """
    stdout = execute(["git", "status", "--porcelain"])
    # Iterate over lines, not over the characters of the output string:
    # otherwise any of these letters inside a file name would match.
    for line in stdout.splitlines():
        if re.search(r"[URMADC]", line[:2]):
            return True
    return False
| 252 | |
| 253 | |
def get_head():
    """Return the output of 'git rev-parse HEAD' (the commit hash of the
    current HEAD) with surrounding newlines removed."""
    rev_parse_output = execute(["git", "rev-parse", "HEAD"])
    head_hash = rev_parse_output.strip('\n')
    return head_hash
| 258 | |
| 259 | |
def partition(lst, size):
    """Distribute the items of @lst round-robin over @size lists.

    Returns a list of @size sub-lists; sub-list k holds the items at indices
    k, k + @size, k + 2 * @size, ... of @lst.
    """
    buckets = []
    for offset in range(size):
        buckets.append(lst[offset::size])
    return buckets
| 263 | |
| 264 | |
def init_worker():
    """Install SIG_IGN as the SIGINT handler of the calling process, so that
    the process ignores keyboard interrupts."""
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
| 268 | |
| 269 | |
def find_sims(symbol, ignore, defined=None):
    """Return a list of max. ten Kconfig symbols that are string-similar to
    @symbol.

    If @defined is a non-empty collection of symbols, the candidates are taken
    from it.  Otherwise all Kconfig files of the git tree are parsed in a
    worker pool to collect the defined symbols; @ignore is forwarded to
    parse_kconfig_files() in that case.  @defined is never modified.
    """
    if defined:
        return difflib.get_close_matches(symbol, set(defined), 10)

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # Collect into a fresh local set instead of extending the argument: a
    # shared mutable default would leak state between calls, and callers may
    # pass an (empty) set, which has no extend().
    candidates = set()
    # The context manager tears the worker processes down once map() is done.
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            candidates.update(res[0])

    return difflib.get_close_matches(symbol, candidates, 10)
| 290 | |
| 291 | |
def get_files():
    """Return the list of files reported by 'git ls-files', minus the entries
    this tool does not scan (see the exclusion rules below)."""
    listing = execute(["git", "ls-files"])
    # drop the single trailing newline so that no empty last entry is produced
    if listing.endswith("\n"):
        listing = listing[:-1]

    def is_excluded(path):
        """Return True for git metadata, logs, directories and tools/."""
        return (".git" in path or "ChangeLog" in path or
                ".log" in path or os.path.isdir(path) or
                path.startswith("tools/"))

    return [path for path in listing.split("\n") if not is_excluded(path)]
| 307 | |
| 308 | |
def check_symbols(ignore):
    """Find undefined Kconfig symbols and return the result of
    check_symbols_helper(): a tuple of a dict with the undefined symbol as key
    and the referencing files as value, and the set of defined symbols.  Files
    matching %ignore are not checked for undefined symbols.

    The work is done in a pool of worker processes.  A keyboard interrupt
    terminates the program with exit status 1.
    """
    pool = Pool(cpu_count(), init_worker)
    try:
        return check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        sys.exit(1)
    finally:
        # Release the worker processes on every path; previously the pool was
        # only cleaned up after a keyboard interrupt and leaked otherwise.
        pool.terminate()
        pool.join()
| 320 | |
| 321 | |
def check_symbols_helper(pool, ignore):
    """Do the actual work of check_symbols() using the worker pool @pool.

    Kept separate so that check_symbols() can catch keyboard interrupts and
    terminate the worker processes.  Returns a tuple of the undefined symbols
    ({symbol: set of referencing files}) and the set of defined symbols.
    """
    kconfig_files = []
    source_files = []
    for path in get_files():
        if REGEX_FILE_KCONFIG.match(path):
            kconfig_files.append(path)
        elif not (ignore and re.match(ignore, path)):
            # source files that do not match the ignore pattern
            source_files.append(path)

    workers = cpu_count()

    # {file: [symbols]}, filled from the source files first
    symbols_by_file = {}
    for chunk_result in pool.map(parse_source_files,
                                 partition(source_files, workers)):
        symbols_by_file.update(chunk_result)

    # Kconfig files contribute both definitions and references
    kconfig_jobs = [(chunk, ignore)
                    for chunk in partition(kconfig_files, workers)]
    defined_symbols = set()
    for definitions, references in pool.map(parse_kconfig_files, kconfig_jobs):
        defined_symbols.update(definitions)
        symbols_by_file.update(references)

    # invert the mapping to {symbol: set(files)}
    files_by_symbol = {}
    for path, symbols in symbols_by_file.items():
        for symbol in symbols:
            files_by_symbol.setdefault(symbol, set()).add(path)

    placeholders = ("FOO", "BAR", "FOO_BAR", "XXX")
    module_suffix = "_MODULE"
    undefined = {}  # {symbol: set(files)}
    for symbol in sorted(files_by_symbol):
        # filter some false positives
        if symbol in placeholders:
            continue
        if symbol in defined_symbols:
            continue
        # avoid false positives for kernel modules: FOO_MODULE is fine as
        # long as FOO is defined
        if symbol.endswith(module_suffix) and \
                symbol[:-len(module_suffix)] in defined_symbols:
            continue
        undefined[symbol] = files_by_symbol[symbol]
    return undefined, defined_symbols
| 374 | |
| 375 | |
def parse_source_files(source_files):
    """Parse every file in @source_files and return a dictionary that maps
    each file to the list of Kconfig symbols it references."""
    return {sfile: parse_source_file(sfile) for sfile in source_files}
| 383 | |
| 384 | |
def parse_source_file(sfile):
    """Return the list of Kconfig symbols referenced as CONFIG_* in @sfile.

    A path that does not exist yields an empty list.  Matches that do not end
    in a letter or digit are dropped.
    """
    if not os.path.exists(sfile):
        return []

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        content = stream.readlines()

    found = []
    for text in content:
        # cheap substring test before running the regex
        if "CONFIG_" in text:
            found.extend(candidate
                         for candidate in REGEX_SOURCE_SYMBOL.findall(text)
                         if REGEX_FILTER_SYMBOLS.search(candidate))
    return found
| 406 | |
| 407 | |
def get_symbols_in_line(line):
    """Return every match of REGEX_SYMBOL in @line, i.e. the Kconfig symbols
    mentioned in it."""
    mentioned = REGEX_SYMBOL.findall(line)
    return mentioned
| 411 | |
| 412 | |
def parse_kconfig_files(args):
    """Parse a batch of Kconfig files.

    @args is a tuple of a list of files and the @ignore pattern (packed into
    one argument so the function can be used with Pool.map()).  Returns a
    tuple of the list of defined symbols and a dictionary {file: references}.
    Definitions are collected from every file; references are skipped for
    files matching the ignore pattern.
    """
    kconfig_files = args[0]
    ignore = args[1]

    all_defined = []
    references_by_file = {}
    for kfile in kconfig_files:
        file_defined, file_references = parse_kconfig_file(kfile)
        all_defined += file_defined
        if not (ignore and re.match(ignore, kfile)):
            references_by_file[kfile] = file_references
    return (all_defined, references_by_file)
| 430 | |
| 431 | |
def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple of two lists: the symbols it defines
    and the symbols it references.

    A path that does not exist yields two empty lists.  Definitions are taken
    from lines matching REGEX_KCONFIG_DEF, references from lines matching
    REGEX_KCONFIG_STMT and their backslash-continued follow-up lines.
    Everything after a '#' and (on the first line of a statement) quoted
    strings are ignored, as are purely numeric values.
    """
    defined = []
    references = []

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    total = len(lines)
    for index, raw_line in enumerate(lines):
        line = raw_line.strip('\n')
        line = line.split("#")[0]  # ignore comments

        if REGEX_KCONFIG_DEF.match(line):
            symbol_def = REGEX_KCONFIG_DEF.findall(line)
            defined.append(symbol_def[0])
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements: follow the continuation lines, but stop
            # at the end of the file so that a trailing backslash on the last
            # line cannot raise an IndexError
            following = index + 1
            while line.endswith("\\") and following < total:
                line = lines[following].strip('\n')
                symbols.extend(get_symbols_in_line(line))
                following += 1
            for symbol in set(symbols):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references
| 468 | |
| 469 | |
def main():
    """Script entry point: run print_undefined_symbols() and exit with status
    1 when the output pipe is closed by the reader."""
    try:
        print_undefined_symbols()
    except BrokenPipeError:
        # Python flushes standard streams on exit; point stdout at devnull so
        # that the flush at shutdown does not hit another BrokenPipeError
        null_fd = os.open(os.devnull, os.O_WRONLY)
        os.dup2(null_fd, sys.stdout.fileno())
        sys.exit(1)  # Python exits with error code 1 on EPIPE


if __name__ == "__main__":
    main()