Commit | Line | Data |
---|---|---|
074075ae | 1 | #!/usr/bin/env python3 |
b3020464 TR |
2 | # SPDX-License-Identifier: GPL-2.0 |
3 | # | |
4 | # Copyright (C) Google LLC, 2018 | |
5 | # | |
6 | # Author: Tom Roeder <tmroeder@google.com> | |
7 | # | |
8 | """A tool for generating compile_commands.json in the Linux kernel.""" | |
9 | ||
10 | import argparse | |
11 | import json | |
12 | import logging | |
13 | import os | |
14 | import re | |
ecca4fea | 15 | import subprocess |
ec783c7c | 16 | import sys |
b3020464 TR |
17 | |
# Defaults for the --output and --log_level command-line options.
_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

# Matches hidden file names of the form '.<anything>.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Matches a 'savedcmd_<obj>.o := <command> <source>.c' line. Group 1 is the
# command up to (and including the space before) the source file, group 2 is
# the .c source file itself.
_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.c) *(;|$)'
# Names accepted by --log_level; each is looked up as an attribute of the
# logging module.
_VALID_LOG_LEVELS = [
    'DEBUG',
    'INFO',
    'WARNING',
    'ERROR',
    'CRITICAL',
]
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
b3020464 TR |
27 | |
def parse_arguments():
    """Build the argument parser and evaluate the command line.

    Returns:
        A tuple (log_level, directory, output, ar, paths) where:
        log_level: Name of the logging level used to filter log output.
        directory: Absolute path of the work directory where the objects
            were built.
        ar: Command used for parsing .a archives.
        output: Where to write the compile-commands JSON file.
        paths: The files/directories to inspect for .cmd files. When no
            positional path was given, a single-element list holding the
            --directory value exactly as it was passed.
    """
    parser = argparse.ArgumentParser(
        description='Creates a compile_commands.json database from kernel '
                    '.cmd files')

    parser.add_argument(
        '-d', '--directory', type=str, default='.',
        help=('specify the output directory used for the kernel build '
              '(defaults to the working directory)'))

    parser.add_argument(
        '-o', '--output', type=str, default=_DEFAULT_OUTPUT,
        help=('path to the output command database (defaults to ' +
              _DEFAULT_OUTPUT + ')'))

    parser.add_argument(
        '--log_level', choices=_VALID_LOG_LEVELS,
        default=_DEFAULT_LOG_LEVEL,
        help=('the level of log messages to produce (defaults to ' +
              _DEFAULT_LOG_LEVEL + ')'))

    parser.add_argument(
        '-a', '--ar', type=str, default='llvm-ar',
        help='command used for parsing .a archives')

    parser.add_argument(
        'paths', type=str, nargs='*',
        help=('directories to search or files to parse '
              '(files should be *.o, *.a, or modules.order). '
              'If nothing is specified, the current directory is searched'))

    options = parser.parse_args()

    # Fall back to the build directory when no explicit path was supplied.
    search_paths = options.paths or [options.directory]

    return (options.log_level,
            os.path.abspath(options.directory),
            options.output,
            options.ar,
            search_paths)
fc2cb22e MY |
71 | |
72 | ||
def cmdfiles_in_dir(directory):
    """Iterate over the .cmd files located beneath a directory.

    The tree rooted at the given directory is walked top-down. The
    directories named in _EXCLUDE_DIRS, taken relative to that root, are
    skipped together with everything below them.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match
    pruned = [os.path.join(directory, name) for name in _EXCLUDE_DIRS]

    for current, subdirs, files in os.walk(directory, topdown=True):
        if current in pruned:
            # Emptying the list in place keeps os.walk from descending.
            del subdirs[:]
            continue

        for name in files:
            if is_cmdfile(name):
                yield os.path.join(current, name)
b3020464 TR |
97 | |
98 | ||
ecca4fea MY |
def to_cmdfile(path):
    """Map a build artifact to the .cmd file that sits next to it.

    The .cmd file lives in the same directory as the artifact and is named
    after it with a leading '.' and a trailing '.cmd'.

    Args:
        path: Path of the build artifact.

    Returns:
        The path to the corresponding .cmd file.
    """
    parent = os.path.dirname(path)
    hidden_name = '.' + os.path.basename(path) + '.cmd'
    return os.path.join(parent, hidden_name)
110 | ||
111 | ||
ecca4fea MY |
def cmdfiles_for_a(archive, ar):
    """Iterate over the .cmd files of the members of an archive.

    The archive's table of contents is obtained by running "<ar> -t" on
    it; each whitespace-separated entry of the output is treated as one
    member.

    Args:
        archive: The archive to parse.
        ar: Command used for parsing .a archives.

    Yields:
        The path to the .cmd file of every member listed.
    """
    listing = subprocess.check_output([ar, '-t', archive]).decode()
    for member in listing.split():
        yield to_cmdfile(member)
125 | ||
126 | ||
def cmdfiles_for_modorder(modorder):
    """Iterate over the .cmd files of the objects behind modules.order.

    Every line of modules.order names a module as a '.o' path. The
    matching '.mod' file lists the objects the module is composed of, one
    per line, and the .cmd file of each of them is produced. The script
    exits with an error message when a line does not end with '.o'.

    Args:
        modorder: The modules.order file to parse.

    Yields:
        The path to every .cmd file found.
    """
    with open(modorder) as order_file:
        for entry in order_file:
            module_obj = entry.rstrip()
            stem, suffix = os.path.splitext(module_obj)
            if suffix != '.o':
                sys.exit('{}: module path must end with .o'.format(module_obj))
            # Read from *.mod, to get a list of objects that compose the
            # module.
            with open(stem + '.mod') as mod_file:
                for member in mod_file:
                    yield to_cmdfile(member.rstrip())
ecca4fea MY |
150 | |
151 | ||
def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands: a dict with the keys
        'directory' (root_directory as given), 'file' (the normalized
        absolute path of the source file) and 'command' (the unescaped
        command prefix followed by file_path as given).

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    # A raw string is required for the backslash form: '\#' in a normal
    # literal is an invalid escape sequence and triggers a warning.
    prefix = command_prefix.replace(r'\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }
184 | ||
185 | ||
def main():
    """Collects .cmd files from the requested paths and writes the database.

    For every .cmd file found, only its first line is examined. Lines that
    match _LINE_PATTERN are turned into compile_commands entries; entries
    whose source file cannot be found are reported at INFO level and
    skipped. The collected entries are written to the output file as JSON.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    logging.basicConfig(format='%(levelname)s: %(message)s',
                        level=getattr(logging, log_level))

    match_line = re.compile(_LINE_PATTERN).match
    entries = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # built-in objects are linked via vmlinux.a
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file is of interest.
            with open(cmdfile, 'rt') as f:
                first_line = f.readline()

            found = match_line(first_line)
            if not found:
                continue

            try:
                entries.append(
                    process_line(directory, found.group(1), found.group(2)))
            except ValueError as err:
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(entries, f, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()