dm-crypt: use __bio_add_page to add single page to clone bio
[linux-block.git] / scripts / spdxcheck.py
CommitLineData
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright Thomas Gleixner <tglx@linutronix.de>

5from argparse import ArgumentParser
6from ply import lex, yacc
bed95c43 7import locale
5385a295 8import traceback
0509b270 9import fnmatch
5385a295
TG
10import sys
11import git
12import re
13import os
14
class ParserException(Exception):
    """Error raised while lexing, parsing or validating a license expression.

    Attributes:
        tok: The offending token, or None when no token is available
            (e.g. the expression ended prematurely).
        txt: Human readable description of the problem.
    """

    def __init__(self, tok, txt):
        # Same args the implicit Exception construction would record.
        super().__init__(tok, txt)
        self.tok = tok
        self.txt = txt

class SPDXException(Exception):
    """Error raised for inconsistent data in the LICENSES directory.

    Attributes:
        el: The element (exposing a ``path`` attribute) the problem was
            found in, or None when no element is associated with it.
        txt: Human readable description of the problem.
    """

    def __init__(self, el, txt):
        # Same args the implicit Exception construction would record.
        super().__init__(el, txt)
        self.el = el
        self.txt = txt

class SPDXdata(object):
    """Container for the license information read from LICENSES.

    Attributes:
        license_files: Number of license files seen.
        exception_files: Number of exception files seen.
        licenses: List of valid license identifiers (upper case).
        exceptions: Maps an exception identifier (upper case) to the list
            of license identifiers it may be combined with.
    """

    def __init__(self):
        self.license_files = 0
        self.exception_files = 0
        self.licenses = []
        self.exceptions = {}

class dirinfo(object):
    """Per directory SPDX statistics.

    Attributes:
        missing: Number of accounted files without a valid SPDX identifier.
        total: Number of files accounted to this directory.
        files: './' prefixed names of the files without SPDX identifier
            which live directly in this directory.
    """

    def __init__(self):
        self.missing = 0
        self.total = 0
        self.files = []

    def update(self, fname, basedir, miss):
        """Account one file.

        Args:
            fname: Path of the file, without a leading './'.
            basedir: Directory the file is accounted to, './' prefixed;
                a trailing '/' is ignored.
            miss: 1 if the file lacks a valid SPDX identifier, else 0.
        """
        self.total += 1
        self.missing += miss
        if not miss:
            return
        fname = './' + fname
        # Only remember files which live directly in basedir; files from
        # deeper levels are counted but not listed.
        if os.path.dirname(fname) == basedir.rstrip('/'):
            self.files.append(fname)

# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect the valid license and exception ids from LICENSES.

    Args:
        repo: Repository object; ``repo.head.commit.tree['LICENSES']`` is
            looked up and its sub trees are traversed.

    Returns:
        A populated SPDXdata instance.

    Raises:
        SPDXException: On a duplicate license identifier, an exception
            referencing an unknown license, or an exception file which
            does not list any license.
    """

    # The subdirectories of LICENSES in the kernel source
    # Note: exceptions needs to be parsed as last directory.
    license_dirs = ["preferred", "dual", "deprecated", "exceptions"]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            exception = None
            # Context manager so the file is closed even when an
            # SPDXException aborts the scan.
            with open(el.path, encoding="utf-8") as fd:
                for l in fd:
                    if l.startswith('Valid-License-Identifier:'):
                        lid = l.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                        spdx.licenses.append(lid)

                    elif l.startswith('SPDX-Exception-Identifier:'):
                        exception = l.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif l.startswith('SPDX-Licenses:'):
                        if exception is None:
                            # Would otherwise end in KeyError: None below
                            raise SPDXException(el, 'SPDX-Licenses without preceding SPDX-Exception-Identifier')
                        for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif l.startswith("License-Text:"):
                        if exception:
                            if not spdx.exceptions[exception]:
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # Everything after the tag is the license text
                        break
    return spdx

class id_parser(object):
    """Lexer, parser and statistics collector for SPDX license expressions.

    The instance itself is handed to ply as the rule module: ``t_ignore``
    and the ``t_*`` methods define the lexer, ``precedence``, ``p_expr``
    and ``p_error`` define the grammar. The docstrings of the ``t_*`` and
    ``p_*`` methods are the token regular expressions / grammar
    productions, so those methods are documented with comments only.

    The instance also accumulates the counters of a scan: files and lines
    checked, valid and broken SPDX identifiers and per directory
    statistics (``spdx_dirs`` maps a directory to a dirinfo).
    """

    reserved = ['AND', 'OR', 'WITH']
    tokens = ['LPAR', 'RPAR', 'ID', 'EXC'] + reserved

    precedence = (('nonassoc', 'AND', 'OR'),)

    t_ignore = ' \t'

    def __init__(self, spdx):
        """Build lexer and parser and reset all counters.

        Args:
            spdx: SPDXdata with the valid license and exception ids.
        """
        self.spdx = spdx
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module = self, reflags = re.UNICODE)
        # Initialize the parser. No debug file and no parser rules stored on disk
        # The rules are small enough to be generated on the fly
        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
        self.lines_checked = 0
        self.checked = 0
        self.excluded = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.spdx_dirs = {}
        self.dirdepth = -1
        self.basedir = '.'
        self.curline = 0
        self.deepest = 0

    def set_dirinfo(self, basedir, dirdepth):
        """Configure how files are grouped for the directory statistics.

        Args:
            basedir: Directory the scan starts at.
            dirdepth: Requested depth relative to basedir. A negative
                value leaves the current settings untouched.
        """
        if dirdepth < 0:
            return
        self.basedir = basedir
        bdir = basedir.lstrip('./').rstrip('/')
        # The depth is stored relative to the top of the tree, so the
        # path components of basedir are added on top.
        parts = bdir.split('/') if bdir != '' else []
        self.dirdepth = dirdepth + len(parts)

    # Validate License and Exception IDs
    def validate(self, tok):
        """Check a token against the known license and exception ids.

        Args:
            tok: Token with its final type ('ID', 'EXC', ...) assigned.

        Raises:
            ParserException: Unknown license or exception id, or an
                exception which is not valid for the preceding license.
        """
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            # WITH keeps the license id around for the exception check
            self.lastid = None

    # Lexer functions
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'

        # Matches license ids, exception ids and the reserved words; the
        # final token type is decided below.
        if self.lasttok == 'EXC':
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            # Whatever follows WITH has to be an exception id
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    def p_error(self, p):
        # p is None when the input ended in the middle of an expression
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        else:
            raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse and validate one license expression string.

        Raises:
            ParserException: The expression is malformed or references
                unknown ids.
        """
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer = self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Search the head of a file for an SPDX identifier and check it.

        Problems are reported on stdout as 'file: line:col message'. The
        result is accounted in the counters and, unless reading stdin, in
        the per directory statistics.

        Args:
            fd: Iterable yielding the lines of the file as bytes.
            maxlines: Maximum number of lines to inspect.
            fname: File name used for reporting; '-' denotes stdin.
        """
        self.checked += 1
        self.curline = 0
        fail = 1
        # Loop invariant, no need to look it up for every line
        encoding = locale.getpreferredencoding(False)
        try:
            for line in fd:
                line = line.decode(encoding, errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if "SPDX-License-Identifier:" not in line:
                    continue
                expr = line.split(':')[1].strip()
                # NOTE: rstrip() strips a set of characters, not a suffix
                # Remove trailing comment closure
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Remove trailing xml comment closure
                if line.strip().endswith('-->'):
                    expr = expr.rstrip('-->').strip()
                # Special case for SH magic boot code files
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                #
                # Should we check for more SPDX ids in the same file and
                # complain if there are any?
                #
                fail = 0
                break

        except ParserException as pe:
            # Only self.parse() raises, so line and expr are bound here
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
            else:
                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
            self.spdx_errors += 1

        # stdin has no directory to account to
        if fname == '-':
            return

        base = os.path.dirname(fname)
        if self.dirdepth > 0:
            # Cut the directory down to at most dirdepth components
            parts = base.split('/')
            i = 0
            base = '.'
            while i < self.dirdepth and i < len(parts) and len(parts[i]):
                base += '/' + parts[i]
                i += 1
        elif self.dirdepth == 0:
            base = self.basedir
        else:
            base = './' + base.rstrip('/')
        base += '/'

        di = self.spdx_dirs.get(base, dirinfo())
        di.update(fname, base, fail)
        self.spdx_dirs[base] = di

class pattern(object):
    """One rule of the exclude file.

    The form of the rule selects the matcher bound to ``self.match``:

      '.*'         any file whose base name starts with a dot
      'name/'      directory rule, trailing '/' removed from the pattern
      '/name'      fnmatch rule on the path, leading '/' removed
      anything else  exact comparison with the base name of the file
    """

    def __init__(self, line):
        self.pattern = line
        self.match = self.match_file
        if line == '.*':
            self.match = self.match_dot
        elif line.endswith('/'):
            self.pattern = line[:-1]
            self.match = self.match_dir
        elif line.startswith('/'):
            self.pattern = line[1:]
            self.match = self.match_fn

    def match_dot(self, fpath):
        """Return True if the base name of fpath starts with '.'."""
        return os.path.basename(fpath).startswith('.')

    def match_file(self, fpath):
        """Return True if the base name of fpath equals the pattern."""
        return os.path.basename(fpath) == self.pattern

    def match_fn(self, fpath):
        """Return True if fpath matches the pattern (case sensitive fnmatch)."""
        return fnmatch.fnmatchcase(fpath, self.pattern)

    def match_dir(self, fpath):
        """Return True if the directory of fpath matches or fpath starts with the pattern."""
        if self.match_fn(os.path.dirname(fpath)):
            return True
        return fpath.startswith(self.pattern)

def exclude_file(fpath):
    """Return True if any rule in the global exclude_rules matches fpath."""
    return any(rule.match(fpath) for rule in exclude_rules)

def scan_git_tree(tree, basedir, dirdepth):
    """Check every regular, non excluded file below a git tree.

    Uses the module level ``parser`` and ``args`` set up by the script.

    Args:
        tree: Tree object whose traverse() yields elements with a ``path``.
        basedir: Directory the tree corresponds to, for the statistics.
        dirdepth: Directory depth for the statistics.
    """
    parser.set_dirinfo(basedir, dirdepth)
    for el in tree.traverse():
        # Elements which are not regular files on disk are skipped
        if not os.path.isfile(el.path):
            continue
        if exclude_file(el.path):
            parser.excluded += 1
            continue
        with open(el.path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, el.path)

def scan_git_subtree(tree, path, dirdepth):
    """Descend from tree to the sub tree named by path and scan it.

    Args:
        tree: Tree object to start from.
        path: '/' separated directory path; leading and trailing '/' are
            ignored.
        dirdepth: Directory depth for the statistics.
    """
    subdir = path.strip('/')
    for p in subdir.split('/'):
        tree = tree[p]
    scan_git_tree(tree, subdir, dirdepth)

def read_exclude_file(fname):
    """Read the exclude rules from a file.

    Empty lines and lines starting with '#' (after stripping surrounding
    whitespace) are ignored; every other line becomes one pattern.

    Args:
        fname: Name of the exclude file. A false value yields no rules.

    Returns:
        List of pattern objects in file order.
    """
    rules = []
    if not fname:
        return rules
    with open(fname) as fd:
        for line in fd:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            rules.append(pattern(line))
    return rules

if __name__ == '__main__':

    # Script entry: parse the command line, load the license data, scan
    # the requested paths (or the whole tree) and print the statistics.

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-d', '--dirs', action='store_true',
                    help='Show [sub]directory statistics.')
    ap.add_argument('-D', '--depth', type=int, default=-1,
                    help='Directory depth for -d statistics. Default: unlimited')
    ap.add_argument('-e', '--exclude',
                    help='File containing file patterns to exclude. Default: scripts/spdxexclude')
    ap.add_argument('-f', '--files', action='store_true',
                    help='Show files without SPDX.')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check path arguments
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # Use git to get the valid license expressions
        repo = git.Repo(os.getcwd())
        # Explicit check instead of assert, which is stripped with -O
        if repo.bare:
            raise RuntimeError('Bare git repositories are not supported')

        # Initialize SPDX data
        spdx = read_spdxdata(repo)

        # Initialize the parser
        parser = id_parser(spdx)

    except SPDXException as se:
        if se.el:
            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' %se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        fname = args.exclude
        if not fname:
            # Default: the spdxexclude file next to this script
            fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
        exclude_rules = read_exclude_file(fname)
    except Exception as ex:
        sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        elif args.path:
            for p in args.path:
                if os.path.isfile(p):
                    # Close the file again instead of leaking the handle
                    with open(p, 'rb') as fd:
                        parser.parse_lines(fd, args.maxlines, p)
                elif os.path.isdir(p):
                    scan_git_subtree(repo.head.reference.commit.tree, p,
                                     args.depth)
                else:
                    sys.stderr.write('path %s does not exist\n' %p)
                    sys.exit(1)
        else:
            # Full git tree scan
            scan_git_tree(repo.head.commit.tree, '.', args.depth)

        # Directories in which no file lacks an SPDX identifier
        ndirs = len(parser.spdx_dirs)
        dirsok = sum(1 for di in parser.spdx_dirs.values() if not di.missing)

        if args.verbose:
            sys.stderr.write('\n')
            sys.stderr.write('License files: %12d\n' %spdx.license_files)
            sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
            sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
            sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
            sys.stderr.write('\n')
            sys.stderr.write('Files excluded: %12d\n' %parser.excluded)
            sys.stderr.write('Files checked: %12d\n' %parser.checked)
            sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
            if parser.checked:
                pc = int(100 * parser.spdx_valid / parser.checked)
                sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc))
            sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
            if ndirs:
                sys.stderr.write('\n')
                sys.stderr.write('Directories accounted: %8d\n' %ndirs)
                pc = int(100 * dirsok / ndirs)
                sys.stderr.write('Directories complete: %8d %3d%%\n' %(dirsok, pc))

        if ndirs and ndirs != dirsok and args.dirs:
            if args.verbose:
                sys.stderr.write('\n')
            sys.stderr.write('Incomplete directories: SPDX in Files\n')
            for dname in sorted(parser.spdx_dirs.keys()):
                di = parser.spdx_dirs[dname]
                if di.missing:
                    valid = di.total - di.missing
                    pc = int(100 * valid / di.total)
                    sys.stderr.write(' %-80s: %5d of %5d %3d%%\n' %(dname, valid, di.total, pc))

        if ndirs and ndirs != dirsok and args.files:
            if args.verbose or args.dirs:
                sys.stderr.write('\n')
            sys.stderr.write('Files without SPDX:\n')
            for dname in sorted(parser.spdx_dirs.keys()):
                di = parser.spdx_dirs[dname]
                for f in sorted(di.files):
                    sys.stderr.write(' %s\n' %f)

        sys.exit(0)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)