scripts/spdxcheck: Put excluded files and directories into a separate file
[sfrench/cifs-2.6.git] / scripts / spdxcheck.py
index b9e0f1725a0a2330b3d8c241f675e4be1d3b1885..18cb9f5b3d3d6900a82a65d42b74f81ba0e08dd8 100755 (executable)
@@ -6,6 +6,7 @@ from argparse import ArgumentParser
 from ply import lex, yacc
 import locale
 import traceback
+import fnmatch
 import sys
 import git
 import re
@@ -106,6 +107,7 @@ class id_parser(object):
         self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
         self.lines_checked = 0
         self.checked = 0
+        self.excluded = 0
         self.spdx_valid = 0
         self.spdx_errors = 0
         self.spdx_dirs = {}
@@ -254,17 +256,47 @@ class id_parser(object):
         di.update(fname, base, fail)
         self.spdx_dirs[base] = di
 
+class pattern(object):
+    def __init__(self, line):
+        self.pattern = line
+        self.match = self.match_file
+        if line == '.*':
+            self.match = self.match_dot
+        elif line.endswith('/'):
+            self.pattern = line[:-1]
+            self.match = self.match_dir
+        elif line.startswith('/'):
+            self.pattern = line[1:]
+            self.match = self.match_fn
+
+    def match_dot(self, fpath):
+        return os.path.basename(fpath).startswith('.')
+
+    def match_file(self, fpath):
+        return os.path.basename(fpath) == self.pattern
+
+    def match_fn(self, fpath):
+        return fnmatch.fnmatchcase(fpath, self.pattern)
+
+    def match_dir(self, fpath):
+        if self.match_fn(os.path.dirname(fpath)):
+            return True
+        return fpath.startswith(self.pattern)
+
+def exclude_file(fpath):
+    for rule in exclude_rules:
+        if rule.match(fpath):
+            return True
+    return False
+
 def scan_git_tree(tree, basedir, dirdepth):
     parser.set_dirinfo(basedir, dirdepth)
     for el in tree.traverse():
-        # Exclude stuff which would make pointless noise
-        # FIXME: Put this somewhere more sensible
-        if el.path.startswith("LICENSES"):
-            continue
-        if el.path.find("license-rules.rst") >= 0:
-            continue
         if not os.path.isfile(el.path):
             continue
+        if exclude_file(el.path):
+            parser.excluded += 1
+            continue
         with open(el.path, 'rb') as fd:
             parser.parse_lines(fd, args.maxlines, el.path)
 
@@ -273,6 +305,20 @@ def scan_git_subtree(tree, path, dirdepth):
         tree = tree[p]
     scan_git_tree(tree, path.strip('/'), dirdepth)
 
+def read_exclude_file(fname):
+    rules = []
+    if not fname:
+        return rules
+    with open(fname) as fd:
+        for line in fd:
+            line = line.strip()
+            if line.startswith('#'):
+                continue
+            if not len(line):
+                continue
+            rules.append(pattern(line))
+    return rules
+
 if __name__ == '__main__':
 
     ap = ArgumentParser(description='SPDX expression checker')
@@ -281,6 +327,8 @@ if __name__ == '__main__':
                     help='Show [sub]directory statistics.')
     ap.add_argument('-D', '--depth', type=int, default=-1,
                     help='Directory depth for -d statistics. Default: unlimited')
+    ap.add_argument('-e', '--exclude',
+                    help='File containing file patterns to exclude. Default: scripts/spdxexclude')
     ap.add_argument('-f', '--files', action='store_true',
                     help='Show files without SPDX.')
     ap.add_argument('-m', '--maxlines', type=int, default=15,
@@ -316,6 +364,15 @@ if __name__ == '__main__':
         sys.stderr.write('%s\n' %traceback.format_exc())
         sys.exit(1)
 
+    try:
+        fname = args.exclude
+        if not fname:
+            fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
+        exclude_rules = read_exclude_file(fname)
+    except Exception as ex:
+        sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
+        sys.exit(1)
+
     try:
         if len(args.path) and args.path[0] == '-':
             stdin = os.fdopen(sys.stdin.fileno(), 'rb')
@@ -349,6 +406,7 @@ if __name__ == '__main__':
                 sys.stderr.write('License IDs        %12d\n' %len(spdx.licenses))
                 sys.stderr.write('Exception IDs      %12d\n' %len(spdx.exceptions))
                 sys.stderr.write('\n')
+                sys.stderr.write('Files excluded:    %12d\n' %parser.excluded)
                 sys.stderr.write('Files checked:     %12d\n' %parser.checked)
                 sys.stderr.write('Lines checked:     %12d\n' %parser.lines_checked)
                 if parser.checked: