scripts/license-header.py

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
from collections import OrderedDict
import fnmatch
import os
import regex
import sys


class attrdict(dict):
    """A dict whose items can also be read and written as attributes."""

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.  Translate the
        # KeyError into AttributeError so hasattr() and
        # getattr(obj, name, default) work as they do for ordinary objects.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name) from None

    __setattr__ = dict.__setitem__


def parse_args():
    """Parse the command line and return the resulting namespace.

    Options: --header, --extra, --editdist, --remove, --cslash, -v, and the
    mutually exclusive -k (check) / -i (edit in place), followed by one or
    more FILES.
    """
    parser = argparse.ArgumentParser(description="Update license headers")
    parser.add_argument("--header", default="license.header", help="header file")
    parser.add_argument(
        "--extra",
        default=80,
        # Without type=int a value supplied on the command line would arrive
        # as a string and break the integer arithmetic done with it later.
        type=int,
        help="extra characters past beginning of file to look for header",
    )
    parser.add_argument(
        "--editdist", default=12, type=int, help="max edit distance between headers"
    )
    parser.add_argument(
        "--remove", default=False, action="store_true", help="remove the header"
    )
    parser.add_argument(
        "--cslash",
        default=False,
        action="store_true",
        help='use C slash "//" style comments',
    )
    parser.add_argument(
        "-v", default=False, action="store_true", dest="verbose", help="verbose output"
    )

    # Checking and in-place editing cannot be combined.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-k", default=False, action="store_true", dest="check", help="check headers"
    )
    group.add_argument(
        "-i",
        default=False,
        action="store_true",
        dest="inplace",
        help="edit file inplace",
    )

    parser.add_argument("files", metavar="FILES", nargs="+", help="files to process")

    return parser.parse_args()


def file_read(filename):
    """Return the entire text content of ``filename``."""
    with open(filename) as handle:
        return handle.read()


def file_lines(filename):
    """Return the lines of ``filename`` with trailing whitespace of the file removed.

    The content is right-stripped as a whole before splitting on newlines,
    so trailing blank lines are dropped; an empty file yields ``[""]``.
    """
    return file_read(filename).rstrip().split("\n")
def wrapper(prefix, leader, suffix, header):
    """Render header lines as a comment block.

    Every line in ``header`` is prefixed with ``leader``; the lines are
    joined with newlines and enclosed between ``prefix`` and ``suffix``.
    """
    return prefix + "\n".join(leader + line for line in header) + suffix


def wrapper_chpp(header, args):
    """Render ``header`` as a C-family comment.

    Uses ``//`` line comments when ``args.cslash`` is set, otherwise a
    ``/* ... */`` block comment.
    """
    if args.cslash:
        return wrapper("", "//", "\n", header)
    return wrapper("/*\n", " *", "\n */\n", header)


def wrapper_hash(header, args):
    """Render ``header`` as ``#`` line comments (``args`` is unused)."""
    return wrapper("", "#", "\n", header)


# Supported file names / glob patterns.  "wrapper" renders the header for
# that file type; "hashbang" says whether a leading "#!" line must stay
# ahead of the header.
file_types = OrderedDict(
    {
        "CMakeLists.txt": attrdict({"wrapper": wrapper_hash, "hashbang": False}),
        "Makefile": attrdict({"wrapper": wrapper_hash, "hashbang": False}),
        "*.cpp": attrdict({"wrapper": wrapper_chpp, "hashbang": False}),
        "*.dockfile": attrdict({"wrapper": wrapper_hash, "hashbang": False}),
        "*.h": attrdict({"wrapper": wrapper_chpp, "hashbang": False}),
        "*.inc": attrdict({"wrapper": wrapper_chpp, "hashbang": False}),
        "*.java": attrdict({"wrapper": wrapper_chpp, "hashbang": False}),
        "*.prolog": attrdict({"wrapper": wrapper_chpp, "hashbang": False}),
        "*.py": attrdict({"wrapper": wrapper_hash, "hashbang": True}),
        "*.sh": attrdict({"wrapper": wrapper_hash, "hashbang": True}),
        "*.thrift": attrdict({"wrapper": wrapper_chpp, "hashbang": False}),
        "*.txt": attrdict({"wrapper": wrapper_hash, "hashbang": True}),
        "*.yml": attrdict({"wrapper": wrapper_hash, "hashbang": False}),
    }
)

# One regular expression that accepts any supported file name: each glob is
# translated with fnmatch.translate() and the results are OR-ed together.
file_pattern = regex.compile(
    "|".join(["^" + fnmatch.translate(pattern) + "$" for pattern in file_types.keys()])
)

# Fuzzy-match tuning.  A candidate header is detected by matching the first
# _FUZZY_PREFIX_LEN characters of the expected header, with at most
# _FUZZY_PREFIX_EDITS errors, inside the first _FUZZY_WINDOW (+ --extra)
# characters of the file.
_FUZZY_PREFIX_LEN = 60
_FUZZY_PREFIX_EDITS = 6
_FUZZY_WINDOW = 80


def get_filename(filename):
    """Return the final path component of ``filename``."""
    return os.path.basename(filename)


def get_fileextn(filename):
    """Return the extension of ``filename`` (with its dot), or "" if none."""
    # os.path.splitext() always returns a (root, ext) pair.
    return os.path.splitext(filename)[1]


def get_wrapper(filename):
    """Return the ``file_types`` entry that applies to ``filename``.

    An exact name wins; otherwise the first glob pattern in ``file_types``
    that matches is used.  Matching with the patterns themselves (rather
    than with the splitext() extension) keeps this lookup consistent with
    ``file_pattern`` for dot-only names such as ".yml", whose splitext()
    extension is empty.

    Raises:
        KeyError: if no entry matches ``filename``.
    """
    if filename in file_types:
        return file_types[filename]
    for pattern, file_type in file_types.items():
        if fnmatch.fnmatchcase(filename, pattern):
            return file_type
    raise KeyError(filename)


def message(file, string):
    """Print ``string`` to ``file``; do nothing when ``file`` is falsy."""
    if file:
        print(string, file=file)


def _locate_header(content, header_comment, args):
    """Find an existing header near the top of ``content``.

    Returns ``(start, end, exact)``: the span of the header in ``content``
    and whether it matched ``header_comment`` exactly.  ``(0, 0, False)``
    means no header was found.
    """
    limit = len(header_comment) + args.extra

    # Look for an exact substring match that fits within the search limit.
    exact_at = content.find(header_comment, 0, limit)
    if exact_at >= 0:
        return exact_at, exact_at + len(header_comment), True

    # Cheap probe: fuzzy-match only the beginning of the header.  "(?be)"
    # turns on the regex module's BESTMATCH and ENHANCEMATCH flags and
    # "{e<=N}" allows up to N errors.
    probe = regex.search(
        "(?be)(%s){e<=%d}"
        % (regex.escape(header_comment[0:_FUZZY_PREFIX_LEN]), _FUZZY_PREFIX_EDITS),
        content[0 : _FUZZY_WINDOW + args.extra],
    )
    if not probe:
        return 0, 0, False

    # The beginning looks like a header - try harder for the whole header,
    # starting where the probe matched.
    fuzzy = regex.compile(
        "(?be)(%s){e<=%d}" % (regex.escape(header_comment), args.editdist)
    )
    match = fuzzy.search(content[0:limit], probe.start())
    if not match:
        return 0, 0, False
    return match.start(), match.end(), False


def _insert_header(content, header_comment, hashbang):
    """Return ``content`` with ``header_comment`` added at the top.

    When ``hashbang`` is true and ``content`` starts with a "#!" line, the
    header is placed directly after that line instead of before it.
    """
    if hashbang:
        shebang = regex.search("^#!.*\n", content)
        if shebang:
            return shebang.group(0) + header_comment + content[shebang.end() :]
    return header_comment + content


def main():
    """Check, add, fix, or remove the license header of each input file.

    Files come from the command line, or one per line from stdin when the
    only FILES argument is "-".  Without -i or -k the resulting content of
    each changed file is printed to stdout.

    Returns:
        1 if check mode (-k) found a file that needs fixing, otherwise 0.
    """
    args = parse_args()

    # Verbose messages go to stderr because stdout may carry file content;
    # in check mode they go to stdout instead.
    log_to = None
    if args.verbose:
        log_to = sys.stdout if args.check else sys.stderr

    header_text = file_lines(args.header)

    if len(args.files) == 1 and args.files[0] == "-":
        files = [line.strip() for line in sys.stdin]
    else:
        files = args.files

    fail = False
    for filepath in files:
        filename = get_filename(filepath)

        if not file_pattern.match(filename):
            message(log_to, "Skip : " + filepath)
            continue

        content = file_read(filepath)
        wrap = get_wrapper(filename)
        header_comment = wrap.wrapper(header_text, args)

        start, end, exact = _locate_header(content, header_comment, args)

        if exact and not args.remove:
            # NOTE(review): unlike the "nothing to remove" case below, the
            # unchanged content is not echoed to stdout here - confirm that
            # this asymmetry is intended.
            message(log_to, "OK : " + filepath)
            continue

        if args.remove:
            if start == 0 and end == 0:
                # No header found, so there is nothing to remove.
                if not args.inplace:
                    print(content, end="")
                message(log_to, "OK : " + filepath)
                continue

            # If removing the header text, zero it out there.
            header_comment = ""

        message(log_to, "Fix : " + filepath)

        if args.check:
            fail = True
            continue

        # Remove any partially matching header before adding the new one.
        content = content[0:start] + content[end:]
        content = _insert_header(content, header_comment, wrap.hashbang)

        if args.inplace:
            with open(filepath, "w") as out:
                out.write(content)
        else:
            print(content, end="")

    return 1 if fail else 0


if __name__ == "__main__":
    sys.exit(main())

scripts/license-header.py (175 lines of code) (raw):