#!/usr/bin/env python3
# Copyright 2021 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Check that source files start with the expected license boilerplate.

The project tree is walked, every source file of the requested language is
matched against the boilerplate template of that language, and the year(s)
found in the license header are validated. Offending files are printed to
stdout, one per line.
"""

import argparse
import datetime
import os.path
import re
import sys
from typing import Callable, Optional, Union

# NOTE(review): presumably the source of BASIC_BUILD_ENV, which is not defined
# in this file -- confirm against the `lib` module.
from lib import *

BOILERPLATE_TEMPLATE_ROOT = os.path.join(BASIC_BUILD_ENV.root_dir, 'hack/boilerplates')
SOURCE_ROOT = BASIC_BUILD_ENV.root_dir

# A parsed year entry: ``(year,)`` for a single year, ``(start, end)`` for a range.
YearRange = Union[tuple[int], tuple[int, int]]

# Maps a language name to the file extension (without the dot) of its sources.
language_associate_file_extensions = {
    'python': 'py',
    'go': 'go',
    'bash': 'sh',
}


def filter_generated_go_source_files(files: list[str]) -> list[str]:
    """Return *files* without generated Go sources.

    A file name is dropped when it starts with ``zz_generated.`` or ends with
    ``.pb.go`` or ``.generated.go``.
    """
    return [f for f in files
            if not f.startswith('zz_generated.')
            and not f.endswith(('.pb.go', '.generated.go'))]


# Optional per-language filter applied to the candidate file names of a directory.
language_associate_file_filter = {
    'go': filter_generated_go_source_files,
}


def preprocess_common_file_content(s: str) -> str:
    """Strip leading whitespace so the license header starts the content."""
    return s.lstrip()


# Matches a leading shebang line; group 1 is everything after it.
SHEBANG_REGEX = re.compile(r'^#![^\r\n]+[\r\n]+(.*)', re.MULTILINE | re.DOTALL)


def preprocess_executable_script_file_content(s: str) -> str:
    """Drop a leading shebang line (if any), then strip leading whitespace.

    Scripts without a shebang get the same whitespace stripping as every
    other source file, so a leading blank line does not hide the header.
    """
    m = SHEBANG_REGEX.match(s)
    if m:
        s = m.group(1)
    return preprocess_common_file_content(s)


# Maps a language name to the function that normalizes file content before
# it is matched against the boilerplate regex.
language_associate_preprocessors = {
    'python': preprocess_executable_script_file_content,
    'bash': preprocess_executable_script_file_content,
    'go': preprocess_common_file_content,
}

# One year entry: either a range ``2019-2021`` or a single year ``2021``.
_LICENSE_YEAR_ITEM_REGEX_STR = r'(?:\d+\s*-\s*\d+|\d+)'
# A comma separated list of year entries, captured as a single group. The
# comma applies to both kinds of entry (single year and range).
LICENSE_YEAR_REGEX_STR = (
    '(' + _LICENSE_YEAR_ITEM_REGEX_STR
    + '(?:,' + _LICENSE_YEAR_ITEM_REGEX_STR + ')*)'
)


def load_boilerplate_regex(language) -> re.Pattern:
    """Build the regex that a source file of *language* must match.

    The template ``boilerplate.<extension>.txt`` is read from
    ``BOILERPLATE_TEMPLATE_ROOT``. Each template line is regex-escaped,
    trailing whitespace is tolerated, and every literal ``YEAR`` in the
    template is replaced with ``LICENSE_YEAR_REGEX_STR``.

    Raises:
        KeyError: if *language* has no registered file extension.
        RuntimeError: if the template file does not exist.
    """
    extension = language_associate_file_extensions[language]
    boilerplate_template_file = os.path.join(
        BOILERPLATE_TEMPLATE_ROOT, 'boilerplate.' + extension + '.txt')
    if not os.path.exists(boilerplate_template_file):
        raise RuntimeError('boilerplate template file not found for ' + language)
    with open(boilerplate_template_file, encoding='utf-8') as f:
        template_str = f.read()

    # Replace YEAR with date regex.
    escaped_lines = [re.escape(line.rstrip()) for line in template_str.splitlines()]
    escape_str = '\\s*\\n'.join(escaped_lines)
    regex_str = '^' + escape_str.replace('YEAR', LICENSE_YEAR_REGEX_STR) + '.*$'
    return re.compile(regex_str, re.MULTILINE | re.DOTALL)


def read_file_content_and_preprocess(path: str, process: Callable[[str], str]) -> str:
    """Read the text file at *path* and return ``process(content)``."""
    with open(path, encoding='utf-8') as f:
        return process(f.read())


def current_year() -> int:
    """Return the current year according to the local clock."""
    return datetime.datetime.now().year


CURRENT_YEAR = current_year()


def extract_license_year_range(year_str: str) -> list[YearRange]:
    """Parse a year list such as ``2019,2021-2023`` into sorted tuples.

    Every comma separated entry becomes ``(year,)`` or ``(start, end)``.
    The result is sorted by the first year of each entry.

    Raises:
        ValueError: if an entry does not consist of integers.
    """
    year_ranges = []
    for entry in year_str.split(','):
        if '-' in entry:
            bounds = entry.split('-')
            year_ranges.append((int(bounds[0].strip()), int(bounds[1].strip())))
        else:
            year_ranges.append((int(entry.strip()),))
    # Sort in place: a bare ``sorted(...)`` call would discard its result.
    year_ranges.sort(key=lambda r: r[0])
    return year_ranges


def is_year_ranges_overlaps(year_ranges: list[YearRange]) -> tuple[bool, Optional[int]]:
    """Tell whether consecutive entries of *year_ranges* overlap.

    Each entry is compared with the one before it, so the list is expected
    to be ordered by start year.

    Returns:
        ``(True, year)`` where *year* is the start of the first entry that
        begins at or before the end of its predecessor, otherwise
        ``(False, None)``.
    """
    last = None
    for r in year_ranges:
        if last is not None and r[0] <= last[-1]:
            return True, r[0]
        last = r
    return False, None


def walk_through_project_and_check_boilerplates(language: str, *,
                                                exclude_dirs: Optional[list[str]] = None,
                                                include_dirs: Optional[list[str]] = None,
                                                start_year: Optional[int] = None):
    """Check the license header of every *language* source under SOURCE_ROOT.

    Args:
        language: key of ``language_associate_file_extensions``.
        exclude_dirs: directories (relative to SOURCE_ROOT) to skip,
            including everything below them.
        include_dirs: if given, only these directories (relative to
            SOURCE_ROOT) and everything below them are checked.
        start_year: if given (and non-zero), a header whose first year is
            lower than this is reported.

    One line per offending file is printed to stdout, sorted. Nothing is
    printed when all files pass.
    """
    if exclude_dirs:
        exclude_dirs = [d.rstrip('/') for d in exclude_dirs]
    if include_dirs:
        include_dirs = [d.rstrip('/') for d in include_dirs]

    def is_path_in(p: str, paths: Optional[list[str]]) -> bool:
        # The directory itself counts as well as anything below it; a plain
        # prefix test on ``d + '/'`` would miss files directly inside ``d``.
        if not paths:
            return False
        return any(p == d or p.startswith(d + '/') for d in paths)

    extension = language_associate_file_extensions[language]
    preprocessor = language_associate_preprocessors[language]
    boilerplate_regex = load_boilerplate_regex(language)
    file_filter = language_associate_file_filter.get(language)

    source_files_not_match_msg = []
    for root, dirs, files in os.walk(SOURCE_ROOT):
        relative_path = os.path.relpath(root, SOURCE_ROOT)
        if is_path_in(relative_path, exclude_dirs) or \
                (include_dirs and not is_path_in(relative_path, include_dirs)):
            continue
        source_files = [f for f in files if str(f).endswith('.' + extension)]
        if file_filter:
            source_files = file_filter(source_files)
        for source_file in source_files:
            display_path = os.path.join(relative_path, source_file)
            content = read_file_content_and_preprocess(
                os.path.join(root, source_file), preprocessor)
            match = boilerplate_regex.match(content)
            if not match:
                source_files_not_match_msg.append(display_path + ', misses license header')
                continue

            year_range = extract_license_year_range(match.group(1))
            # should not overlaps
            overlap, overlap_year = is_year_ranges_overlaps(year_range)
            if overlap:
                source_files_not_match_msg.append(
                    display_path + ', invalid year range, year overlaps ' + str(overlap_year))
                continue
            # First year of the first entry, LAST year of the last entry
            # (``[-1][-1]`` also covers a trailing ``start-end`` range).
            s_year, e_year = year_range[0][0], year_range[-1][-1]
            if start_year and s_year < start_year:
                source_files_not_match_msg.append(
                    display_path +
                    ', invalid year range, start year %d smaller than required %d'
                    % (s_year, start_year))
                continue
            if e_year > CURRENT_YEAR:
                source_files_not_match_msg.append(
                    display_path +
                    ', invalid year range, end year %d exceeds current year %d'
                    % (e_year, CURRENT_YEAR))

    # Sort in place so the report does not depend on directory walk order.
    source_files_not_match_msg.sort()
    # Report source files not match
    if source_files_not_match_msg:
        print('\n'.join(source_files_not_match_msg))


def _split_comma_separated(value: str) -> Optional[list[str]]:
    """Split a comma separated option value; return None when it is empty."""
    if not value:
        return None
    items = [s.strip() for s in value.split(',')]
    items = [s for s in items if s]
    return items or None


def main():
    """Parse the command line and run the boilerplate check.

    Returns None, so the process exit status does not reflect whether
    offending files were found; they are only printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--exclude-dirs', dest='exclude_dirs', default='', type=str,
                        help='Exclude directories, separated by comma.')
    parser.add_argument('--include-dirs', dest='include_dirs', default='', type=str,
                        help='Include directories, separated by comma.')
    parser.add_argument('--language', dest='language', default='', type=str,
                        help='Language of source files.')
    parser.add_argument('--start-year', dest='start_year', default=CURRENT_YEAR, type=int,
                        help='Start year. If license with begin year lower than this, '
                             'it will complain.')
    args = parser.parse_args()

    # Fail with a usage message instead of a bare KeyError traceback.
    if args.language not in language_associate_file_extensions:
        parser.error('unsupported language %r, expected one of: %s'
                     % (args.language, ', '.join(sorted(language_associate_file_extensions))))

    walk_through_project_and_check_boilerplates(
        args.language,
        exclude_dirs=_split_comma_separated(args.exclude_dirs),
        include_dirs=_split_comma_separated(args.include_dirs),
        start_year=args.start_year,
    )


if __name__ == '__main__':
    sys.exit(main())