218 lines
7.8 KiB
Python
Executable File
218 lines
7.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Copyright 2021 Alibaba Group Holding Limited.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import argparse
|
|
import datetime
|
|
import os.path
|
|
import re
|
|
import sys
|
|
from typing import Callable, Union
|
|
|
|
from lib import *
|
|
|
|
# Root directory that is walked when checking sources; taken from BASIC_BUILD_ENV.root_dir.
SOURCE_ROOT = BASIC_BUILD_ENV.root_dir
# Directory holding the 'boilerplate.<ext>.txt' license templates.
BOILERPLATE_TEMPLATE_ROOT = os.path.join(SOURCE_ROOT, 'hack/boilerplates')
|
|
|
|
# Maps a language name (as passed to --language) to the file extension,
# without the leading dot, of its source files.
language_associate_file_extensions = dict(
    python='py',
    go='go',
    bash='sh',
)
|
|
|
|
|
|
def filter_generated_go_source_files(files: [str]) -> [str]:
    """Return *files* without the names that look like generated Go sources.

    A name is dropped when it starts with ``zz_generated.`` or ends with
    ``.pb.go`` or ``.generated.go``. The order of the remaining names is kept.
    """
    generated_suffixes = ('.pb.go', '.generated.go')
    kept = []
    for name in files:
        if name.startswith('zz_generated.') or name.endswith(generated_suffixes):
            continue
        kept.append(name)
    return kept
|
|
|
|
|
|
# Optional per-language filter applied to the list of candidate file names;
# languages without an entry are not filtered.
language_associate_file_filter = dict(
    go=filter_generated_go_source_files,
)
|
|
|
|
|
|
def preprocess_common_file_content(s: str) -> str:
    """Return *s* with any leading whitespace removed.

    The license header is matched from the start of the content, so blank
    lines or spaces in front of it are stripped first.
    """
    stripped = s.lstrip()
    return stripped
|
|
|
|
|
|
# Matches a leading '#!...' line plus the line break(s) after it; group 1 is
# everything that follows (DOTALL lets '.' span the remaining lines).
SHEBANG_REGEX = re.compile(r'^#![^\r\n]+[\r\n]+(.*)', re.DOTALL | re.MULTILINE)
|
|
|
|
|
|
def preprocess_executable_script_file_content(s: str) -> str:
    """Prepare the content of a script file for license header matching.

    A leading shebang line (``#!...``) is removed when present, then the
    common preprocessing (stripping leading whitespace) is applied.

    The common preprocessing is applied whether or not a shebang was found,
    so a script without a shebang is treated the same way as any other
    source file instead of being returned untouched.

    Args:
        s: Raw file content.

    Returns:
        The content with the shebang line and leading whitespace removed.
    """
    m = SHEBANG_REGEX.match(s)
    # group(1) is everything after the shebang line and its line break(s).
    body = m.group(1) if m else s
    return preprocess_common_file_content(body)
|
|
|
|
|
|
# Maps a language name to the function that prepares a file's content before
# it is matched against the license boilerplate.
language_associate_preprocessors = dict(
    python=preprocess_executable_script_file_content,
    bash=preprocess_executable_script_file_content,
    go=preprocess_common_file_content,
)
|
|
|
|
# Regex (one capturing group) for the year part of a license header: a
# comma-separated list whose items are either a single year ('2021') or a
# range ('2019-2021', whitespace allowed around the dash).
# The comma is required before every additional item; the range alternative
# is listed first so that a range is matched as a whole without relying on
# backtracking.
LICENSE_YEAR_REGEX_STR = r'((?:\d+\s*-\s*\d+|\d+)(?:,(?:\d+\s*-\s*\d+|\d+))*)'
|
|
|
|
def load_boilerplate_regex(language) -> re.Pattern:
    """Build the regex that recognises the license header for *language*.

    The template ``boilerplate.<ext>.txt`` is read from
    BOILERPLATE_TEMPLATE_ROOT. Each template line is right-stripped and
    escaped, the lines are joined so that trailing whitespace before a line
    break is tolerated, and every ``YEAR`` placeholder is replaced with
    LICENSE_YEAR_REGEX_STR. The resulting pattern is anchored at the start
    and accepts any content after the header.

    Raises:
        KeyError: if *language* has no registered file extension.
        RuntimeError: if the template file does not exist.
    """
    template_name = 'boilerplate.' + language_associate_file_extensions[language] + '.txt'
    template_path = os.path.join(BOILERPLATE_TEMPLATE_ROOT, template_name)
    if not os.path.exists(template_path):
        raise RuntimeError('boilerplate template file not found for ' + language)

    with open(template_path) as template:
        template_lines = template.read().splitlines()

    # Escape first, substitute YEAR afterwards: re.escape leaves the plain
    # letters of the placeholder untouched, so it can still be found.
    header = '\\s*\\n'.join(re.escape(line.rstrip()) for line in template_lines)
    pattern = '^' + header.replace('YEAR', LICENSE_YEAR_REGEX_STR) + '.*$'
    return re.compile(pattern, re.MULTILINE | re.DOTALL)
|
|
|
|
|
|
def read_file_content_and_preprocess(path: str, process: Callable[[str], str]) -> str:
    """Read the text file at *path* and return ``process(content)``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the check.

    Args:
        path: Path of the file to read.
        process: Function applied to the whole file content.

    Returns:
        Whatever *process* returns for the file content.
    """
    with open(path, encoding='utf-8') as f:
        return process(f.read())
|
|
|
|
|
|
def current_year() -> int:
    """Return the year of the current local date."""
    now = datetime.datetime.now()
    return now.year


# Evaluated once at import time; used as the upper bound for license years
# and as the default for --start-year.
CURRENT_YEAR = current_year()
|
|
|
|
|
|
def extract_license_year_range(year_str: str) -> list[Union[tuple[int, int], tuple[int]]]:
    """Parse the year part of a license header into sorted year ranges.

    *year_str* is a comma-separated list whose items are either a single
    year (``'2021'``) or a range (``'2019-2021'``); whitespace around the
    numbers is ignored.

    Args:
        year_str: Year text as captured from the license header.

    Returns:
        A list with one tuple per item, ``(year,)`` for a single year and
        ``(start, end)`` for a range, sorted by the first year of each item.

    Raises:
        ValueError: if an item does not consist of integers.
    """
    year_ranges = []
    for s in year_str.split(sep=','):
        if '-' in s:
            r = s.split('-')
            year_ranges.append((int(r[0].strip()), int(r[1].strip())))
        else:
            year_ranges.append((int(s.strip()),))

    # Sort in place: sorted() would build a new list and leave this one as is.
    year_ranges.sort(key=lambda x: x[0])
    return year_ranges
|
|
|
|
|
|
def is_year_ranges_overlaps(year_ranges: list[Union[tuple[int, int], int]]) -> (bool, int or None):
    """Tell whether any entry starts at or before the end of the entry before it.

    Entries are compared in the given order: the first element of an entry
    against the last element of its predecessor.

    Returns:
        ``(True, year)`` for the first offending entry, where ``year`` is the
        first element of that entry; ``(False, None)`` when there is none.
    """
    previous = None
    for current in year_ranges:
        if previous and current[0] <= previous[-1]:
            return True, current[0]
        previous = current
    return False, None
|
|
|
|
|
|
def walk_through_project_and_check_boilerplates(language: str, *, exclude_dirs: [str] or None = None,
                                                include_dirs: [str] or None = None,
                                                start_year: int or None = None):
    """Check the license header of every *language* source file under SOURCE_ROOT.

    For each file with the language's extension (after the optional
    per-language file filter) the preprocessed content is matched against the
    boilerplate regex. A message is recorded when the header is missing, when
    the listed year ranges overlap, when the first year is smaller than
    *start_year*, or when the last year is greater than CURRENT_YEAR. The
    messages are sorted and printed to stdout, one per line; nothing is
    printed when no problem was found.

    Args:
        language: Key of the ``language_associate_*`` tables.
        exclude_dirs: Directories relative to SOURCE_ROOT to skip, together
            with everything below them.
        include_dirs: When given, only these directories (relative to
            SOURCE_ROOT) and everything below them are checked.
        start_year: When given and non-zero, the smallest first year allowed.

    Raises:
        KeyError: if *language* is not a known language.
        RuntimeError: if the boilerplate template for *language* is missing.
    """
    if exclude_dirs:
        exclude_dirs = [d.rstrip('/') for d in exclude_dirs]
    if include_dirs:
        include_dirs = [d.rstrip('/') for d in include_dirs]

    def is_path_in(p: str, paths: [str]) -> bool:
        """Return True when *p* is one of *paths* or located below one of them."""
        if not paths:
            return False
        # The directory itself must match as well, otherwise files placed
        # directly inside a listed directory would be treated as outside it.
        return any(p == d or p.startswith(d + '/') for d in paths)

    extension = language_associate_file_extensions[language]
    preprocessor = language_associate_preprocessors[language]
    boilerplate_regex = load_boilerplate_regex(language)
    file_filter = language_associate_file_filter.get(language)

    source_files_not_match_msg = []
    for root, dirs, files in os.walk(SOURCE_ROOT):
        relative_path = os.path.relpath(root, SOURCE_ROOT)
        if is_path_in(relative_path, exclude_dirs) or \
                (include_dirs and not is_path_in(relative_path, include_dirs)):
            continue

        source_files = [f for f in files if str(f).endswith('.' + extension)]
        if file_filter:
            source_files = file_filter(source_files)

        for source_file in source_files:
            # Path shown in messages is relative to SOURCE_ROOT.
            display_path = os.path.join(relative_path, source_file)
            content = read_file_content_and_preprocess(os.path.join(root, source_file), preprocessor)
            match = boilerplate_regex.match(content)
            if not match:
                source_files_not_match_msg.append(display_path + ', misses license header')
                continue

            year_range = extract_license_year_range(match.group(1))
            # Year ranges must not overlap.
            overlap, overlap_year = is_year_ranges_overlaps(year_range)
            if overlap:
                source_files_not_match_msg.append(
                    display_path +
                    ', invalid year range, year overlaps ' + str(overlap_year))
                continue

            # The end year is the LAST element of the last entry: for a range
            # entry (start, end) the first element is only its start.
            s_year, e_year = year_range[0][0], year_range[-1][-1]
            if start_year and s_year < start_year:
                source_files_not_match_msg.append(
                    display_path +
                    ', invalid year range, start year %d smaller than required %d' %
                    (s_year, start_year))
                continue
            if e_year > CURRENT_YEAR:
                source_files_not_match_msg.append(
                    display_path +
                    ', invalid year range, end year %d exceeds current year %d' %
                    (e_year, CURRENT_YEAR))

    # Sort in place so the report order is stable; sorted() alone would
    # discard its result.
    source_files_not_match_msg.sort()

    # Report source files that do not match.
    if source_files_not_match_msg:
        print('\n'.join(source_files_not_match_msg))
|
|
|
|
|
|
def main():
    """Parse the command line and run the license boilerplate check.

    ``--language`` is required and restricted to the known languages, so a
    missing or misspelled value produces a usage error instead of a KeyError
    traceback. ``--include-dirs`` and ``--exclude-dirs`` are comma-separated
    lists; empty entries (stray commas, whitespace-only values) are ignored.

    Returns:
        None, so ``sys.exit(main())`` exits with status 0 once the check ran.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--exclude-dirs', dest='exclude_dirs', default='', type=str,
                        help='Exclude directories, separated by comma.')
    parser.add_argument('--include-dirs', dest='include_dirs', default='', type=str,
                        help='Include directories, separated by comma.')
    parser.add_argument('--language', dest='language', required=True, type=str,
                        choices=sorted(language_associate_file_extensions),
                        help='Language of source files.')
    parser.add_argument('--start-year', dest='start_year', default=CURRENT_YEAR, type=int,
                        help='Start year. If license with begin year lower than this, it will complain.')
    args = parser.parse_args()

    def split_dirs(value: str):
        """Split a comma-separated option value; return None when nothing is listed."""
        entries = [entry.strip() for entry in value.split(',')]
        entries = [entry for entry in entries if entry]
        return entries or None

    walk_through_project_and_check_boilerplates(
        args.language,
        exclude_dirs=split_dirs(args.exclude_dirs),
        include_dirs=split_dirs(args.include_dirs),
        start_year=args.start_year
    )
|
|
|
|
|
|
# Run the check only when executed as a script; main()'s return value is
# used as the process exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)
|