polardbxoperator/hack/make-rules/verify-boilerplates.py

218 lines
7.8 KiB
Python
Executable File

#!/usr/bin/env python3
# Copyright 2021 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import datetime
import os.path
import re
import sys
from typing import Callable, Union
from lib import *
# Root directory of the project tree that gets scanned. BASIC_BUILD_ENV is
# not defined in this file; presumably it is provided by the star import of `lib`.
SOURCE_ROOT = BASIC_BUILD_ENV.root_dir
# Directory holding the per-language license templates (boilerplate.<ext>.txt).
BOILERPLATE_TEMPLATE_ROOT = os.path.join(SOURCE_ROOT, 'hack/boilerplates')
# Maps each supported language name to the extension (without the dot) of its
# source files. The extension also selects the boilerplate template file.
language_associate_file_extensions = dict(
    python='py',
    go='go',
    bash='sh',
)
def filter_generated_go_source_files(files: [str]) -> [str]:
    """Drop generated Go sources from a list of file names.

    A name counts as generated when it starts with ``zz_generated.`` or ends
    with ``.pb.go`` or ``.generated.go``. The remaining names are returned in
    a new list, in their original order.
    """
    generated_suffixes = ('.pb.go', '.generated.go')
    kept = []
    for name in files:
        if name.startswith('zz_generated.') or name.endswith(generated_suffixes):
            continue
        kept.append(name)
    return kept
# Optional per-language filter applied to the candidate file names of a
# directory. Languages without an entry are not filtered.
language_associate_file_filter = dict(go=filter_generated_go_source_files)
def preprocess_common_file_content(s: str) -> str:
    """Return *s* with all leading whitespace removed.

    Blank lines or indentation in front of the license header are therefore
    ignored when the header is matched. Trailing content is left untouched.
    """
    without_leading_whitespace = s.lstrip()
    return without_leading_whitespace
# Matches a shebang line at the very start of the content and captures
# everything that follows its line break(s).
SHEBANG_REGEX = re.compile('^#![^\\r\\n]+[\\r\\n]+(.*)', re.MULTILINE | re.DOTALL)


def preprocess_executable_script_file_content(s: str) -> str:
    """Normalize script content before the license header is matched.

    A leading shebang line, if present, is removed. Leading whitespace is then
    stripped in every case, so a script without a shebang is normalized the
    same way as one with a shebang (previously the no-shebang branch returned
    the content untouched, and a leading blank line made the header check fail).

    Args:
        s: Raw content of a script file.

    Returns:
        The content without shebang line and without leading whitespace.
    """
    m = SHEBANG_REGEX.match(s)
    remainder = m.group(1) if m else s
    return preprocess_common_file_content(remainder)
# Content preprocessor per language, applied to a file's text before the
# license header regex is matched. Script languages go through the
# shebang-aware preprocessor, Go through the common one.
language_associate_preprocessors = {
    **dict.fromkeys(('python', 'bash'), preprocess_executable_script_file_content),
    'go': preprocess_common_file_content,
}
# Year expression accepted in place of the template's YEAR token: one or more
# comma-separated items, each either a single year ("2021") or a range
# ("2019-2021"). Whitespace is tolerated around "-" and ",". Items must be
# separated by a comma (the previous pattern only required the comma in front
# of single years, so two ranges could be glued together without one).
# The only capturing group spans the whole expression.
LICENSE_YEAR_REGEX_STR = '((?:\\d+\\s*-\\s*\\d+|\\d+)(?:\\s*,\\s*(?:\\d+\\s*-\\s*\\d+|\\d+))*)'
def load_boilerplate_regex(language) -> re.Pattern:
    """Build the regex that recognizes the license header for *language*.

    The template ``boilerplate.<ext>.txt`` is read from
    BOILERPLATE_TEMPLATE_ROOT, where ``<ext>`` is the file extension
    registered for the language. Every template line is matched literally,
    the token ``YEAR`` is replaced by LICENSE_YEAR_REGEX_STR, and arbitrary
    content may follow the header.

    Args:
        language: A key of language_associate_file_extensions.

    Returns:
        The compiled pattern; its group 1 is the matched year expression.

    Raises:
        KeyError: If *language* has no registered file extension.
        RuntimeError: If the template file does not exist.
    """
    extension = language_associate_file_extensions[language]
    boilerplate_template_file = os.path.join(BOILERPLATE_TEMPLATE_ROOT, 'boilerplate.' + extension + '.txt')
    if not os.path.exists(boilerplate_template_file):
        raise RuntimeError('boilerplate template file not found for ' + language)

    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the machine running the check.
    with open(boilerplate_template_file, encoding='utf-8') as f:
        template_str = f.read()

    # Escape each line so it is matched literally; trailing whitespace of the
    # template lines is dropped and re-allowed through the '\s*' separator.
    escaped_lines = [re.escape(line.rstrip()) for line in template_str.splitlines()]
    escape_str = '\\s*\\n'.join(escaped_lines)

    # Replace YEAR with the year regex. With DOTALL the trailing '.*' accepts
    # the rest of the file after the header.
    regex_str = '^' + escape_str.replace('YEAR', LICENSE_YEAR_REGEX_STR) + '.*$'
    return re.compile(regex_str, re.MULTILINE | re.DOTALL)
def read_file_content_and_preprocess(path: str, process: Callable[[str], str]) -> str:
    """Read the text file at *path* and return ``process(content)``.

    The file is decoded as UTF-8 explicitly rather than with the
    locale-dependent default, so non-ASCII source files are read the same way
    on every machine.

    Args:
        path: Path of the file to read.
        process: Callable applied to the complete file content.

    Returns:
        Whatever *process* returns for the file content.
    """
    with open(path, encoding='utf-8') as f:
        return process(f.read())
def current_year() -> int:
    """Return the current calendar year according to the local clock."""
    now = datetime.datetime.now()
    return now.year


# Evaluated once when the module is loaded.
CURRENT_YEAR = current_year()
def extract_license_year_range(year_str: str) -> list[tuple[int, ...]]:
    """Parse a license year expression into year items sorted by start year.

    The expression is a comma-separated list whose items are either a single
    year (``"2021"``) or a range (``"2019-2021"``); whitespace around the
    numbers is ignored.

    Args:
        year_str: The year expression, e.g. ``"2018-2019, 2021"``.

    Returns:
        One tuple per item: ``(year,)`` for a single year and
        ``(start, end)`` for a range, ordered by their first element.

    Raises:
        ValueError: If a part of the expression is not an integer.
    """
    year_ranges = []
    for item in year_str.split(','):
        if '-' in item:
            bounds = item.split('-')
            year_ranges.append((int(bounds[0].strip()), int(bounds[1].strip())))
        else:
            year_ranges.append((int(item.strip()),))
    # Sort in place: the previous sorted(...) call built a sorted copy and
    # discarded it, leaving the items in their written order.
    year_ranges.sort(key=lambda r: r[0])
    return year_ranges
def is_year_ranges_overlaps(year_ranges: list[Union[tuple[int, int], int]]) -> (bool, int or None):
    """Report whether neighbouring year items overlap.

    Only adjacent items are compared, in the order given: an item overlaps
    when its first year is not greater than the last year of the item before
    it.

    Returns:
        ``(True, year)`` with the first year of the first offending item, or
        ``(False, None)`` when no neighbouring items overlap.
    """
    for previous, current in zip(year_ranges, year_ranges[1:]):
        if current[0] <= previous[-1]:
            return True, current[0]
    return False, None
def walk_through_project_and_check_boilerplates(language: str, *, exclude_dirs: Union[list[str], None] = None,
                                                include_dirs: Union[list[str], None] = None,
                                                start_year: Union[int, None] = None):
    """Check the license header of every source file of *language* under SOURCE_ROOT.

    For each file with the language's extension, the preprocessed content must
    start with the language's boilerplate. The year expression found in the
    header must not contain overlapping items, must not start before
    *start_year* (when given) and must not end after CURRENT_YEAR. One line
    per offending file is collected; the sorted lines are printed to stdout.
    Nothing is printed when all files pass, and the function returns None.

    Args:
        language: Key of the language tables (extension, preprocessor, filter).
        exclude_dirs: Directories, relative to SOURCE_ROOT, that are skipped
            together with everything below them.
        include_dirs: If given, only these directories (relative to
            SOURCE_ROOT) and everything below them are checked.
        start_year: Smallest accepted first year of a license header.

    Raises:
        KeyError: If *language* is not a known language.
        RuntimeError: If the boilerplate template for *language* is missing.
    """
    if exclude_dirs:
        exclude_dirs = [d.rstrip('/') for d in exclude_dirs]
    if include_dirs:
        include_dirs = [d.rstrip('/') for d in include_dirs]

    def is_path_in(p: str, paths) -> bool:
        # A directory belongs to a listed path when it IS that path or lies
        # below it. Matching only 'd/' prefixes would miss files located
        # directly inside a listed directory.
        if not paths:
            return False
        return any(p == d or p.startswith(d + '/') for d in paths)

    def check_license_years(year_str: str):
        # Returns the complaint suffix for the first violated rule, or None.
        year_range = extract_license_year_range(year_str)
        # Items should not overlap.
        overlap, overlap_year = is_year_ranges_overlaps(year_range)
        if overlap:
            return ', invalid year range, year overlaps ' + str(overlap_year)
        # First year of the first item, LAST year of the last item: for a
        # trailing range such as 2019-2021 the end year is 2021, not 2019.
        s_year, e_year = year_range[0][0], year_range[-1][-1]
        if start_year and s_year < start_year:
            return ', invalid year range, start year %d smaller than required %d' % (s_year, start_year)
        if e_year > CURRENT_YEAR:
            return ', invalid year range, end year %d exceeds current year %d' % (e_year, CURRENT_YEAR)
        return None

    extension = language_associate_file_extensions[language]
    preprocessor = language_associate_preprocessors[language]
    boilerplate_regex = load_boilerplate_regex(language)
    file_filter = language_associate_file_filter.get(language)

    source_files_not_match_msg = []
    for root, dirs, files in os.walk(SOURCE_ROOT):
        relative_path = os.path.relpath(root, SOURCE_ROOT)
        if is_path_in(relative_path, exclude_dirs) or \
                (include_dirs and not is_path_in(relative_path, include_dirs)):
            continue

        source_files = [f for f in files if f.endswith('.' + extension)]
        if file_filter:
            source_files = file_filter(source_files)

        for source_file in source_files:
            reported_path = os.path.join(relative_path, source_file)
            content = read_file_content_and_preprocess(os.path.join(root, source_file), preprocessor)
            match = boilerplate_regex.match(content)
            if not match:
                source_files_not_match_msg.append(reported_path + ', misses license header')
                continue
            complaint = check_license_years(match.group(1))
            if complaint:
                source_files_not_match_msg.append(reported_path + complaint)

    # Sort in place so the report order is stable; a bare sorted(...) call
    # would only build and drop a sorted copy.
    source_files_not_match_msg.sort()

    # Report source files that do not match.
    if source_files_not_match_msg:
        print('\n'.join(source_files_not_match_msg))
def main():
    """Parse the command line and run the boilerplate check.

    Options: ``--exclude-dirs`` and ``--include-dirs`` (comma-separated
    directory lists, empty meaning "not given"), ``--language`` and
    ``--start-year`` (defaults to CURRENT_YEAR). Returns None.
    """

    def split_dirs(raw: str):
        # An empty option value means the option was not specified.
        if not raw:
            return None
        return [part.strip() for part in raw.split(',')]

    parser = argparse.ArgumentParser()
    parser.add_argument('--exclude-dirs', dest='exclude_dirs', default='', type=str,
                        help='Exclude directories, separated by comma.')
    parser.add_argument('--include-dirs', dest='include_dirs', default='', type=str,
                        help='Include directories, separated by comma.')
    parser.add_argument('--language', dest='language', default='', type=str,
                        help='Language of source files.')
    parser.add_argument('--start-year', dest='start_year', default=CURRENT_YEAR, type=int,
                        help='Start year. If license with begin year lower than this, it will complain.')
    args = parser.parse_args()

    walk_through_project_and_check_boilerplates(
        args.language,
        exclude_dirs=split_dirs(args.exclude_dirs),
        include_dirs=split_dirs(args.include_dirs),
        start_year=args.start_year,
    )
if __name__ == '__main__':
    # main() has no return statement, so its result is None and the process
    # exits with status 0 once the check has run.
    exit_status = main()
    sys.exit(exit_status)