in scripts/cleanup_report.py [0:0]
def write_report(examples, repo_files, report_path=None, summarize=False, dirty=False):
    """
    Write a report of files cleaned versus files awaiting cleanup.

    A file counts as cleaned when the URL built from an example's metadata
    matches (case-insensitively) an entry in ``repo_files``.
    Files that are listed in metadata but do not exist in the repo are printed
    to stdout as missing files.
    Files that are listed more than once in any metadata are printed to stdout
    as duplicates (this check is case-sensitive on the built URL).
    Unless ``summarize`` is set, the report includes the full list of example
    metadata in CSV format (``File,Language,Service``).

    :param examples: A list of example dictionaries. Each needs a 'files' list
                     and a 'metadata_path'; each file entry needs a 'path' and
                     may have a 'services' list. Examples missing a required
                     key are reported on stdout and excluded from the example
                     count.
    :param repo_files: A sized collection of the files that exist in the repo,
                       in the same form that make_github_url returns
                       (presumably GitHub URLs -- verify against the caller).
                       Its length is the report's total file count.
    :param report_path: The output file to write the report. If this file exists,
                        it is overwritten. If no file is specified, the report
                        is written to sys.stdout.
    :param summarize: Omit CSV output (and the dirty file list) and only print
                      the summary.
    :param dirty: Include dirty files in the full report. Dirty files are repo
                  files that no example metadata accounts for; they are listed
                  lowercased and sorted.
    """
    lines = ["File,Language,Service"]
    # Only membership tests and a count are needed, so a set avoids rescanning
    # a list for every file.
    clean_files = set()
    bad_examples = []
    repo_files_lookup = {rf.lower() for rf in repo_files}

    for example in examples:
        try:
            for file_info in example['files']:
                metadata_folder = os.path.split(example['metadata_path'])[0]
                file_url = make_github_url(metadata_folder, file_info['path'])

                if file_url.lower() not in repo_files_lookup:
                    print(
                        f"File '{file_url}' reported in metadata "
                        f"does not exist in the repo.")
                    continue

                if file_url in clean_files:
                    print(f"File '{file_url}' reported a second time in "
                          f"{example['metadata_path']}.")
                    continue

                clean_files.add(file_url)
                ext = os.path.splitext(file_url)[1].lstrip('.')
                try:
                    language = EXT_LOOKUP[ext]
                except KeyError:
                    # An unknown extension is not a missing metadata key, so
                    # report it as what it is. As before, the file stays
                    # counted as cleaned, the example is marked bad, and the
                    # rest of its files are skipped.
                    print(f"ERROR: file '{file_url}' has an unrecognized "
                          f"extension '{ext}': {example}.")
                    bad_examples.append(example)
                    break

                for service in file_info.get('services', ['']):
                    lines.append(','.join([file_url, language, service]))
        except KeyError as error:
            print(f"ERROR: example missing a required {error} key: {example}.")
            bad_examples.append(example)

    clean_count = len(clean_files)
    total_count = len(repo_files)

    report = open(report_path, 'w') if report_path else sys.stdout
    try:
        report.write(f"Total number of examples: "
                     f"{len(examples) - len(bad_examples)}.\n")
        report.write(f"Total number of cleaned files: {clean_count}.\n")
        report.write(f"Total number of files: {total_count}.\n")
        if total_count > 0:
            report.write(f"Percent clean: "
                         f"{clean_count/total_count:.0%}.")
        if not summarize:
            # Skip the CSV section entirely when it would be just the header.
            if len(lines) > 1:
                report.write("\n")
                report.write('\n'.join(lines))
            if dirty:
                clean_lookup = {url.lower() for url in clean_files}
                dirty_files = sorted(repo_files_lookup - clean_lookup)
                report.write("\n")
                if dirty_files:
                    report.write("**Dirty files found:**\n")
                    report.write('\n'.join(dirty_files))
                else:
                    report.write("**No dirty files found!**")
    finally:
        # Never close sys.stdout; only close a file this function opened.
        if report is not sys.stdout:
            report.close()

    # Announce the file only after a successful write; an exception above
    # propagates without claiming the report was written.
    if report is not sys.stdout:
        print(f"Report written to {report_path}.")