scripts/ci_clean_cache.py (39 lines of code) (raw):
"""
Utility to clean cache entries whose last access time, as recorded in the cache, is older
than a given number of days.
Exit code:
- 1 if no candidates are found
- 0 if candidates are found
Deletion is enabled by passing the `-d` flag; otherwise the candidates are only listed.
"""
import sys
from datetime import datetime as dt
from huggingface_hub import scan_cache_dir
def find_old_revisions(scan_results, max_age_days=30):
    """Collect the commit hashes of every revision that belongs to a stale cached repo.
    A repo is stale when the number of whole days between the current time and its recorded
    last access time is strictly greater than `max_age_days`. All revisions of a stale repo
    contribute their commit hash, in the order the repos and revisions are iterated.
    Returns an empty list if no repo is older than the threshold.
    """
    reference_time = dt.now()
    stale_hashes = []
    for repo in scan_results.repos:
        # `last_accessed` is handed to `fromtimestamp`, i.e. it is read as a POSIX timestamp.
        # `.days` truncates the elapsed time to whole days before the comparison.
        age_in_days = (reference_time - dt.fromtimestamp(repo.last_accessed)).days
        if age_in_days > max_age_days:
            stale_hashes.extend(revision.commit_hash for revision in repo.revisions)
    return stale_hashes
def delete_old_revisions(scan_results, delete_candidates, do_delete=False):
    """Report what removing `delete_candidates` would free and optionally remove them.
    The candidates are passed to `scan_results.delete_revisions`, whose result supplies the
    expected freed size that is printed together with the candidate list. The deletion is
    only executed when `do_delete` is true; otherwise a hint about the -d flag is printed.
    """
    strategy = scan_results.delete_revisions(*delete_candidates)
    print(f"Would free {strategy.expected_freed_size_str}")
    print(f"Candidates: {delete_candidates}")

    # Dry run is the default: bail out before anything is touched.
    if not do_delete:
        print("Not deleting, pass the -d flag.")
        return

    print("Deleting now.")
    strategy.execute()
if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command line: an age threshold in days plus an opt-in switch for real deletion.
    delete_help = (
        "Delete mode; Really delete items if there are candidates. Exit code = 0 when we found something to delete, 1 "
        "otherwise."
    )
    cli = ArgumentParser()
    cli.add_argument("-a", "--max-age", type=int, default=30, help="Max. age in days items in the cache may have.")
    cli.add_argument("-d", "--delete", action="store_true", help=delete_help)
    options = cli.parse_args()

    # Scan the cache once and reuse the result for both the search and the deletion.
    cache_info = scan_cache_dir()
    stale_hashes = find_old_revisions(cache_info, options.max_age)

    if stale_hashes:
        delete_old_revisions(cache_info, stale_hashes, do_delete=options.delete)
    else:
        # Nothing to clean is signalled to the caller with a non-zero exit code.
        print("No delete candidates found, not deleting anything.")
        sys.exit(1)