in packages/hub/src/lib/cache-management.ts [114:202]
/**
 * Scans one cached repository directory and summarises its revisions and blobs.
 *
 * The directory name is split on `REPO_ID_SEPARATOR`: the first segment is passed
 * to `parseRepoType`, the remaining segments are joined with "/" to form the repo id.
 * Every entry of `<repoPath>/snapshots` (except names listed in `FILES_TO_IGNORE`)
 * is treated as one revision whose directory name is the commit oid.
 *
 * @param repoPath - Path of the cached repository directory.
 * @returns Aggregated information about the repository, its revisions and its blobs.
 * @throws Error if the directory name does not contain `REPO_ID_SEPARATOR`.
 * @throws Error if `<repoPath>/snapshots` is missing or is not a directory.
 * @throws Error if an entry of the snapshots directory is not a directory.
 * @throws Error if a ref collected from `<repoPath>/refs` points to a commit that
 *   has no snapshot directory.
 */
export async function scanCachedRepo(repoPath: string): Promise<CachedRepoInfo> {
	// get the directory name
	const name = basename(repoPath);
	if (!name.includes(REPO_ID_SEPARATOR)) {
		throw new Error(`Repo path is not a valid HuggingFace cache directory: ${name}`);
	}

	// parse the repoId from directory name
	const [type, ...remaining] = name.split(REPO_ID_SEPARATOR);
	const repoType = parseRepoType(type);
	const repoId = remaining.join("/");

	const snapshotsPath = join(repoPath, "snapshots");
	const refsPath = join(repoPath, "refs");

	// A missing snapshots directory is reported with the same explicit message as a
	// non-directory entry, instead of leaking a raw ENOENT from `stat`.
	const snapshotStat = await statIfExists(snapshotsPath);
	if (!snapshotStat?.isDirectory()) {
		throw new Error(`Snapshots dir doesn't exist in cached repo ${snapshotsPath}`);
	}

	// The refs directory is optional: scan it only when it exists and is a directory.
	const refsByHash: Map<string, string[]> = new Map();
	const refsStat = await statIfExists(refsPath);
	if (refsStat?.isDirectory()) {
		await scanRefsDir(refsPath, refsByHash);
	}

	// Scan snapshots directory and collect cached revision information
	const cachedRevisions: CachedRevisionInfo[] = [];
	// Filled by scanSnapshotDir and shared across revisions (presumably keyed by blob
	// path so a blob referenced by several revisions is counted once — verify in scanSnapshotDir).
	const blobStats: Map<string, Stats> = new Map();

	const snapshotDirs = await readdir(snapshotsPath);
	for (const dir of snapshotDirs) {
		if (FILES_TO_IGNORE.includes(dir)) continue; // Ignore unwanted files

		const revisionPath = join(snapshotsPath, dir);
		const revisionStat = await stat(revisionPath);
		if (!revisionStat.isDirectory()) {
			throw new Error(`Snapshots folder corrupted. Found a file: ${revisionPath}`);
		}

		const cachedFiles: CachedFileInfo[] = [];
		await scanSnapshotDir(revisionPath, cachedFiles, blobStats);

		// Fold instead of `Math.max(...values)`: spreading a very large array into call
		// arguments can exceed the engine's argument limit and throw a RangeError.
		let revisionSize = 0;
		let latestFileChange = Number.NEGATIVE_INFINITY;
		for (const file of cachedFiles) {
			revisionSize += file.blob.size;
			latestFileChange = Math.max(latestFileChange, file.blob.lastModifiedAt.getTime());
		}
		// An empty revision falls back to the snapshot directory's own mtime.
		const revisionLastModified = cachedFiles.length > 0 ? latestFileChange : revisionStat.mtimeMs;

		cachedRevisions.push({
			commitOid: dir,
			files: cachedFiles,
			refs: refsByHash.get(dir) ?? [],
			size: revisionSize,
			path: revisionPath,
			lastModifiedAt: new Date(revisionLastModified),
		});

		// Consume the refs of this commit; whatever remains afterwards is dangling.
		refsByHash.delete(dir);
	}

	// Verify that all refs refer to a valid revision
	if (refsByHash.size > 0) {
		throw new Error(
			`Reference(s) refer to missing commit hashes: ${JSON.stringify(Object.fromEntries(refsByHash))} (${repoPath})`
		);
	}

	// Aggregate blob statistics in a single pass.
	let totalSize = 0;
	let latestAccess = Number.NEGATIVE_INFINITY;
	let latestChange = Number.NEGATIVE_INFINITY;
	for (const blobStat of blobStats.values()) {
		totalSize += blobStat.size;
		latestAccess = Math.max(latestAccess, blobStat.atimeMs);
		latestChange = Math.max(latestChange, blobStat.mtimeMs);
	}

	// Without any blob, use the timestamps of the repository directory itself.
	const repoStats = await stat(repoPath);
	const hasBlobs = blobStats.size > 0;
	const repoLastAccessed = hasBlobs ? latestAccess : repoStats.atimeMs;
	const repoLastModified = hasBlobs ? latestChange : repoStats.mtimeMs;

	// Return the constructed CachedRepoInfo object
	return {
		id: {
			name: repoId,
			type: repoType,
		},
		path: repoPath,
		filesCount: blobStats.size,
		revisions: cachedRevisions,
		size: totalSize,
		lastAccessedAt: new Date(repoLastAccessed),
		lastModifiedAt: new Date(repoLastModified),
	};
}

/**
 * Stats `path`, resolving to `undefined` when the path does not exist (ENOENT).
 * Any other failure (permissions, I/O, ...) is rethrown unchanged.
 */
async function statIfExists(path: string): Promise<Stats | undefined> {
	try {
		return await stat(path);
	} catch (err: unknown) {
		if (typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT") {
			return undefined;
		}
		throw err;
	}
}