export async function scanCachedRepo()

in packages/hub/src/lib/cache-management.ts [114:202]


/**
 * Scan one cached repository directory and summarise its revisions and blobs.
 *
 * The directory name is split on `REPO_ID_SEPARATOR`: the first segment is
 * passed to `parseRepoType`, the remaining segments are joined with "/" to
 * form the repo id. Every entry of `<repoPath>/snapshots` (except names listed
 * in `FILES_TO_IGNORE`) is treated as one revision whose directory name is the
 * commit oid. `<repoPath>/refs` is optional: when it is absent, or is not a
 * directory, no refs are attached to any revision.
 *
 * @param repoPath - Path of the cached repository directory.
 * @returns Aggregated information about the repo: id, path, number of distinct
 *   blobs, per-revision details, total blob size and last accessed/modified
 *   dates (taken from the blobs, or from the repo directory when no blob was
 *   found).
 * @throws Error when the directory name does not contain `REPO_ID_SEPARATOR`,
 *   when the snapshots directory is missing or is not a directory, when an
 *   entry of the snapshots directory is not a directory, or when a ref points
 *   to a commit hash that has no snapshot. Other filesystem errors are
 *   propagated unchanged.
 */
export async function scanCachedRepo(repoPath: string): Promise<CachedRepoInfo> {
	// get the directory name
	const name = basename(repoPath);
	if (!name.includes(REPO_ID_SEPARATOR)) {
		throw new Error(`Repo path is not a valid HuggingFace cache directory: ${name}`);
	}

	// parse the repoId from directory name
	const [type, ...remaining] = name.split(REPO_ID_SEPARATOR);
	const repoType = parseRepoType(type);
	const repoId = remaining.join("/");

	const snapshotsPath = join(repoPath, "snapshots");
	const refsPath = join(repoPath, "refs");

	// `stat` rejects with ENOENT for a missing path; map that single case to
	// `undefined` so "does not exist" can be handled like "is not a directory".
	const statIfExists = async (path: string): Promise<Stats | undefined> => {
		try {
			return await stat(path);
		} catch (err) {
			const isMissing =
				typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";
			if (isMissing) {
				return undefined;
			}
			throw err;
		}
	};

	const snapshotStat = await statIfExists(snapshotsPath);
	if (snapshotStat === undefined || !snapshotStat.isDirectory()) {
		throw new Error(`Snapshots dir doesn't exist in cached repo ${snapshotsPath}`);
	}

	// The refs directory is optional: scan it only when it exists and is a directory
	const refsByHash: Map<string, string[]> = new Map();
	const refsStat = await statIfExists(refsPath);
	if (refsStat !== undefined && refsStat.isDirectory()) {
		await scanRefsDir(refsPath, refsByHash);
	}

	// Scan snapshots directory and collect cached revision information
	const cachedRevisions: CachedRevisionInfo[] = [];
	const blobStats: Map<string, Stats> = new Map(); // Filled by scanSnapshotDir, shared across revisions

	const snapshotDirs = await readdir(snapshotsPath);
	for (const dir of snapshotDirs) {
		if (FILES_TO_IGNORE.includes(dir)) continue; // Ignore unwanted files

		const revisionPath = join(snapshotsPath, dir);
		const revisionStat = await stat(revisionPath);
		if (!revisionStat.isDirectory()) {
			throw new Error(`Snapshots folder corrupted. Found a file: ${revisionPath}`);
		}

		const cachedFiles: CachedFileInfo[] = [];
		await scanSnapshotDir(revisionPath, cachedFiles, blobStats);

		// Accumulate pairwise instead of `Math.max(...array)`: spreading a very
		// large array as call arguments can throw a RangeError.
		let revisionSize = 0;
		let revisionLastModified = cachedFiles.length > 0 ? -Infinity : revisionStat.mtimeMs;
		for (const file of cachedFiles) {
			revisionSize += file.blob.size;
			revisionLastModified = Math.max(revisionLastModified, file.blob.lastModifiedAt.getTime());
		}

		cachedRevisions.push({
			commitOid: dir,
			files: cachedFiles,
			refs: refsByHash.get(dir) ?? [],
			size: revisionSize,
			path: revisionPath,
			lastModifiedAt: new Date(revisionLastModified),
		});

		// Whatever is left in the map after the loop points to a missing snapshot
		refsByHash.delete(dir);
	}

	// Verify that all refs refer to a valid revision
	if (refsByHash.size > 0) {
		throw new Error(
			`Reference(s) refer to missing commit hashes: ${JSON.stringify(Object.fromEntries(refsByHash))} (${repoPath})`
		);
	}

	// Repo-level totals come from the blobs; the repo directory's own
	// timestamps are only the fallback when no blob was recorded.
	const repoStats = await stat(repoPath);
	const hasBlobs = blobStats.size > 0;
	let repoSize = 0;
	let repoLastAccessed = hasBlobs ? -Infinity : repoStats.atimeMs;
	let repoLastModified = hasBlobs ? -Infinity : repoStats.mtimeMs;
	for (const blobStat of blobStats.values()) {
		repoSize += blobStat.size;
		repoLastAccessed = Math.max(repoLastAccessed, blobStat.atimeMs);
		repoLastModified = Math.max(repoLastModified, blobStat.mtimeMs);
	}

	// Return the constructed CachedRepoInfo object
	return {
		id: {
			name: repoId,
			type: repoType,
		},
		path: repoPath,
		filesCount: blobStats.size,
		revisions: cachedRevisions,
		size: repoSize,
		lastAccessedAt: new Date(repoLastAccessed),
		lastModifiedAt: new Date(repoLastModified),
	};
}