async function main()

Defined in packages/gguf/src/cli.ts [23:138]. This is the entry point of the gguf CLI: it parses the command-line arguments, loads the GGUF file (including any additional shards), prints the metadata key/value pairs, estimates memory usage for the requested context length, and optionally lists every tensor. It relies on helpers not shown in this excerpt (showHelp, printTable, calcMemoryUsage, mapDtypeToName) and on the package's ggufAllShards, GGML_QUANT_SIZES, and GGUFParseOutput exports.


async function main() {
	let ggufPath = "";
	let showTensors = false;
	let nCtx = 4096;
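	// argv[0] is the node binary and argv[1] the script path, so user arguments start at index 2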
	for (let i = 2; i < process.argv.length; i++) {
		if (process.argv[i] === "--help" || process.argv[i] === "-h") {
			showHelp(0);
		} else if (process.argv[i] === "--show-tensor") {
			showTensors = true;
		} else if (process.argv[i] === "--context" || process.argv[i] === "-c") {
			nCtx = Number(process.argv[++i]);
		} else {
			ggufPath = process.argv[i];
		}
	}

	if (!ggufPath.length) {
		console.error("Error: Missing path to gguf file");
		showHelp(1);
	}

	const { shards } = await ggufAllShards(ggufPath, {
		allowLocalFile: true,
	});
	const { metadata, tensorInfos } = shards[0];

	// merge tensor infos from all shards (metadata is taken from the first shard)
	for (let i = 1; i < shards.length; i++) {
		tensorInfos.push(...shards[i].tensorInfos);
	}

	// TODO: print info about endianness
	console.log(`* Dumping ${Object.keys(metadata).length} key/value pair(s)`);
	printTable(
		[
			{ name: "Idx", alignRight: true },
			// { name: 'Type' }, // TODO: support this
			{ name: "Count", alignRight: true },
			{ name: "Value" },
		],
		Object.entries(metadata).map(([key, value], i) => {
			const MAX_LEN = 50;
			let strVal = "";
			let count = 1;
			if (Array.isArray(value)) {
				strVal = JSON.stringify(value);
				count = value.length;
			} else if (value instanceof String || typeof value === "string") {
				strVal = JSON.stringify(value);
			} else {
				strVal = value.toString();
			}
			strVal = strVal.length > MAX_LEN ? strVal.slice(0, MAX_LEN) + "..." : strVal;
			return [(i + 1).toString(), count.toString(), `${key} = ${strVal}`];
		})
	);

	console.log();
	console.log(`* Memory usage estimation (with context length of ${nCtx} tokens)`);
	try {
		const kvUsage = calcMemoryUsage(metadata as GGUFParseOutput<{ strict: false }>["metadata"], nCtx);
		let modelWeightInBytes = 0;
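		// Weight size per tensor = element count × bits per element (from GGML_QUANT_SIZES) / 8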
		for (const tensorInfo of tensorInfos) {
			const nElem = Number(tensorInfo.shape.reduce((a, b) => a * b, 1n));
			const tensorSizeInBytes = nElem * (GGML_QUANT_SIZES[tensorInfo.dtype] / 8);
			modelWeightInBytes += tensorSizeInBytes;
		}
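		// Rough overhead heuristic: KV usage at a fixed 256-token context plus 5% of the weight size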
		const overhead =
			calcMemoryUsage(metadata as GGUFParseOutput<{ strict: false }>["metadata"], 256).totalBytes +
			modelWeightInBytes * 0.05;
		const totalMemoryUsage = kvUsage.totalBytes + overhead + modelWeightInBytes;
		printTable(
			[{ name: "Item" }, { name: "Memory usage", alignRight: true }],
			[
				["K cache", (kvUsage.totalBytesK / 1e9).toFixed(2) + " GB"],
				["V cache", (kvUsage.totalBytesV / 1e9).toFixed(2) + " GB"],
				["Weight", (modelWeightInBytes / 1e9).toFixed(2) + " GB"],
				["Overhead", (overhead / 1e9).toFixed(2) + " GB"],
				["", "---"],
				["TOTAL", (totalMemoryUsage / 1e9).toFixed(2) + " GB"],
			]
		);
	} catch (e) {
		console.error(`Error: ${(e as Error).message}`);
	}

	if (showTensors) {
		console.log();
		console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
		printTable(
			[
				{ name: "Idx", alignRight: true },
				{ name: "Num Elements", alignRight: true },
				{ name: "Shape" },
				{ name: "Data Type" },
				{ name: "Name" },
			],
			tensorInfos.map((tensorInfo, i) => {
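				// GGUF tensors have at most 4 dimensions; pad missing ones with 1n so every row prints 4 columns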
				const shape = [1n, 1n, 1n, 1n];
				tensorInfo.shape.forEach((dim, i) => {
					shape[i] = dim;
				});
				return [
					(i + 1).toString(),
					shape.reduce((acc, n) => acc * n, 1n).toString(),
					shape.map((n) => n.toString().padStart(6)).join(", "),
					mapDtypeToName[tensorInfo.dtype],
					tensorInfo.name,
				];
			})
		);
	} else {
		console.log();
		console.log(`* Use --show-tensor to display tensor information`);
	}
}
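calcMemoryUsage itself is not shown in this excerpt; it is the helper that sizes the K and V caches for the given context length. Below is a minimal sketch of that kind of estimate, assuming f16 cache entries and grouped-query attention. The function name, parameters, and formula are illustrative assumptions, not the actual implementation; only the returned field names (totalBytesK, totalBytesV, totalBytes) mirror how the result is consumed above.

// Illustrative KV-cache estimate, NOT the real calcMemoryUsage.
function estimateKvBytes(nCtx: number, nLayer: number, nEmbd: number, nHead: number, nHeadKv: number) {
	// Per-token width of one cache with grouped-query attention (illustrative).
	const nEmbdGqa = (nEmbd / nHead) * nHeadKv;
	const bytesPerElement = 2; // assuming f16 K/V entries
	const totalBytesK = nCtx * nLayer * nEmbdGqa * bytesPerElement;
	const totalBytesV = nCtx * nLayer * nEmbdGqa * bytesPerElement;
	// Same field names as the kvUsage object consumed above.
	return { totalBytesK, totalBytesV, totalBytes: totalBytesK + totalBytesV };
}

For example, with nLayer = 32, nEmbd = 4096, nHead = 32, nHeadKv = 8 (typical 8B-class model shapes) and a 4096-token context, this sketch gives about 0.27 GB for each of the K and V caches.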