in packages/gguf/src/cli.ts [23:138]
/**
 * Renders a metadata value as compact JSON-like text.
 *
 * Produces the same text as `JSON.stringify` for plain arrays, strings and
 * numbers, but additionally accepts `bigint` values (printed as bare decimal
 * digits), which `JSON.stringify` rejects with a `TypeError`.
 */
function stringifyMetadataValue(value: unknown): string {
	if (typeof value === "bigint") {
		return value.toString();
	}
	if (Array.isArray(value)) {
		return `[${value.map((item) => stringifyMetadataValue(item)).join(",")}]`;
	}
	// JSON.stringify yields undefined for values JSON cannot represent (e.g. undefined);
	// inside an array JSON would print those as null, so mirror that here.
	return JSON.stringify(value) ?? "null";
}

/** Formats a byte count as decimal gigabytes (1e9 bytes) with two fraction digits. */
function formatGigabytes(bytes: number): string {
	return (bytes / 1e9).toFixed(2) + " GB";
}

/**
 * CLI entry point: parses `process.argv`, loads every shard of the given GGUF
 * file, then prints the metadata key/value pairs, a memory usage estimation
 * and (with `--show-tensor`) the list of tensors.
 *
 * Recognized arguments:
 * - `--help` / `-h`: show the help text
 * - `--show-tensor`: also dump tensor information
 * - `--context N` / `-c N`: context length used for the estimation (default 4096)
 * - anything else is taken as the path to the gguf file (the last one wins)
 */
async function main() {
	let ggufPath = "";
	let showTensors = false;
	let nCtx = 4096;
	for (let i = 2; i < process.argv.length; i++) {
		const arg = process.argv[i];
		if (arg === "--help" || arg === "-h") {
			showHelp(0);
		} else if (arg === "--show-tensor") {
			showTensors = true;
		} else if (arg === "--context" || arg === "-c") {
			// The option consumes the next argument as its value.
			const rawValue = process.argv[++i];
			const parsed = Number(rawValue);
			// Reject a missing value, non-numeric text, fractions and non-positive sizes
			// instead of letting NaN propagate into the memory estimation.
			if (rawValue === undefined || !Number.isInteger(parsed) || parsed <= 0) {
				console.error(
					rawValue === undefined
						? `Error: Missing value for ${arg}`
						: `Error: Invalid value for ${arg}: ${JSON.stringify(rawValue)} (expected a positive integer)`
				);
				showHelp(1);
				// NOTE(review): showHelp presumably exits the process; return anyway in case it does not.
				return;
			}
			nCtx = parsed;
		} else {
			ggufPath = arg;
		}
	}
	if (!ggufPath.length) {
		console.error("Error: Missing path to gguf file");
		showHelp(1);
		// NOTE(review): same guard as above; never continue without a path.
		return;
	}

	const { shards } = await ggufAllShards(ggufPath, {
		allowLocalFile: true,
	});
	// Metadata is taken from the first shard only; tensor infos are gathered from
	// every shard into a fresh array so the parsed shard objects are not mutated.
	const { metadata } = shards[0];
	const tensorInfos = shards.flatMap((shard) => shard.tensorInfos);

	// TODO: print info about endianness
	console.log(`* Dumping ${Object.keys(metadata).length} key/value pair(s)`);
	printTable(
		[
			{ name: "Idx", alignRight: true },
			// { name: 'Type' }, // TODO: support this
			{ name: "Count", alignRight: true },
			{ name: "Value" },
		],
		Object.entries(metadata).map(([key, value], i) => {
			// Longer renderings are truncated to this many characters, followed by "...".
			const MAX_LEN = 50;
			let strVal = "";
			let count = 1;
			if (Array.isArray(value)) {
				// bigint-safe replacement for JSON.stringify(value)
				strVal = stringifyMetadataValue(value);
				count = value.length;
			} else if (value instanceof String || typeof value === "string") {
				strVal = JSON.stringify(value);
			} else {
				// String() also copes with null/undefined, where .toString() would throw.
				strVal = String(value);
			}
			strVal = strVal.length > MAX_LEN ? strVal.slice(0, MAX_LEN) + "..." : strVal;
			return [(i + 1).toString(), count.toString(), `${key} = ${strVal}`];
		})
	);

	console.log();
	console.log(`* Memory usage estimation (with context length of ${nCtx} tokens)`);
	try {
		const looseMetadata = metadata as GGUFParseOutput<{ strict: false }>["metadata"];
		const kvUsage = calcMemoryUsage(looseMetadata, nCtx);
		let modelWeightInBytes = 0;
		for (const tensorInfo of tensorInfos) {
			const nElem = Number(tensorInfo.shape.reduce((a, b) => a * b, 1n));
			// NOTE(review): GGML_QUANT_SIZES presumably holds bits per element, hence the
			// division by 8 to obtain bytes; confirm against its definition.
			modelWeightInBytes += nElem * (GGML_QUANT_SIZES[tensorInfo.dtype] / 8);
		}
		// Heuristic overhead: the usage computed for a 256-token context plus 5% of the weights.
		const overhead = calcMemoryUsage(looseMetadata, 256).totalBytes + modelWeightInBytes * 0.05;
		const totalMemoryUsage = kvUsage.totalBytes + overhead + modelWeightInBytes;
		printTable(
			[{ name: "Item" }, { name: "Memory usage", alignRight: true }],
			[
				["K cache", formatGigabytes(kvUsage.totalBytesK)],
				["V cache", formatGigabytes(kvUsage.totalBytesV)],
				["Weight", formatGigabytes(modelWeightInBytes)],
				["Overhead", formatGigabytes(overhead)],
				["", "---"],
				["TOTAL", formatGigabytes(totalMemoryUsage)],
			]
		);
	} catch (e) {
		// The estimation is best-effort: report the failure and keep going.
		console.error(`Error: ${e instanceof Error ? e.message : String(e)}`);
	}

	if (showTensors) {
		console.log();
		console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
		printTable(
			[
				{ name: "Idx", alignRight: true },
				{ name: "Num Elements", alignRight: true },
				{ name: "Shape" },
				{ name: "Data Type" },
				{ name: "Name" },
			],
			tensorInfos.map((tensorInfo, i) => {
				// Pad the shape with 1s up to four dimensions so every row has the same columns.
				const shape = [1n, 1n, 1n, 1n];
				tensorInfo.shape.forEach((dim, axis) => {
					shape[axis] = dim;
				});
				return [
					(i + 1).toString(),
					shape.reduce((acc, n) => acc * n, 1n).toString(),
					shape.map((n) => n.toString().padStart(6)).join(", "),
					mapDtypeToName[tensorInfo.dtype],
					tensorInfo.name,
				];
			})
		);
	} else {
		console.log();
		console.log(`* Use --show-tensor to display tensor information`);
	}
}