in src/lib/server/models.ts [147:275]
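/**
 * Builds the prompt renderer for a model, trying sources in this order:
 * 1. a chat template embedded in a local GGUF file,
 * 2. the model's configured `chatPromptTemplate` (Handlebars),
 * 3. the chat template of the model's tokenizer on the Hugging Face hub,
 * falling back to a default ChatML template when no tokenizer was configured
 * and none could be fetched.
 *
 * Usage sketch (assuming a parsed `model` config plus `messages`, `preprompt`
 * and `tools` already in scope):
 *
 *   const render = await getChatPromptRender(model);
 *   const prompt = render({ messages, preprompt, tools, continueMessage: false });
 */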
async function getChatPromptRender(
m: z.infer<typeof modelConfig>
): Promise<ReturnType<typeof compileTemplate<ChatTemplateInput>>> {
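// models served from a local GGUF file may ship their own chat template in the
// file metadata; prefer it over anything configured or fetched from the hub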
const localEndpoint = m.endpoints?.find((e) => e.type === "local");
if (localEndpoint) {
const path = localEndpoint.modelPath ?? `hf:${m.id ?? m.name}`;
const { resolveModelFile, readGgufFileInfo } = await import("node-llama-cpp");
const modelPath = await resolveModelFile(path, MODELS_FOLDER);
const info = await readGgufFileInfo(modelPath, {
readTensorInfo: false,
});
if (info.metadata.tokenizer.chat_template) {
// the GGUF metadata embeds the model's own jinja chat template; render it directly
const jinjaTemplate = new Template(info.metadata.tokenizer.chat_template);
return (inputs: ChatTemplateInput) => {
// jinja chat templates expect `role`/`content` messages rather than chat-ui's `from`/`content`
return jinjaTemplate.render({
...m,
...inputs,
messages: inputs.messages.map(({ from, content }) => ({ role: from, content })),
});
};
}
}
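// an explicitly configured Handlebars template takes precedence over the hub tokenizer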
if (m.chatPromptTemplate) {
return compileTemplate<ChatTemplateInput>(m.chatPromptTemplate, m);
}
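// otherwise use the chat template bundled with the model's tokenizer on the hub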
let tokenizer: PreTrainedTokenizer;
try {
tokenizer = await getTokenizer(m.tokenizer ?? m.id ?? m.name);
} catch (e) {
// if fetching the tokenizer fails and it wasn't set manually, fall back to the default ChatML template
if (!m.tokenizer) {
logger.warn(
`No tokenizer found for model ${m.name}, using the default template. Consider setting the tokenizer manually or making sure the model is available on the hub.`,
m
);
return compileTemplate<ChatTemplateInput>(
"{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}",
m
);
}
logger.error(
e,
`Failed to load tokenizer ${
m.tokenizer ?? m.id ?? m.name
}. Make sure the model is available on the hub and that you have access to any gated models.`
);
process.exit(1);
}
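// the returned renderer converts chat-ui messages (`from`/`content`) into the
// `role`/`content` shape expected by `apply_chat_template`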
const renderTemplate = ({ messages, preprompt, tools, continueMessage }: ChatTemplateInput) => {
let formattedMessages: {
role: string;
content: string;
tool_calls?: { id: string; tool_call_id: string; output: string }[];
}[] = messages.map((message) => ({
content: message.content,
role: message.from,
}));
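// models without a dedicated system role get the system message folded into the first user turn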
if (!m.systemRoleSupported) {
const firstSystemMessage = formattedMessages.find((msg) => msg.role === "system");
formattedMessages = formattedMessages.filter((msg) => msg.role !== "system");
if (
firstSystemMessage &&
formattedMessages.length > 0 &&
formattedMessages[0].role === "user"
) {
formattedMessages[0].content =
firstSystemMessage.content + "\n" + formattedMessages[0].content;
}
}
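// inject the preprompt as the leading message, downgrading it to a user message when the system role is unsupported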
if (preprompt && formattedMessages[0]?.role !== "system") {
formattedMessages = [
{
role: m.systemRoleSupported ? "system" : "user",
content: preprompt,
},
...formattedMessages,
];
}
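// expose only non-fixed tool inputs to the template, keyed by name with a required flag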
const mappedTools =
tools?.map((tool) => {
const inputs: Record<
string,
{
type: ToolInput["type"];
description: string;
required: boolean;
}
> = {};
for (const value of tool.inputs) {
if (value.paramType !== "fixed") {
inputs[value.name] = {
type: value.type,
description: value.description ?? "",
required: value.paramType === "required",
};
}
}
return {
name: tool.name,
description: tool.description,
parameter_definitions: inputs,
};
}) ?? [];
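// let transformers.js render the tokenizer's chat template; skip the trailing
// generation prompt when we are continuing an assistant message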
const output = tokenizer.apply_chat_template(formattedMessages, {
tokenize: false,
add_generation_prompt: !continueMessage,
tools: mappedTools.length ? mappedTools : undefined,
});
if (typeof output !== "string") {
throw new Error("Failed to apply chat template, the output is not a string");
}
return output;
};
return renderTemplate;
}