in src/lib/server/models.ts [147:275]
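/**
 * Builds the prompt renderer for a model, trying sources in this order:
 * 1. a chat template embedded in a local GGUF file,
 * 2. the model's configured `chatPromptTemplate` (Handlebars),
 * 3. the chat template of the model's tokenizer on the Hugging Face hub,
 * falling back to a default ChatML template when no tokenizer was configured
 * and none could be fetched.
 *
 * Usage sketch (assuming a parsed `model` config plus `messages`, `preprompt`
 * and `tools` already in scope):
 *
 *   const render = await getChatPromptRender(model);
 *   const prompt = render({ messages, preprompt, tools, continueMessage: false });
 */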
async function getChatPromptRender(
m: z.infer<typeof modelConfig>
): Promise<ReturnType<typeof compileTemplate<ChatTemplateInput>>> {
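// models served from a local GGUF file may ship their own chat template in the
// file metadata; prefer it over anything configured or fetched from the hub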
const localEndpoint = m.endpoints?.find((e) => e.type === "local");
if (localEndpoint) {
const path = localEndpoint.modelPath ?? `hf:${m.id ?? m.name}`;
const { resolveModelFile, readGgufFileInfo } = await import("node-llama-cpp");
const modelPath = await resolveModelFile(path, MODELS_FOLDER);
const info = await readGgufFileInfo(modelPath, {
readTensorInfo: false,
});
if (info.metadata.tokenizer.chat_template) {
// the GGUF metadata embeds the model's own jinja chat template; render it directly
const jinjaTemplate = new Template(info.metadata.tokenizer.chat_template);
return (inputs: ChatTemplateInput) => {
// jinja chat templates expect `role`/`content` messages rather than chat-ui's `from`/`content`
return jinjaTemplate.render({
...m,
...inputs,
messages: inputs.messages.map(({ from, content }) => ({ role: from, content })),
});
};
}
}
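// an explicitly configured Handlebars template takes precedence over the hub tokenizer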
if (m.chatPromptTemplate) {
return compileTemplate<ChatTemplateInput>(m.chatPromptTemplate, m);
}
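// otherwise use the chat template bundled with the model's tokenizer on the hub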
let tokenizer: PreTrainedTokenizer;
try {
tokenizer = await getTokenizer(m.tokenizer ?? m.id ?? m.name);
} catch (e) {
// if fetching the tokenizer fails and it wasn't set manually, fall back to the default ChatML template
if (!m.tokenizer) {
logger.warn(
`No tokenizer found for model ${m.name}, using the default template. Consider setting the tokenizer manually or making sure the model is available on the hub.`,
m
);
return compileTemplate<ChatTemplateInput>(
"{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}",
m
);
}
logger.error(
e,
`Failed to load tokenizer ${
m.tokenizer ?? m.id ?? m.name
}. Make sure the model is available on the hub and that you have access to any gated models.`
);
process.exit(1);
}
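// the returned renderer converts chat-ui messages (`from`/`content`) into the
// `role`/`content` shape expected by `apply_chat_template`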
const renderTemplate = ({ messages, preprompt, tools, continueMessage }: ChatTemplateInput) => {
let formattedMessages: {
role: string;
content: string;
tool_calls?: { id: string; tool_call_id: string; output: string }[];
}[] = messages.map((message) => ({
content: message.content,
role: message.from,
}));
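// models without a dedicated system role get the system message folded into the first user turn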
if (!m.systemRoleSupported) {
const firstSystemMessage = formattedMessages.find((msg) => msg.role === "system");
formattedMessages = formattedMessages.filter((msg) => msg.role !== "system");
if (
firstSystemMessage &&
formattedMessages.length > 0 &&
formattedMessages[0].role === "user"
) {
formattedMessages[0].content =
firstSystemMessage.content + "\n" + formattedMessages[0].content;
}
}
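// inject the preprompt as the leading message, downgrading it to a user message when the system role is unsupported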
if (preprompt && formattedMessages[0]?.role !== "system") {
formattedMessages = [
{
role: m.systemRoleSupported ? "system" : "user",
content: preprompt,
},
...formattedMessages,
];
}
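// expose only non-fixed tool inputs to the template, keyed by name with a required flag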
const mappedTools =
tools?.map((tool) => {
const inputs: Record<
string,
{
type: ToolInput["type"];
description: string;
required: boolean;
}
> = {};
for (const value of tool.inputs) {
if (value.paramType !== "fixed") {
inputs[value.name] = {
type: value.type,
description: value.description ?? "",
required: value.paramType === "required",
};
}
}
return {
name: tool.name,
description: tool.description,
parameter_definitions: inputs,
};
}) ?? [];
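// let transformers.js render the tokenizer's chat template; skip the trailing
// generation prompt when we are continuing an assistant message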
const output = tokenizer.apply_chat_template(formattedMessages, {
tokenize: false,
add_generation_prompt: !continueMessage,
tools: mappedTools.length ? mappedTools : undefined,
});
if (typeof output !== "string") {
throw new Error("Failed to apply chat template, the output is not a string");
}
return output;
};
return renderTemplate;
}