in src/lib/server/models.ts [288:346]
getEndpoint: async (): Promise<Endpoint> => {
if (!m.endpoints) {
return endpointTgi({
type: "tgi",
url: `${config.HF_API_ROOT}/${m.name}`,
accessToken: config.HF_TOKEN ?? config.HF_ACCESS_TOKEN,
weight: 1,
model: m,
});
}
const totalWeight = sum(m.endpoints.map((e) => e.weight));
let random = Math.random() * totalWeight;
for (const endpoint of m.endpoints) {
if (random < endpoint.weight) {
const args = { ...endpoint, model: m };
switch (args.type) {
case "tgi":
return endpoints.tgi(args);
case "local":
return endpoints.local(args);
case "inference-client":
return endpoints.inferenceClient(args);
case "anthropic":
return endpoints.anthropic(args);
case "anthropic-vertex":
return endpoints.anthropicvertex(args);
case "bedrock":
return endpoints.bedrock(args);
case "aws":
return await endpoints.aws(args);
case "openai":
return await endpoints.openai(args);
case "llamacpp":
return endpoints.llamacpp(args);
case "ollama":
return endpoints.ollama(args);
case "vertex":
return await endpoints.vertex(args);
case "genai":
return await endpoints.genai(args);
case "cloudflare":
return await endpoints.cloudflare(args);
case "cohere":
return await endpoints.cohere(args);
case "langserve":
return await endpoints.langserve(args);
default:
// for legacy reason
return endpoints.tgi(args);
}
}
random -= endpoint.weight;
}
throw new Error(`Failed to select endpoint`);
},