router/Cargo.toml (86 lines of code) (raw):

# Manifest for the `text-embeddings-router` crate: a library (`src/lib.rs`)
# plus a binary of the same name (`src/main.rs`), built with a `build.rs`
# script. Transport (HTTP / gRPC) and backend selection are feature-gated;
# see `[features]` at the bottom.

[package]
name = "text-embeddings-router"
version.workspace = true
authors.workspace = true
build = "build.rs"
edition.workspace = true
homepage.workspace = true
description = "Text Embedding Webserver"

[lib]
path = "src/lib.rs"

[[bin]]
name = "text-embeddings-router"
path = "src/main.rs"

[dependencies]
anyhow = { workspace = true }
text-embeddings-backend = { path = "../backends", features = ["clap"] }
text-embeddings-core = { path = "../core" }
clap = { workspace = true }
futures = "0.3"
init-tracing-opentelemetry = { version = "0.18.1", features = ["opentelemetry-otlp"] }
hf-hub = { workspace = true }
http = "1.0.0"
num_cpus = { workspace = true }
metrics = { workspace = true }
# The `grpc` feature below additionally turns on this crate's
# `http-listener` feature.
metrics-exporter-prometheus = "0.15.1"
opentelemetry = "0.23.0"
opentelemetry_sdk = { version = "0.23.0", features = ["rt-tokio"] }
opentelemetry-otlp = "0.16.0"
# Extra features (`json`) are only requested for tests; see [dev-dependencies].
reqwest = "0.12.5"
simsimd = "4.4.0"
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
tokenizers = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-opentelemetry = "0.24.0"
tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
veil = "0.1.6"

# HTTP dependencies (all optional; enabled together by the `http` feature)
axum = { version = "0.7.4", features = ["json"], optional = true }
axum-tracing-opentelemetry = { version = "0.18.1", optional = true }
base64 = { version = "0.22.1", optional = true }
tower-http = { version = "0.5.1", features = ["cors"], optional = true }
utoipa = { version = "4.2", features = ["axum_extras"], optional = true }
utoipa-swagger-ui = { version = "7.1", features = ["axum", "vendored"], optional = true }

# gRPC dependencies (all optional; enabled together by the `grpc` feature)
async-stream = { version = "0.3.5", optional = true }
prost = { version = "0.12.1", optional = true }
tonic = { version = "0.11.0", optional = true }
tonic-health = { version = "0.11.0", optional = true }
tonic-reflection = { version = "0.11.0", optional = true }
tokio-stream = { version = "0.1.14", optional = true }

# Optional
cudarc = { workspace = true, optional = true }
# NOTE(review): no feature in this file enables this dependency through
# `dep:intel-mkl-src`; it is only referenced through the weak
# `intel-mkl-src?/...` form in the linking features. Confirm that is intended.
intel-mkl-src = { workspace = true, optional = true }

# Malloc trim hack for linux
[target.'cfg(target_os = "linux")'.dependencies]
libc = "0.2.149"

# else use mimalloc
[target.'cfg(not(target_os = "linux"))'.dependencies]
# Bounded to the 0.1 series instead of a bare `*`, which would accept any
# future breaking release and is rejected by crates.io on publish.
mimalloc = { version = "0.1", features = ["no_thp"] }

[dev-dependencies]
# Pinned to an exact commit of a fork rather than a crates.io release.
insta = { git = "https://github.com/OlivierDehaene/insta", rev = "f4f98c0410b91fb5a28b10df98e4422955be9c2c", features = ["yaml"] }
is_close = "0.1.3"
reqwest = { version = "0.12.5", features = ["json"] }
serial_test = { workspace = true }

[build-dependencies]
vergen = { version = "8.0.0", features = ["build", "git", "gitcl"] }
# Only pulled into the build script when the `grpc` feature is enabled.
tonic-build = { version = "0.11.0", optional = true }

[features]
default = ["candle", "http", "dynamic-linking"]

# Transport front-ends. Each one switches on its group of optional
# dependencies declared above.
http = [
    "dep:axum",
    "dep:axum-tracing-opentelemetry",
    "dep:base64",
    "dep:tower-http",
    "dep:utoipa",
    "dep:utoipa-swagger-ui",
]
grpc = [
    "metrics-exporter-prometheus/http-listener",
    "dep:prost",
    "dep:tonic",
    "dep:tonic-health",
    "dep:tonic-reflection",
    "dep:tonic-build",
    "dep:async-stream",
    "dep:tokio-stream",
]

# Pass-through features: each forwards to the feature of the same name on
# `text-embeddings-backend`.
metal = ["text-embeddings-backend/metal"]
mkl = ["text-embeddings-backend/mkl"]
accelerate = ["text-embeddings-backend/accelerate"]
python = ["text-embeddings-backend/python"]
ort = ["text-embeddings-backend/ort"]
candle = ["text-embeddings-backend/candle"]

# CUDA variants. All three enable `candle` and the optional `cudarc`
# dependency; they differ only in which backend feature is forwarded.
candle-cuda = ["candle", "text-embeddings-backend/flash-attn", "dep:cudarc"]
candle-cuda-turing = ["candle", "text-embeddings-backend/flash-attn-v1", "dep:cudarc"]
candle-cuda-volta = ["candle", "text-embeddings-backend/cuda", "dep:cudarc"]

# Linking mode. The `dep?/feature` form is a weak dependency feature: it sets
# the feature on `cudarc` / `intel-mkl-src` only if that optional dependency
# is already enabled by something else, and never enables it by itself.
static-linking = ["cudarc?/static-linking", "intel-mkl-src?/mkl-static-lp64-iomp"]
dynamic-linking = ["cudarc?/dynamic-linking", "intel-mkl-src?/mkl-dynamic-lp64-iomp"]

# Enables no dependencies or other features by itself.
# NOTE(review): presumably gates code through `cfg(feature = "google")` — confirm in the sources.
google = []