# Cargo.toml (72 lines of code) (raw):
# Workspace definition: the crates that belong to this workspace and the
# dependency feature resolver they share.
[workspace]
# Use version 2 of Cargo's feature resolver.
resolver = "2"
members = [
    "backends",
    "backends/candle",
    "backends/ort",
    "backends/core",
    "backends/python",
    "backends/grpc-client",
    "core",
    "router",
]
# Crates selected when cargo is invoked from the workspace root without
# package-selection flags. Currently the same list as `members`.
default-members = [
    "backends",
    "backends/candle",
    "backends/ort",
    "backends/core",
    "backends/python",
    "backends/grpc-client",
    "core",
    "router",
]

# Package metadata defined once for the workspace; a member crate picks a
# field up by declaring `<field>.workspace = true` in its own manifest.
[workspace.package]
version = "1.7.2"
authors = [
    "Olivier Dehaene",
    "Nicolas Patry",
    "Alvaro Bartolome",
]
edition = "2021"
homepage = "https://github.com/huggingface/text-embeddings-inference"

# Dependency versions and features declared once for the workspace; a member
# crate inherits an entry with `<name> = { workspace = true }`.
# Entries are kept in alphabetical order.
[workspace.dependencies]
anyhow = "1.0.75"
# `candle` is the local name for the `candle-core` package.
candle = { version = "0.8", package = "candle-core" }
candle-cublaslt = "0.0.1"
candle-flash-attn = "0.8"
candle-flash-attn-v1 = "0.0.1"
candle-layer-norm = "0.0.1"
candle-nn = "0.8"
candle-rotary = "0.0.1"
candle-transformers = "0.8"
clap = { version = "4.1", features = ["derive", "env"] }
cudarc = { version = "0.13", default-features = false, features = ["cuda-12020"] }
half = { version = "2.3.1", features = ["num-traits"] }
hf-hub = { version = "0.4", default-features = false, features = ["tokio"] }
intel-mkl-src = "0.8"
metrics = "0.23"
nohash-hasher = "0.2"
num_cpus = "1.16.0"
rand = "0.9"
serde = { version = "1.0", features = ["serde_derive"] }
serde_json = "1.0"
serial_test = "2.0.0"
thiserror = "1.0"
tokenizers = { version = "0.21.0", default-features = false, features = ["onig", "esaxx_fast"] }
tokio = { version = "1.25", features = ["rt", "rt-multi-thread", "parking_lot", "sync", "signal"] }
tracing = "0.1"

# Replace the crates.io releases of the crates below with pinned git
# revisions for every crate in the dependency graph.
[patch.crates-io]
# cudarc is taken from the Narsil/cudarc repository at a fixed commit.
cudarc = { git = "https://github.com/Narsil/cudarc", rev = "8b4f18b4bcd5e4b1a9daf40abc3a2e27f83f06e9" }
# All four candle crates are pinned to the same huggingface/candle commit.
candle = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-core" }
candle-nn = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-nn" }
candle-transformers = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-transformers" }
candle-flash-attn = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-flash-attn" }

# Settings applied to `--release` builds.
[profile.release]
# Highest optimization level.
opt-level = 3
# Whole-program ("fat") link-time optimization across all crates.
lto = "fat"
# Compile each crate as a single codegen unit.
codegen-units = 1
# Abort the process on panic instead of unwinding.
panic = "abort"
# Emit no debug info.
debug = 0
# Strip symbols from the final binary.
strip = "symbols"

# Custom profile, selected with `--profile release-debug`. Starts from
# `release` and overrides only the keys listed here; `opt-level` and `panic`
# are inherited unchanged.
[profile.release-debug]
inherits = "release"
# Thin LTO and 16 codegen units instead of fat LTO and a single unit.
lto = "thin"
codegen-units = 16
# Keep limited debug info (level 1) and leave the binary unstripped.
debug = 1
strip = "none"