Cargo.toml (72 lines of code) (raw):

# Workspace manifest: declares the member crates, the package metadata and
# dependency versions defined at workspace level, the crates.io patches, and
# the build profiles.

[workspace]
members = [
    "backends",
    "backends/candle",
    "backends/ort",
    "backends/core",
    "backends/python",
    "backends/grpc-client",
    "core",
    "router",
]
# Identical to `members`: the same eight crates are listed.
default-members = [
    "backends",
    "backends/candle",
    "backends/ort",
    "backends/core",
    "backends/python",
    "backends/grpc-client",
    "core",
    "router",
]
resolver = "2"

[workspace.package]
version = "1.7.2"
edition = "2021"
authors = ["Olivier Dehaene", "Nicolas Patry", "Alvaro Bartolome"]
homepage = "https://github.com/huggingface/text-embeddings-inference"

[workspace.dependencies]
anyhow = "1.0.75"
clap = { version = "4.1", features = ["derive", "env"] }
hf-hub = { version = "0.4", features = ["tokio"], default-features = false }
metrics = "0.23"
nohash-hasher = "0.2"
num_cpus = "1.16.0"
tokenizers = { version = "0.21.0", default-features = false, features = ["onig", "esaxx_fast"] }
tokio = { version = "1.25", features = ["rt", "rt-multi-thread", "parking_lot", "sync", "signal"] }
tracing = "0.1"
serde = { version = "1.0", features = ["serde_derive"] }
serde_json = "1.0"
thiserror = "1.0"
rand = "0.9"
serial_test = "2.0.0"
cudarc = { version = "0.13", features = ["cuda-12020"], default-features = false }
intel-mkl-src = { version = "0.8" }
# `candle` is the local name for the `candle-core` package.
candle = { version = "0.8", package = "candle-core" }
candle-nn = { version = "0.8" }
candle-transformers = { version = "0.8" }
candle-flash-attn = { version = "0.8" }
candle-cublaslt = { version = "0.0.1" }
candle-layer-norm = { version = "0.0.1" }
candle-rotary = { version = "0.0.1" }
candle-flash-attn-v1 = { version = "0.0.1" }
half = { version = "2.3.1", features = ["num-traits"] }

[patch.crates-io]
# Each entry points at a pinned git revision. The four candle entries share
# one revision of the same repository, so update their `rev` values together.
cudarc = { git = "https://github.com/Narsil/cudarc", rev = "8b4f18b4bcd5e4b1a9daf40abc3a2e27f83f06e9" }
candle = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-core" }
candle-nn = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-nn" }
candle-transformers = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-transformers" }
candle-flash-attn = { git = "https://github.com/huggingface/candle", rev = "6381023982251959a2c9bab7378b3013304e192b", package = "candle-flash-attn" }

[profile.release]
debug = 0
lto = "fat"
opt-level = 3
codegen-units = 1
strip = "symbols"
panic = "abort"

# Inherits from `release` and overrides `debug`, `lto`, `codegen-units` and
# `strip`; `opt-level` and `panic` are not overridden here.
[profile.release-debug]
inherits = "release"
debug = 1
lto = "thin"
codegen-units = 16
strip = "none"