backends/proto/embed.proto (32 lines of code) (raw):
/// This schema uses proto3 syntax.
syntax = "proto3";

/// All definitions in this file live in the embedding.v1 package.
package embedding.v1;
/// RPC service exposing embedding, prediction, and health-check methods.
service EmbeddingService {
  /// Embed: takes an EmbedRequest and returns an EmbedResponse holding a
  /// list of Embedding messages.
  rpc Embed(EmbedRequest) returns (EmbedResponse);

  /// Health check; both the request and the response are empty messages.
  rpc Health(HealthRequest) returns (HealthResponse);

  /// Predict: takes the same EmbedRequest input as Embed and returns a
  /// PredictResponse holding a list of Score messages.
  rpc Predict(EmbedRequest) returns (PredictResponse);
}
/// Request message of the Health RPC.
message HealthRequest {
  /// Intentionally empty: no fields are defined.
}
/// Response message of the Health RPC.
message HealthResponse {
  /// Intentionally empty: no fields are defined.
}
/// Input message accepted by both the Embed and Predict RPCs.
/// NOTE(review): the four repeated fields look like parallel, flattened
/// per-token arrays describing a batch of sequences; confirm against the
/// server implementation.
message EmbedRequest {
  /// Input ids (presumably tokenizer token ids; TODO confirm).
  repeated uint32 input_ids = 1;

  /// Token type ids (presumably one segment id per input id; TODO confirm).
  repeated uint32 token_type_ids = 2;

  /// Position ids (presumably each token's position within its own
  /// sequence; TODO confirm).
  repeated uint32 position_ids = 3;

  /// Presumably cumulative sequence lengths marking where each sequence
  /// starts and ends in the flattened arrays; TODO confirm.
  repeated uint32 cu_seq_lengths = 4;

  /// Length of the longest request
  uint32 max_length = 5;
}
/// A single embedding, represented as a list of 32-bit floats.
message Embedding {
  /// Components of the embedding.
  repeated float values = 1;
}
/// Response message of the Embed RPC.
message EmbedResponse {
  /// Returned embeddings (presumably one per sequence in the request;
  /// TODO confirm).
  repeated Embedding embeddings = 1;
}
/// A single prediction result, represented as a list of 32-bit floats.
message Score {
  /// Score values (presumably one per output class or label; TODO confirm).
  repeated float values = 1;
}
/// Response message of the Predict RPC.
message PredictResponse {
  /// Returned scores (presumably one Score per sequence in the request;
  /// TODO confirm).
  repeated Score scores = 1;
}