proto/tei.proto (171 lines of code) (raw):

syntax = "proto3";

package tei.v1;

// Server introspection.
service Info {
  // Returns an InfoResponse for the running server. Declared IDEMPOTENT.
  rpc Info(InfoRequest) returns (InfoResponse) {
    option idempotency_level = IDEMPOTENT;
  }
}

// Embedding RPCs. Each unary method has a bidirectional-streaming twin that
// takes a stream of the same request type and returns a stream of the same
// response type.
// NOTE(review): presumably the streaming variants emit one response per
// request, in order - confirm against the server implementation.
service Embed {
  // Unary: one EmbedRequest in, one EmbedResponse out.
  rpc Embed(EmbedRequest) returns (EmbedResponse);
  // Bidirectional-streaming form of Embed.
  rpc EmbedStream(stream EmbedRequest) returns (stream EmbedResponse);
  // Unary: returns a list of SparseValue entries.
  rpc EmbedSparse(EmbedSparseRequest) returns (EmbedSparseResponse);
  // Bidirectional-streaming form of EmbedSparse.
  rpc EmbedSparseStream(stream EmbedSparseRequest)
      returns (stream EmbedSparseResponse);
  // Unary: returns a list of TokenEmbedding entries.
  rpc EmbedAll(EmbedAllRequest) returns (EmbedAllResponse);
  // Bidirectional-streaming form of EmbedAll.
  rpc EmbedAllStream(stream EmbedAllRequest)
      returns (stream EmbedAllResponse);
}

// Prediction RPCs. The single-input and pair-input methods share
// PredictResponse as their response type.
service Predict {
  // Unary prediction over a single `inputs` string.
  rpc Predict(PredictRequest) returns (PredictResponse);
  // Unary prediction over a repeated `inputs` field.
  rpc PredictPair(PredictPairRequest) returns (PredictResponse);
  // Bidirectional-streaming form of Predict.
  rpc PredictStream(stream PredictRequest) returns (stream PredictResponse);
  // Bidirectional-streaming form of PredictPair.
  rpc PredictPairStream(stream PredictPairRequest)
      returns (stream PredictResponse);
}

// Reranking RPCs.
service Rerank {
  // Unary: the query and all candidate texts travel in one RerankRequest.
  rpc Rerank(RerankRequest) returns (RerankResponse);
  // Client-streaming: the client sends a stream of RerankStreamRequest
  // messages (one `text` each) and receives a single RerankResponse.
  rpc RerankStream(stream RerankStreamRequest) returns (RerankResponse);
}

// Tokenizer RPCs: text to tokens (Tokenize*) and token ids to text (Decode*).
service Tokenize {
  // Unary: EncodeRequest in, EncodeResponse out.
  rpc Tokenize(EncodeRequest) returns (EncodeResponse);
  // Bidirectional-streaming form of Tokenize.
  rpc TokenizeStream(stream EncodeRequest) returns (stream EncodeResponse);
  // Unary: DecodeRequest in, DecodeResponse out.
  rpc Decode(DecodeRequest) returns (DecodeResponse);
  // Bidirectional-streaming form of Decode.
  rpc DecodeStream(stream DecodeRequest) returns (stream DecodeResponse);
}

// Request for Info.Info. Intentionally empty.
message InfoRequest {}

// Kind of model reported in InfoResponse.model_type.
//
// The zero value is MODEL_TYPE_EMBEDDING, so a proto3 reader cannot tell an
// unset field from an explicit EMBEDDING. The numbers are part of the wire
// contract and must not be changed or reused.
enum ModelType {
  MODEL_TYPE_EMBEDDING = 0;
  MODEL_TYPE_CLASSIFIER = 1;
  MODEL_TYPE_RERANKER = 2;
}

// Response for Info.Info.
// NOTE(review): the per-field meanings below are read off the field names;
// confirm against the server before relying on them.
message InfoResponse {
  // Server version string.
  string version = 1;
  // Optional; presumably the source revision of the server build.
  optional string sha = 2;
  // Optional; presumably the label of the container image in use.
  optional string docker_label = 3;

  // Identifier of the loaded model.
  string model_id = 4;
  // Optional; presumably the revision of the loaded model.
  optional string model_sha = 5;
  // Data type of the loaded model, as a string.
  string model_dtype = 6;
  // See ModelType; defaults to MODEL_TYPE_EMBEDDING when unset.
  ModelType model_type = 7;

  // Configured limits and worker counts.
  uint32 max_concurrent_requests = 8;
  uint32 max_input_length = 9;
  uint32 max_batch_tokens = 10;
  optional uint32 max_batch_requests = 11;
  uint32 max_client_batch_size = 12;
  uint32 tokenization_workers = 13;
}

// Accounting information attached to embed, predict and rerank responses.
message Metadata {
  // Presumably the number of input characters processed - confirm.
  uint32 compute_chars = 1;
  // Presumably the number of input tokens processed - confirm.
  uint32 compute_tokens = 2;
  // Durations; nanoseconds according to the `_ns` suffix.
  uint64 total_time_ns = 3;
  uint64 tokenization_time_ns = 4;
  uint64 queue_time_ns = 5;
  uint64 inference_time_ns = 6;
}

// Which side of the input is affected by truncation.
//
// The zero value is TRUNCATION_DIRECTION_RIGHT, so a request that leaves
// `truncation_direction` unset is read as RIGHT. The numbers are part of the
// wire contract and must not be changed or reused.
enum TruncationDirection {
  TRUNCATION_DIRECTION_RIGHT = 0;
  TRUNCATION_DIRECTION_LEFT = 1;
}

// Request for Embed.Embed and Embed.EmbedStream.
message EmbedRequest {
  // Text to embed.
  string inputs = 1;
  // Truncation switch; false when unset.
  bool truncate = 2;
  // Normalization switch; false when unset.
  // NOTE(review): the kind of normalization is not specified here.
  bool normalize = 3;
  // RIGHT when unset.
  TruncationDirection truncation_direction = 4;
  // Optional name of a prompt; resolution is server-defined.
  optional string prompt_name = 5;
}

// Response for Embed.Embed and Embed.EmbedStream.
message EmbedResponse {
  // Embedding values as a flat list of floats.
  repeated float embeddings = 1;
  Metadata metadata = 2;
}

// Request for Embed.EmbedSparse and Embed.EmbedSparseStream.
// Same fields as EmbedRequest minus `normalize`; the field numbers differ.
message EmbedSparseRequest {
  string inputs = 1;
  bool truncate = 2;
  TruncationDirection truncation_direction = 3;
  optional string prompt_name = 4;
}

// One (index, value) entry of a sparse embedding.
message SparseValue {
  uint32 index = 1;
  float value = 2;
}

// Response for Embed.EmbedSparse and Embed.EmbedSparseStream.
message EmbedSparseResponse {
  repeated SparseValue sparse_embeddings = 1;
  Metadata metadata = 2;
}

// Request for Embed.EmbedAll and Embed.EmbedAllStream.
// Field-for-field identical to EmbedSparseRequest.
message EmbedAllRequest {
  string inputs = 1;
  bool truncate = 2;
  TruncationDirection truncation_direction = 3;
  optional string prompt_name = 4;
}

// A single vector of floats; one element of
// EmbedAllResponse.token_embeddings.
message TokenEmbedding {
  repeated float embeddings = 1;
}

// Response for Embed.EmbedAll and Embed.EmbedAllStream.
message EmbedAllResponse {
  // List of vectors; presumably one per input token - confirm.
  repeated TokenEmbedding token_embeddings = 1;
  Metadata metadata = 2;
}

// Request for Predict.Predict and Predict.PredictStream.
message PredictRequest {
  string inputs = 1;
  bool truncate = 2;
  // Presumably selects unnormalized scores - confirm. False when unset.
  bool raw_scores = 3;
  TruncationDirection truncation_direction = 4;
}

// Request for Predict.PredictPair and Predict.PredictPairStream.
// Identical to PredictRequest except that `inputs` is repeated.
message PredictPairRequest {
  // NOTE(review): the rpc name suggests exactly two entries; the schema
  // itself does not enforce a length.
  repeated string inputs = 1;
  bool truncate = 2;
  bool raw_scores = 3;
  TruncationDirection truncation_direction = 4;
}

// One (score, label) result.
message Prediction {
  float score = 1;
  string label = 2;
}

// Response shared by all four Predict rpcs.
message PredictResponse {
  repeated Prediction predictions = 1;
  Metadata metadata = 2;
}

// Request for Rerank.Rerank: one query and all candidate texts together.
message RerankRequest {
  string query = 1;
  repeated string texts = 2;
  bool truncate = 3;
  bool raw_scores = 4;
  // Presumably controls whether Rank.text is populated - confirm.
  bool return_text = 5;
  TruncationDirection truncation_direction = 6;
}

// One message of the Rerank.RerankStream client stream. It mirrors
// RerankRequest but carries a single `text` per message.
// NOTE(review): the two "first value" comments below are kept verbatim;
// confirm that they belong to `raw_scores` and `return_text`.
message RerankStreamRequest {
  string query = 1;
  string text = 2;
  bool truncate = 3;
  // The server will only consider the first value
  bool raw_scores = 4;
  // The server will only consider the first value
  bool return_text = 5;
  TruncationDirection truncation_direction = 6;
}

// One reranking result.
message Rank {
  // Presumably the position of the text in the request - confirm.
  uint32 index = 1;
  // Optional text; see the `return_text` request field.
  optional string text = 2;
  float score = 3;
}

// Response for Rerank.Rerank and Rerank.RerankStream.
message RerankResponse {
  repeated Rank ranks = 1;
  Metadata metadata = 2;
}
// Request for Tokenize.Tokenize and Tokenize.TokenizeStream.
message EncodeRequest {
  // Text to tokenize.
  string inputs = 1;
  // Special-token switch; false when unset.
  bool add_special_tokens = 2;
  // Optional name of a prompt; resolution is server-defined.
  optional string prompt_name = 3;
}

// One token of an EncodeResponse.
message SimpleToken {
  // Numeric token id.
  uint32 id = 1;
  // Token text.
  string text = 2;
  // Special-token flag; false when unset.
  bool special = 3;
  // Optional bounds.
  // NOTE(review): presumably offsets of the token within the input text;
  // the unit (bytes or characters) and whether `stop` is exclusive are not
  // specified here - confirm.
  optional uint32 start = 4;
  optional uint32 stop = 5;
}

// Response for Tokenize.Tokenize and Tokenize.TokenizeStream.
message EncodeResponse {
  repeated SimpleToken tokens = 1;
}

// Request for Tokenize.Decode and Tokenize.DecodeStream.
message DecodeRequest {
  // Token ids to turn back into text.
  repeated uint32 ids = 1;
  // Special-token switch; false when unset.
  bool skip_special_tokens = 2;
}

// Response for Tokenize.Decode and Tokenize.DecodeStream.
message DecodeResponse {
  string text = 1;
}