in backends/python/server/text_embeddings_server/models/jinaBert_model.py
def __init__(self, prefix, handle, device, dtype, config: JinaBertConfig):
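# Reject configurations whose hidden size does not split evenly across the
# attention heads; configs that expose an explicit embedding_size are exempt,
# mirroring the standard BERT self-attention check.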
if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
config, "embedding_size"
):
raise ValueError(
f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention "
f"heads ({config.num_attention_heads})"
)
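# Cache the config and derive the per-head dimension plus the total projected size.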
self.config = config
self.num_attention_heads = config.num_attention_heads
self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
self.all_head_size = self.num_attention_heads * self.attention_head_size
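# Fetch the raw Q/K/V projection weights and biases from the weights handle,
# casting them to the working dtype and placing them on the target device.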
self.query_weight = (
handle.get_tensor(f"{prefix}.query.weight").to(dtype).to(device)
)
self.query_bias = handle.get_tensor(f"{prefix}.query.bias").to(dtype).to(device)
self.key_weight = handle.get_tensor(f"{prefix}.key.weight").to(dtype).to(device)
self.key_bias = handle.get_tensor(f"{prefix}.key.bias").to(dtype).to(device)
self.value_weight = (
handle.get_tensor(f"{prefix}.value.weight").to(dtype).to(device)
)
self.value_bias = handle.get_tensor(f"{prefix}.value.bias").to(dtype).to(device)
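# JinaBERT keeps separate LayerNorm scale/bias parameters for the query and key
# projections; load them the same way.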
self.layer_norm_q_weight = (
handle.get_tensor(f"{prefix}.layer_norm_q.weight").to(dtype).to(device)
)
self.layer_norm_q_bias = (
handle.get_tensor(f"{prefix}.layer_norm_q.bias").to(dtype).to(device)
)
self.layer_norm_k_weight = (
handle.get_tensor(f"{prefix}.layer_norm_k.weight").to(dtype).to(device)
)
self.layer_norm_k_bias = (
handle.get_tensor(f"{prefix}.layer_norm_k.bias").to(dtype).to(device)
)
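# Dropout applied to the attention probabilities.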
self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
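
A minimal construction sketch, not taken from the source file: it assumes the weights handle is a safetensors file opened with safe_open (which provides the get_tensor method used above), that the enclosing class is named JinaBertAttention (an illustrative name), and that the layer prefix and an already loaded JinaBertConfig instance are supplied by the caller.

import torch
from safetensors import safe_open

# Hypothetical usage: the checkpoint filename, class name, and layer prefix are
# placeholders, not confirmed by the excerpt above.
with safe_open("model.safetensors", framework="pt") as handle:
    attention = JinaBertAttention(
        prefix="encoder.layer.0.attention.self",
        handle=handle,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
        dtype=torch.float16,
        config=config,  # a JinaBertConfig instance loaded elsewhere
    )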