in src/feature_extractor.py [0:0]
import numpy as np


def extract_features(self, text):
    # Tokenize to NumPy tensors so they can be fed straight to ONNX Runtime
    inputs = self.tokenizer(text, return_tensors="np", padding=True, truncation=True)

    # Build the input_feed keyed by the names the ONNX session actually expects
    input_feed = {
        self.ort_session.get_inputs()[0].name: inputs["input_ids"],
        self.ort_session.get_inputs()[1].name: inputs["attention_mask"],
    }

    # Some exports (e.g. BERT) also require token_type_ids. Fall back to zeros
    # when the tokenizer does not produce them; feeding None into
    # ort_session.run() would raise.
    if len(self.ort_session.get_inputs()) > 2:
        token_type_ids = inputs.get("token_type_ids")
        if token_type_ids is None:
            token_type_ids = np.zeros_like(inputs["input_ids"])
        input_feed[self.ort_session.get_inputs()[2].name] = token_type_ids

    # Run inference; the first output holds the per-token embeddings
    outputs = self.ort_session.run(None, input_feed)

    # Squeeze the batch dimension so the result has shape [seq_len, embed_dim]
    return np.squeeze(outputs[0], axis=0), inputs["attention_mask"].squeeze(axis=0)
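
# --- Usage sketch (illustrative, not from the repo) --------------------------
# extract_features assumes `self.tokenizer` is a Hugging Face tokenizer and
# `self.ort_session` is an onnxruntime.InferenceSession. The wrapper class,
# model path, and checkpoint name below are hypothetical stand-ins.
import onnxruntime as ort
from transformers import AutoTokenizer


class FeatureExtractor:
    def __init__(self, onnx_path, tokenizer_name="bert-base-uncased"):
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.ort_session = ort.InferenceSession(onnx_path)


# Attach the function above as a method of the sketch class
FeatureExtractor.extract_features = extract_features

extractor = FeatureExtractor("model.onnx")  # hypothetical export path
embeddings, mask = extractor.extract_features("an example sentence")
print(embeddings.shape)  # (seq_len, embed_dim)

# Typical downstream use: drop padding positions and mean-pool to one vector
sentence_vec = embeddings[mask.astype(bool)].mean(axis=0)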