in src/feature_extractor.py [0:0]
def __init__(self, EMBEDDING_MODELS_DICT, model_name="Xenova/all-MiniLM-L6-v2"):
    """Download (if needed) and load an ONNX embedding model plus its tokenizer.

    Args:
        EMBEDDING_MODELS_DICT: mapping of model name -> download URL for the
            ONNX model file.
        model_name: key into ``EMBEDDING_MODELS_DICT``; also used as the
            HuggingFace repo id for ``AutoTokenizer.from_pretrained``.

    Raises:
        KeyError: if ``model_name`` is not in ``EMBEDDING_MODELS_DICT``.
        requests.HTTPError: if the model download returns an error status.
    """
    print(f"selected model is {model_name}")
    model_url = EMBEDDING_MODELS_DICT.get(model_name)
    if model_url is None:
        # Fail early with a clear message instead of crashing inside requests.get(None).
        raise KeyError(
            f"Unknown model {model_name!r}; available: {sorted(EMBEDDING_MODELS_DICT)}"
        )
    model_dir_path = "../models"
    model_path = f"{model_dir_path}/{model_name.replace('/','_')}"
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(model_dir_path, exist_ok=True)
    if not os.path.exists(model_path):
        print("Downloading ONNX model...")
        response = requests.get(model_url, timeout=300)
        # Without this, a 404/500 body (e.g. an HTML error page) would be
        # silently cached as the model file.
        response.raise_for_status()
        # Write to a temp file and rename atomically so an interrupted
        # download never leaves a truncated file at model_path.
        tmp_path = model_path + ".part"
        with open(tmp_path, "wb") as f:
            f.write(response.content)
        os.replace(tmp_path, model_path)
        print("ONNX model downloaded.")
    # Load the ONNX model
    self.ort_session = ort.InferenceSession(model_path)
    # Initialize tokenizer (model_name doubles as the HF repo id)
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)