in Medical_Text_Analysis_Resources/docker_containers/model_container/model_scripts/hosted_model.py [0:0]
def run_model(self, input_sentence, the_paragraph):
    """Rank the sentences of a paragraph by closeness to an input sentence.

    The paragraph is split into sentences with ``self.break_up_by_sentence``.
    Each sentence and the input sentence are embedded with a mean-pooled
    Bio_ClinicalBERT sentence encoder, and the cosine distance from the input
    sentence to every paragraph sentence is handed to
    ``self.get_best_n_sentences`` together with the sentences themselves.

    Args:
        input_sentence: The query sentence to compare against the paragraph.
        the_paragraph: The text whose sentences form the search corpus.

    Returns:
        Whatever ``self.get_best_n_sentences(corpus, distances)`` returns.

    NOTE(review): a paragraph that yields no sentences is not special-cased
    here -- confirm how ``break_up_by_sentence`` and the distance step behave
    on empty input.
    """
    # Building the transformer is by far the most expensive step and does not
    # depend on the arguments, so it is done once per instance and reused on
    # later calls instead of being reloaded for every request.
    embedder = getattr(self, "_cached_embedder", None)
    if embedder is None:
        word_embedding_model = models.Transformer('emilyalsentzer/Bio_ClinicalBERT')
        # Apply mean pooling to get one fixed-size sentence vector.
        pooling_model = models.Pooling(
            word_embedding_model.get_word_embedding_dimension(),
            pooling_mode_mean_tokens=True,
            pooling_mode_cls_token=False,
            pooling_mode_max_tokens=False,
        )
        embedder = SentenceTransformer(modules=[word_embedding_model, pooling_model])
        self._cached_embedder = embedder

    # Create a corpus of every individual sentence within the paragraph.
    corpus = self.break_up_by_sentence(the_paragraph)
    corpus_embeddings = embedder.encode(corpus)

    # The input sentence is the single query; encode() takes a list.
    query_embedding = embedder.encode([input_sentence])

    # cdist returns a (queries x corpus) matrix; row 0 holds the distances
    # for the one query.
    distances = scipy.spatial.distance.cdist(
        query_embedding, corpus_embeddings, "cosine"
    )[0]
    return self.get_best_n_sentences(corpus, distances)