in files/lambda_code/call_cm_other_models_lambda.py [0:0]
def search_raw_text_for_breast_cancer_genes(raw_text=None, score_threshold=0.75):
    """Search the raw text for breast cancer genes.

    This searches the entire text, not just Comprehend Medical identified
    entities. The text is split on whitespace and every token is scored
    against each entry of the module-level ``list_of_genes`` using
    ``get_match_score`` (both are defined elsewhere in this file).

    Args:
        raw_text: The text to scan. ``None`` or an empty string yields an
            empty result instead of raising.
        score_threshold: Minimum best-match score a token needs in order to
            be reported. Defaults to 0.75, the previously hard-coded value.
            (Scores are presumably normalized to 0..1 because
            ``normalize_max=True`` is passed -- confirm against
            ``get_match_score``.)

    Returns:
        A dict with the single key ``'BREAST_CANCER_GENES_FOUND'`` mapping to
        a list of one-entry dicts ``{token: best_matching_gene}``, in the
        order the tokens appear in the text.
    """
    genes_found = []
    result = {'BREAST_CANCER_GENES_FOUND': genes_found}

    # Nothing to scan; this also avoids calling .split() on None.
    if not raw_text:
        return result

    for token in raw_text.split():
        best_score = 0
        best_gene = None
        try:
            # Find the best matching gene for this token.
            for gene in list_of_genes:
                score = get_match_score(
                    string_1=gene, string_2=token, normalize_max=True
                )
                # ">=" means the last gene wins among equally scored ones.
                if score >= best_score:
                    best_score = score
                    best_gene = gene
        except Exception:
            # Best effort: a token that cannot be scored is skipped.
            # Exception (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate. The token is deliberately not
            # logged because the text may contain sensitive medical data.
            continue

        # Do not report matches scoring below the threshold.
        if best_gene is not None and best_score >= score_threshold:
            genes_found.append({token: best_gene})

    return result