def search_raw_text_for_breast_cancer_genes()

in files/lambda_code/call_cm_other_models_lambda.py [0:0]


def search_raw_text_for_breast_cancer_genes(raw_text=None, score_threshold=.75):
    """Search raw text for tokens that resemble known breast cancer genes.

    This searches the entire text, not just Comprehend Medical identified
    entities. The text is split on whitespace and every token is scored
    against each entry of the module-level ``list_of_genes`` using
    ``get_match_score(..., normalize_max=True)``. Both names are defined
    outside this function.

    Args:
        raw_text: The text to scan. ``None`` or an empty string yields an
            empty result instead of raising.
        score_threshold: Minimum best-match score a token needs in order to
            be reported. Defaults to the previously hard-coded 0.75.
            NOTE(review): presumably scores are normalized to 0..1 -- confirm
            against ``get_match_score``.

    Returns:
        A dict with the single key ``'BREAST_CANCER_GENES_FOUND'`` mapping to
        a list of one-entry dicts ``{token: best_matching_gene}``, in the
        order the tokens appear in the text.
    """
    dict_of_breast_cancer_genes = {'BREAST_CANCER_GENES_FOUND': []}
    if not raw_text:
        # Nothing to scan (covers the None default, which used to crash on
        # .split(), as well as the empty string).
        return dict_of_breast_cancer_genes

    genes_found = dict_of_breast_cancer_genes['BREAST_CANCER_GENES_FOUND']
    for the_text in raw_text.split():
        best_score = 0
        best_gene = None
        try:
            # Find the best matching gene for this token. ">=" is deliberate:
            # on a tie the gene that appears last in list_of_genes wins.
            for gene in list_of_genes:
                the_score = get_match_score(
                    string_1=gene, string_2=the_text, normalize_max=True
                )
                if the_score >= best_score:
                    best_score = the_score
                    best_gene = gene
        except Exception:
            # Best effort: a token that cannot be scored is skipped rather
            # than aborting the whole scan. Unlike a bare ``except`` this
            # lets KeyboardInterrupt / SystemExit propagate.
            continue
        # Do not report matches scoring below the threshold.
        if best_gene is not None and best_score >= score_threshold:
            genes_found.append({the_text: best_gene})
    return dict_of_breast_cancer_genes