# decode_transcript()
#
# in functions/source/keyword-extraction/lambda_function.py [0:0]


def decode_transcript(body):
    """Decode one transcript segment and flag the keywords it mentions.

    The keyword list is read from the file referenced by the module-level
    ``download_txt_path`` (one keyword per row). Matching is
    case-insensitive and is done on the transcript text with punctuation
    removed; matches are reported using the spelling found in the keyword
    file.

    Args:
        body: Mapping describing a single transcript segment. When it has a
            ``"Speaker"`` key, its ``"StartTime"``, ``"EndTime"``,
            ``"Speaker"`` and ``"Transcript"`` entries are read as well.

    Returns:
        dict: Maps ``"StartTime"``, ``"EndTime"``, ``"Speaker"``,
        ``"Transcript"``, ``"KeywordPresence"`` and ``"Keywords"`` to lists.
        Each list holds exactly one entry when ``body`` contains a
        ``"Speaker"`` key and is empty otherwise.
    """
    # Raw string: the backslash is itself one of the punctuation characters,
    # and a non-raw "\," is an invalid escape sequence.
    punc = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    strip_punctuation = str.maketrans('', '', punc)

    # NOTE(review): newer pandas releases are reported to reject sep="\n";
    # confirm against the pandas version this function is deployed with.
    keywords = pandas.read_csv(download_txt_path, sep="\n", header=None)
    # Original spelling -> lower-cased form used for matching.
    keys = {i.strip(): i.strip().lower() for i in keywords[0]}
    # Reverse lookup built once. setdefault keeps the first spelling when
    # several keywords share the same lower-cased form.
    original_spelling = {}
    for key, value in keys.items():
        original_spelling.setdefault(value, key)

    decoded_data = {
        "StartTime": [],
        "EndTime": [],
        "Speaker": [],
        "Transcript": [],
        "KeywordPresence": [],
        "Keywords": [],
    }
    # Only segments carrying speaker identification are decoded.
    if "Speaker" not in body:
        return decoded_data

    decoded_data["StartTime"].append(convert_time_stamp(body["StartTime"]))
    decoded_data["EndTime"].append(convert_time_stamp(body["EndTime"]))
    decoded_data["Speaker"].append(body["Speaker"])
    decoded_data["Transcript"].append(body["Transcript"])

    # Keywords are searched with a leading space so they only match at the
    # start of a word. Pad the segment with a leading space as well so a
    # keyword that opens the transcript is not missed.
    seg = ' ' + body["Transcript"].lower().translate(strip_punctuation)
    kwrds = [
        original_spelling[lowered]
        for lowered in keys.values()
        if ' ' + lowered in seg
    ]

    decoded_data["KeywordPresence"].append(bool(kwrds))
    decoded_data["Keywords"].append(kwrds)

    return decoded_data