in functions/source/keyword-extraction/lambda_function.py [0:0]
def decode_transcript(body):
    """Decode one transcript segment and tag the keywords it mentions.

    Keywords are read from the text file at ``download_txt_path`` (one keyword
    per line). Matching is case-insensitive and anchored at the start of a
    word: a keyword is reported when it appears in the punctuation-stripped
    transcript directly after a space or at the very beginning of the segment.

    Args:
        body: Mapping describing a transcript segment. Only segments that
            contain a "Speaker" key are decoded; those must also provide
            "StartTime", "EndTime" and "Transcript".

    Returns:
        A dict of parallel lists keyed by "StartTime", "EndTime", "Speaker",
        "Transcript", "KeywordPresence" and "Keywords". Each list holds one
        entry when ``body`` has a "Speaker" key and is empty otherwise.
        "Keywords" entries use the spelling found in the keyword file.
    """
    decoded_data = {
        "StartTime": [],
        "EndTime": [],
        "Speaker": [],
        "Transcript": [],
        "KeywordPresence": [],
        "Keywords": [],
    }

    # Nothing to decode without speaker identification; skip the file read.
    if "Speaker" not in body:
        return decoded_data

    # Map lower-cased keyword -> spelling as written in the keyword file.
    # The file is read as plain text rather than parsed as CSV so that entries
    # such as "NA", "null" or "2020" stay strings, and blank lines are ignored.
    # NOTE(review): assumes download_txt_path is a local file path - confirm.
    originals = {}
    with open(download_txt_path, encoding="utf-8-sig") as keyword_file:
        for line in keyword_file:
            keyword = line.strip()
            if keyword:
                # First spelling wins when several lines differ only by case.
                originals.setdefault(keyword.lower(), keyword)

    decoded_data["StartTime"].append(convert_time_stamp(body["StartTime"]))
    decoded_data["EndTime"].append(convert_time_stamp(body["EndTime"]))
    decoded_data["Speaker"].append(body["Speaker"])
    decoded_data["Transcript"].append(body["Transcript"])

    # Strip punctuation from the lower-cased transcript in a single pass.
    # NOTE: keywords themselves are not stripped, so a keyword that contains
    # one of these characters cannot match.
    punctuation = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    segment = body["Transcript"].lower().translate(
        str.maketrans("", "", punctuation)
    )

    # Pad with a leading space so a keyword that opens the segment is found by
    # the same "space + keyword" search used for every other position.
    padded_segment = " " + segment
    found = [
        original
        for lowered, original in originals.items()
        if " " + lowered in padded_segment
    ]

    decoded_data["KeywordPresence"].append(bool(found))
    decoded_data["Keywords"].append(found)
    return decoded_data