in python/ts-to-word.py [0:0]
def _set_section_columns(document, num_columns):
    """
    Append a continuous section break and set its text-column count.

    python-docx exposes no public API for text columns, so we reach into the
    new section's underlying XML and set the w:num attribute on w:cols.

    :param document: Document being written to
    :param num_columns: Number of text columns for the new section
    """
    document.add_section(WD_SECTION.CONTINUOUS)
    section_ptr = document.sections[-1]._sectPr
    cols = section_ptr.xpath('./w:cols')[0]
    cols.set(qn('w:num'), str(num_columns))


def _gather_job_data(cli_arguments, speech_segments, job_status, data, stats):
    """
    Collect the name/value rows shown in the "Call Summary" table.

    Depending on the mode that Transcribe was used in, and whether the run is
    against a JSON results file rather than a live Transcribe job, not all of
    the information is available:
    -- Media information
    -- Amazon Transcribe job information (only when job_status is supplied)
    -- Average transcript word-confidence scores

    :param cli_arguments: CLI arguments used for this processing run
    :param speech_segments: List of call speech segments
    :param job_status: Status of the Transcribe job (may be None)
    :param data: Parsed JSON transcription results
    :param stats: Confidence statistics generated from the speech segments
    :return: List of {"name": ..., "value": ...} dicts, in display order
    """
    job_data = []

    # Audio duration is the end-time of the final voice segment, which might
    # be shorter than the actual file duration
    if speech_segments:
        audio_duration = speech_segments[-1].segmentEndTime
        dur_text = str(int(audio_duration / 60)) + "m " + str(round(audio_duration % 60, 2)) + "s"
        job_data.append({"name": "Audio Duration", "value": dur_text})

    # We can infer diarization mode from the JSON results data structure
    if cli_arguments.analyticsMode:
        job_data.append({"name": "Audio Ident", "value": "Call Analytics"})
    elif "speaker_labels" in data["results"]:
        job_data.append({"name": "Audio Ident", "value": "Speaker-separated"})
    else:
        job_data.append({"name": "Audio Ident", "value": "Channel-separated"})

    # Some information only exists in the Transcribe job status
    if job_status is not None:
        job_data.append({"name": "Language", "value": job_status["LanguageCode"]})
        job_data.append({"name": "File Format", "value": job_status["MediaFormat"]})
        job_data.append({"name": "Sample Rate", "value": str(job_status["MediaSampleRateHertz"]) + " Hz"})
        job_data.append({"name": "Job Created", "value": job_status["CreationTime"].strftime("%a %d %b '%y at %X")})
        settings = job_status["Settings"]
        if "ContentRedaction" in settings:
            redact_type = settings["ContentRedaction"]["RedactionType"]
            redact_output = settings["ContentRedaction"]["RedactionOutput"]
            job_data.append({"name": "Redaction Mode", "value": redact_type + " [" + redact_output + "]"})
        if "VocabularyFilterName" in settings:
            vocab_filter = settings["VocabularyFilterName"]
            vocab_method = settings["VocabularyFilterMethod"]
            job_data.append({"name": "Vocabulary Filter", "value": vocab_filter + " [" + vocab_method + "]"})
        if "VocabularyName" in settings:
            job_data.append({"name": "Custom Vocabulary", "value": settings["VocabularyName"]})

    # Finish with the confidence scores (if we have any)
    if stats["accuracy"]:
        job_data.append({"name": "Avg. Confidence", "value": str(round(statistics.mean(stats["accuracy"]), 2)) + "%"})

    return job_data


def write(cli_arguments, speech_segments, job_status):
    """
    Write a transcript from the .json transcription file and other data generated
    by the results parser, putting it all into a human-readable Word document

    :param cli_arguments: CLI arguments used for this processing run
    :param speech_segments: List of call speech segments
    :param job_status: Status of the Transcribe job (may be None when reading a results file)
    """
    json_filepath = Path(cli_arguments.inputFile)
    # Context manager ensures the results-file handle is closed (original
    # leaked the handle returned by a bare open() inside json.load())
    with open(json_filepath.absolute(), "r", encoding="utf-8") as json_file:
        data = json.load(json_file)
    sentiment_enabled = (cli_arguments.sentiment == 'on')
    temp_files = []  # Chart images written to disk; deleted after the save

    # Initiate Document, orientation and margins (A4 portrait)
    document = Document()
    first_section = document.sections[0]
    first_section.left_margin = Mm(19.1)
    first_section.right_margin = Mm(19.1)
    first_section.top_margin = Mm(19.1)
    first_section.bottom_margin = Mm(19.1)
    first_section.page_width = Mm(210)
    first_section.page_height = Mm(297)

    # Set the base font and document title
    font = document.styles["Normal"].font
    font.name = "Calibri"
    font.size = Pt(10)

    # Create our custom text header style
    custom_style = document.styles.add_style(CUSTOM_STYLE_HEADER, WD_STYLE_TYPE.PARAGRAPH)
    custom_style.paragraph_format.widow_control = True
    custom_style.paragraph_format.keep_with_next = True
    custom_style.paragraph_format.space_after = Pt(0)
    custom_style.font.size = font.size
    custom_style.font.name = font.name
    custom_style.font.bold = True
    custom_style.font.italic = True

    # Intro banner header
    document.add_picture(load_image(IMAGE_URL_BANNER), width=Mm(171))

    # We need 2 columns only if we're in analytics mode, as we put the
    # charts on the right of the call summary table
    if cli_arguments.analyticsMode:
        _set_section_columns(document, 2)

    # Write out the call summary table - see _gather_job_data() for the
    # caveats on which fields are available in which mode
    write_custom_text_header(document, "Call Summary")
    table = document.add_table(rows=1, cols=2)
    table.style = document.styles[TABLE_STYLE_STANDARD]
    table.alignment = WD_ALIGN_PARAGRAPH.LEFT
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = "Job Name"
    # Analytics results use "JobName"; standard results use "jobName"
    if cli_arguments.analyticsMode:
        hdr_cells[1].text = data["JobName"]
    else:
        hdr_cells[1].text = data["jobName"]

    # Confidence stats are needed both for the summary table and (optionally)
    # the confidence-score section later on
    stats = generate_confidence_stats(speech_segments)
    job_data = _gather_job_data(cli_arguments, speech_segments, job_status, data, stats)

    # Place all of our job-summary fields into the Table, one row at a time
    for next_row in job_data:
        row_cells = table.add_row().cells
        row_cells[0].text = next_row["name"]
        row_cells[1].text = next_row["value"]

    # Formatting transcript table widths
    widths = (Cm(3.44), Cm(4.89))
    for row in table.rows:
        for idx, width in enumerate(widths):
            row.cells[idx].width = width

    # Spacer paragraph
    document.add_paragraph()

    # Conversational Analytics (other column) if enabled
    # -- Caller sentiment graph
    # -- Talk time split
    if cli_arguments.analyticsMode:
        write_header_graphs(data, document, temp_files)

    # At this point, if we have no transcript then we need to quickly exit
    if not speech_segments:
        _set_section_columns(document, 1)
        write_custom_text_header(document, "This call had no audible speech to transcribe.")
    else:
        # Conversational Analytics (new Section)
        # -- Show speaker loudness graph, with sentiment, interrupts and non-talk time highlighted
        # -- Show a summary of any call analytics categories detected
        # -- Show a summary of any issues detected in the transcript
        # -- Process and display speaker sentiment by period
        if cli_arguments.analyticsMode:
            build_call_loudness_charts(document, speech_segments,
                                       data["ConversationCharacteristics"]["Interruptions"],
                                       data["ConversationCharacteristics"]["NonTalkTime"],
                                       data["ConversationCharacteristics"]["TalkTime"], temp_files)
            keyed_categories = write_detected_categories(document, data["Categories"]["MatchedDetails"])
            write_detected_issue_summaries(document, speech_segments)
            write_analytics_sentiment(data, document)
        else:
            # No analytics => no categories
            keyed_categories = {}

        # Process and display transcript by speaker segments (new section)
        # -- Conversation "turn" start time and duration
        # -- Speaker identification
        # -- Sentiment type (if enabled) and sentiment score (if available)
        # -- Transcribed text with (if available) Call Analytics markers
        _set_section_columns(document, 1)
        write_custom_text_header(document, "Call Transcription")
        document.add_paragraph()  # Spacing
        write_small_header_text(document, "WORD CONFIDENCE: >= 90% in black, ", 0.9)
        write_small_header_text(document, ">= 50% in brown, ", 0.5)
        write_small_header_text(document, "< 50% in red", 0.49)

        show_sentiment = sentiment_enabled or cli_arguments.analyticsMode
        table_cols = 4
        if show_sentiment:
            # Ensure that we add space for the sentiment column
            table_cols += 1
            content_col_offset = 0
        else:
            # Will need to shift the content column to the left, as Sentiment isn't there now
            content_col_offset = -1
        table = document.add_table(rows=1, cols=table_cols)
        table.style = document.styles[TABLE_STYLE_STANDARD]
        hdr_cells = table.rows[0].cells
        hdr_cells[COL_STARTTIME].text = "Start"
        hdr_cells[COL_ENDTIME].text = "Dur."
        hdr_cells[COL_SPEAKER].text = "Speaker"
        hdr_cells[COL_CONTENT + content_col_offset].text = "Transcription"

        # Based upon our segment list, write out the transcription table
        write_transcribe_text(table, show_sentiment, cli_arguments.analyticsMode,
                              speech_segments, keyed_categories)
        document.add_paragraph()

        # Formatting transcript table widths - we need to add sentiment
        # column if needed, and adjust the content width accordingly
        # (original had a stray unary "+ +" in the index expressions; fixed)
        widths = [Inches(0.8), Inches(0.5), Inches(0.5), 0]
        if sentiment_enabled:
            # Comprehend sentiment needs space for the icon and % score
            widths.append(0)
            widths[COL_CONTENT + content_col_offset] = Inches(7)
            widths[COL_SENTIMENT] = Inches(0.7)
        elif cli_arguments.analyticsMode:
            # Analytics sentiment just needs an icon
            widths.append(0)
            widths[COL_CONTENT + content_col_offset] = Inches(7.4)
            widths[COL_SENTIMENT] = Inches(0.3)
        else:
            widths[COL_CONTENT + content_col_offset] = Inches(7.7)
        for row in table.rows:
            for idx, width in enumerate(widths):
                row.cells[idx].width = width

        # Setup the repeating header
        set_repeat_table_header(table.rows[0])

        # Display confidence count table, if requested (new section)
        # -- Summary table of confidence scores into "bins"
        # -- Scatter plot of confidence scores over the whole transcript
        if cli_arguments.confidence == 'on':
            write_confidence_scores(document, stats, temp_files)
            document.add_section(WD_SECTION.CONTINUOUS)

        # Generate our raw data for the Comprehend sentiment graph (if requested)
        if sentiment_enabled:
            write_comprehend_sentiment(document, speech_segments, temp_files)

    # Save the whole document
    document.save(cli_arguments.outputFile)

    # Now delete any local images that we created
    for filename in temp_files:
        os.remove(filename)