in python/ts-to-word.py [0:0]
def write_confidence_scores(document, stats, temp_files):
    """
    Using the pre-built confidence stats, create a summary table of confidence score
    spreads, as well as a scatter-plot showing each word against the overall mean.

    A new continuous two-column section is started, then the table is written, followed
    by the chart (saved to disk as a PNG and then embedded as a picture). If there are
    no word timestamps/accuracy values then the chart is skipped, as there is nothing
    to plot and no mean to calculate.

    :param document: Word document structure to write the table into
    :param stats: Statistics for the confidence scores in the conversation; read keys are
                  "parsedWords", "timestamps", "accuracy" and one count per confidence
                  bucket ("9.8", "9", "8", ... "0")
    :param temp_files: List of temporary files for later deletion; the chart file name is
                       appended to it once the chart has been written
    :return: None
    """
    # Start a new continuous section and switch it to a two-column layout
    document.add_section(WD_SECTION.CONTINUOUS)
    section_ptr = document.sections[-1]._sectPr
    cols = section_ptr.xpath('./w:cols')[0]
    cols.set(qn('w:num'), '2')
    write_custom_text_header(document, "Word Confidence Scores")

    # Start with the fixed headers
    table = document.add_table(rows=1, cols=3)
    table.style = document.styles[TABLE_STYLE_STANDARD]
    table.alignment = WD_ALIGN_PARAGRAPH.LEFT
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = "Confidence"
    hdr_cells[1].text = "Count"
    hdr_cells[2].text = "Percentage"

    parsed_words = stats["parsedWords"]
    # Display label for each bucket, paired positionally with its key in the stats dict
    confidence_ranges = ["98% - 100%", "90% - 97%", "80% - 89%", "70% - 79%", "60% - 69%", "50% - 59%",
                         "40% - 49%", "30% - 39%", "20% - 29%", "10% - 19%", "0% - 9%"]
    confidence_range_stats = ["9.8", "9", "8", "7", "6", "5", "4", "3", "2", "1", "0"]

    # Add on each row, shading every second data row
    for row_index, (conf_range, stats_key) in enumerate(zip(confidence_ranges, confidence_range_stats)):
        bucket_count = stats[stats_key]
        # Guard against a transcript with no parsed words, which would otherwise divide by zero
        percentage = bucket_count / parsed_words * 100 if parsed_words else 0.0
        row_cells = table.add_row().cells
        row_cells[0].text = conf_range
        row_cells[1].text = str(bucket_count)
        row_cells[2].text = str(round(percentage, 2)) + "%"
        if row_index % 2 == 1:
            for column in range(0, 3):
                set_table_cell_background_colour(row_cells[column], ALTERNATE_ROW_COLOUR)

    # Formatting transcript table widths
    widths = (Inches(1.2), Inches(0.8), Inches(0.8))
    for row in table.rows:
        for idx, width in enumerate(widths):
            row.cells[idx].width = width

    timestamps = stats["timestamps"]
    accuracy = stats["accuracy"]
    # len() rather than truthiness so that array-like sequences are also handled
    if len(timestamps) > 0 and len(accuracy) > 0:
        # Confidence of each word as scatter graph, and the mean as a line across
        mean_accuracy = statistics.mean(accuracy)
        fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
        try:
            ax.scatter(timestamps, accuracy, label="Individual words")
            ax.plot([timestamps[0], timestamps[-1]], [mean_accuracy, mean_accuracy], "r",
                    label="Word Confidence Mean")

            # Formatting
            ax.set_xlabel("Time (seconds)")
            ax.set_ylabel("Word Confidence (percent)")
            ax.set_yticks(range(0, 101, 10))
            fig.suptitle("Word Confidence During Transcription", fontsize=11, fontweight="bold")
            # Labels are attached to the artists themselves, so the legend cannot mislabel them
            ax.legend(loc="lower center")

            # Write out the chart via the figure itself rather than pyplot's implicit current figure
            chart_file_name = "./" + "chart.png"
            fig.savefig(chart_file_name, facecolor="aliceblue")
            temp_files.append(chart_file_name)
        finally:
            # Close the figure so it does not stay registered with pyplot after this call
            plt.close(fig)

        document.add_picture(chart_file_name, width=Cm(8))
        document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.LEFT

    document.add_paragraph()