def write_confidence_scores()

in python/ts-to-word.py [0:0]


def write_confidence_scores(document, stats, temp_files):
    """
    Using the pre-built confidence stats list, create a summary table of confidence score
    spreads, as well as a scatter-plot showing each word against the overall mean

    :param document: Word document structure to write the table into
    :param stats: Statistics for the confidence scores in the conversation; this function reads
                  "parsedWords", the per-band counts keyed "9.8", "9", "8" ... "0", and the
                  "timestamps" and "accuracy" sequences
    :param temp_files: List of temporary files for later deletion; the chart file name is
                       appended to it once the chart has been saved
    :return: None
    """
    # Start a new continuous section and switch it to a two-column layout
    document.add_section(WD_SECTION.CONTINUOUS)
    section_ptr = document.sections[-1]._sectPr
    cols = section_ptr.xpath('./w:cols')[0]
    cols.set(qn('w:num'), '2')
    write_custom_text_header(document, "Word Confidence Scores")

    # Start with the fixed headers
    table = document.add_table(rows=1, cols=3)
    table.style = document.styles[TABLE_STYLE_STANDARD]
    table.alignment = WD_ALIGN_PARAGRAPH.LEFT
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = "Confidence"
    hdr_cells[1].text = "Count"
    hdr_cells[2].text = "Percentage"

    parsed_words = stats["parsedWords"]
    # Each display label is paired with the stats key holding that band's word count
    confidence_bands = (
        ("98% - 100%", "9.8"),
        ("90% - 97%", "9"),
        ("80% - 89%", "8"),
        ("70% - 79%", "7"),
        ("60% - 69%", "6"),
        ("50% - 59%", "5"),
        ("40% - 49%", "4"),
        ("30% - 39%", "3"),
        ("20% - 29%", "2"),
        ("10% - 19%", "1"),
        ("0% - 9%", "0"),
    )

    # Add on each row, shading every second one
    shading_reqd = False
    for band_label, stats_key in confidence_bands:
        band_count = stats[stats_key]
        # Guard against a transcript with no parsed words rather than dividing by zero
        percentage = round(band_count / parsed_words * 100, 2) if parsed_words else 0.0
        row_cells = table.add_row().cells
        row_cells[0].text = band_label
        row_cells[1].text = str(band_count)
        row_cells[2].text = str(percentage) + "%"

        # Add highlighting to the row if required
        if shading_reqd:
            for cell in row_cells[:3]:
                set_table_cell_background_colour(cell, ALTERNATE_ROW_COLOUR)
        shading_reqd = not shading_reqd

    # Apply the fixed column widths to every row of the table
    widths = (Inches(1.2), Inches(0.8), Inches(0.8))
    for row in table.rows:
        for idx, width in enumerate(widths):
            row.cells[idx].width = width

    # Confidence of each word as scatter graph, and the mean as a line across
    timestamps = stats["timestamps"]
    accuracy = stats["accuracy"]
    chart_file_name = "./chart.png"
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
    try:
        word_points = ax.scatter(timestamps, accuracy)
        # Pair legend labels with explicit artist handles so the labelling does not
        # depend on the order in which matplotlib happens to collect artists
        legend_handles = [word_points]
        legend_labels = ["Individual words"]
        # The mean line needs at least one data point; skip it for an empty transcript
        if len(timestamps) > 0 and len(accuracy) > 0:
            mean_accuracy = statistics.mean(accuracy)
            (mean_line,) = ax.plot([timestamps[0], timestamps[-1]],
                                   [mean_accuracy, mean_accuracy], "r")
            legend_handles.insert(0, mean_line)
            legend_labels.insert(0, "Word Confidence Mean")

        # Formatting
        ax.set_xlabel("Time (seconds)")
        ax.set_ylabel("Word Confidence (percent)")
        ax.set_yticks(range(0, 101, 10))
        fig.suptitle("Word Confidence During Transcription", fontsize=11, fontweight="bold")
        ax.legend(legend_handles, legend_labels, loc="lower center")

        # Write out the chart
        fig.savefig(chart_file_name, facecolor="aliceblue")
    finally:
        # Close (not just clear) the figure so pyplot does not keep it alive
        plt.close(fig)
    temp_files.append(chart_file_name)

    document.add_picture(chart_file_name, width=Cm(8))
    document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.LEFT
    document.add_paragraph()