in python/ts-to-word.py [0:0]
def _emit_category_row(output_table, keyed_categories, content_col_offset, timestamp_millis):
    """
    Writes the category row for one timestamp, then removes that timestamp from the pending categories
    :param output_table: Word document structure to write the table into
    :param keyed_categories: Pending categories, keyed by timestamp in milliseconds (mutated here)
    :param content_col_offset: Column offset applied to the CONTENT column (0 or -1)
    :param timestamp_millis: Key of the category entry to write out
    """
    insert_category_row(content_col_offset, keyed_categories, output_table, timestamp_millis)
    # Drop the entry so that the same category can never be written out twice
    keyed_categories.pop(timestamp_millis)


def _write_segment_words(content_para, segment):
    """
    Appends every word of the segment to the content paragraph with confidence-level styling,
    inserting an " [ISSUE]" marker and forced highlighting around each detected issue
    :param content_para: Paragraph of the CONTENT cell that receives the runs
    :param segment: Speech segment providing segmentConfidence and segmentIssuesDetected
    """
    # The scan below only ever moves forward through the text, so issues have to be consumed
    # in ascending "Begin" order - sorting makes that true regardless of how the list was built
    pending_issues = iter(sorted(segment.segmentIssuesDetected, key=lambda issue: issue["Begin"]))
    next_issue = next(pending_issues, None)

    # NOTE(review): the running offset starts at 1 and an issue only starts on an exact match
    # with "Begin" (or when "Begin" is 0) - confirm this lines up with the producer's offsets
    text_index = 1
    live_issue = False
    for word in segment.segmentConfidence:
        # Are we at the start of an issue?  Only look while no issue is live, otherwise an
        # issue with "Begin" of 0 would get a fresh marker in front of every one of its words
        if next_issue is not None and not live_issue:
            if (next_issue["Begin"] == 0) or (next_issue["Begin"] == text_index):
                # If so, start the highlighting run
                marker_run = content_para.add_run(" [ISSUE]")
                set_transcript_text_style(marker_run, True, confidence=0.0)
                live_issue = True

        # Output the next word, with the correct confidence styling and forced background
        word_run = content_para.add_run(word["text"])
        text_index += len(word["text"])
        set_transcript_text_style(word_run, live_issue, confidence=word["confidence"])

        # Has our issue now finished?
        if live_issue and next_issue["End"] <= text_index:
            # Yes - stop highlighting, and pick up any pending issue left on this line
            live_issue = False
            next_issue = next(pending_issues, None)


def write_transcribe_text(output_table, sentiment_enabled, analytics_mode, speech_segments, keyed_categories):
    """
    Writes out each line of the transcript in the Word table structure, optionally including sentiments

    One table row is added per speech segment.  A category row is additionally written (via
    insert_category_row) before the segment when a category key equals the segment start time,
    and after the segment when a key falls strictly inside the segment or equals its end time.

    :param output_table: Word document structure to write the table into
    :param sentiment_enabled: Flag to indicate we need to show some sentiment
    :param analytics_mode: Flag to indicate we're in Analytics mode, not Standard
    :param speech_segments: Turn-by-turn speech list
    :param keyed_categories: Categories keyed by timestamp in milliseconds.  NOTE: this is mutated,
                             as each entry is removed once its row has been written
    """
    # Load our image files if we have sentiment enabled
    if sentiment_enabled:
        png_smile = load_image(IMAGE_URL_SMILE)
        png_frown = load_image(IMAGE_URL_FROWN)
        png_neutral = load_image(IMAGE_URL_NEUTRAL)
        content_col_offset = 0
    else:
        # Ensure we offset the CONTENT column correctly due to no sentiment
        content_col_offset = -1

    # Create a row and populate it for each segment that we have
    shading_reqd = False
    for segment in speech_segments:
        # Before we start, does an analytics category start at this time?
        # NOTE(review): this is an exact float match against the category keys - presumably both
        # sides are derived from the same source timestamps; confirm against the caller
        start_in_millis = segment.segmentStartTime * 1000.0
        end_in_millis = segment.segmentEndTime * 1000.0
        if start_in_millis in keyed_categories:
            _emit_category_row(output_table, keyed_categories, content_col_offset, start_in_millis)

        # Start with the easy stuff - note the second column shows the segment duration
        row_cells = output_table.add_row().cells
        row_cells[COL_STARTTIME].text = convert_timestamp(segment.segmentStartTime)
        row_cells[COL_ENDTIME].text = f"{(segment.segmentEndTime - segment.segmentStartTime):.1f}s"
        row_cells[COL_SPEAKER].text = segment.segmentSpeaker

        # Every piece of transcript text for this row goes into the same paragraph
        content_para = row_cells[COL_CONTENT + content_col_offset].paragraphs[0]

        # Mark the start of the turn as INTERRUPTED if that's the case
        if segment.segmentInterruption:
            interruption_run = content_para.add_run("[INTERRUPTION]")
            set_transcript_text_style(interruption_run, True, confidence=0.0)
            content_para.add_run(" ")

        # Then do each word with confidence-level colouring and issue highlighting
        _write_segment_words(content_para, segment)

        # If enabled, finish with the base sentiment for the segment - don't write out
        # score if it turns out that this segment is neither Negative nor Positive
        if sentiment_enabled:
            if segment.segmentIsPositive or segment.segmentIsNegative:
                sentiment_para = row_cells[COL_SENTIMENT].paragraphs[0]
                sentiment_png = png_smile if segment.segmentIsPositive else png_frown
                sentiment_para.add_run().add_picture(sentiment_png, width=Mm(4))

                # We only have turn-by-turn sentiment score values in non-analytics mode
                if not analytics_mode:
                    score_run = sentiment_para.add_run(' (' + str(segment.segmentSentimentScore)[:4] + ')')
                    score_run.font.size = Pt(7)
                    score_run.font.italic = True
            else:
                row_cells[COL_SENTIMENT].paragraphs[0].add_run().add_picture(png_neutral, width=Mm(4))

        # Add highlighting to the row if required - shading alternates row by row
        if shading_reqd:
            for column in range(COL_CONTENT + content_col_offset + 1):
                set_table_cell_background_colour(row_cells[column], ALTERNATE_ROW_COLOUR)
        shading_reqd = not shading_reqd

        # Check if a category occurs in the middle of a segment - put it after the segment, as timestamp
        # is "later".  Iterate over a snapshot of the keys, as entries are removed while we go
        for category_start in list(keyed_categories):
            if start_in_millis < category_start < end_in_millis:
                _emit_category_row(output_table, keyed_categories, content_col_offset, category_start)

        # Before we end, does an analytics category start with this line's end time?
        if end_in_millis in keyed_categories:
            # If so, write out the line after this
            _emit_category_row(output_table, keyed_categories, content_col_offset, end_in_millis)