in bindings/python/py_src/tokenizers/tools/visualizer.py [0:0]
def __make_html(text: str, encoding: Encoding, annotations: AnnotationList) -> str:
char_states = EncodingVisualizer.__make_char_states(text, encoding, annotations)
current_consecutive_chars = [char_states[0]]
prev_anno_ix = char_states[0].anno_ix
spans = []
label_colors_dict = EncodingVisualizer.calculate_label_colors(annotations)
cur_anno_ix = char_states[0].anno_ix
if cur_anno_ix is not None:
# If we started in an annotation make a span for it
anno = annotations[cur_anno_ix]
label = anno.label
color = label_colors_dict[label]
spans.append(f'<span class="annotation" style="color:{color}" data-label="{label}">')
for cs in char_states[1:]:
cur_anno_ix = cs.anno_ix
if cur_anno_ix != prev_anno_ix:
# If we've transitioned in or out of an annotation
spans.append(
# Create a span from the current consecutive characters
EncodingVisualizer.consecutive_chars_to_html(
current_consecutive_chars,
text=text,
encoding=encoding,
)
)
current_consecutive_chars = [cs]
if prev_anno_ix is not None:
# if we transitioned out of an annotation close it's span
spans.append("</span>")
if cur_anno_ix is not None:
# If we entered a new annotation make a span for it
anno = annotations[cur_anno_ix]
label = anno.label
color = label_colors_dict[label]
spans.append(f'<span class="annotation" style="color:{color}" data-label="{label}">')
prev_anno_ix = cur_anno_ix
if cs.partition_key() == current_consecutive_chars[0].partition_key():
# If the current charchter is in the same "group" as the previous one
current_consecutive_chars.append(cs)
else:
# Otherwise we make a span for the previous group
spans.append(
EncodingVisualizer.consecutive_chars_to_html(
current_consecutive_chars,
text=text,
encoding=encoding,
)
)
# An reset the consecutive_char_list to form a new group
current_consecutive_chars = [cs]
# All that's left is to fill out the final span
# TODO I think there is an edge case here where an annotation's span might not close
spans.append(
EncodingVisualizer.consecutive_chars_to_html(
current_consecutive_chars,
text=text,
encoding=encoding,
)
)
res = HTMLBody(spans) # Send the list of spans to the body of our html
return res