in chunking/chunkers/multimodal_chunker.py [0:0]
def _replace_figures_in_sequence(self, content, figures):
    """
    Replace successive ``<figure>...</figure>`` spans with ``<figure{id}>``.

    The spans are consumed left to right and paired with the entries of
    ``figures`` in list order. Entries whose ``"id"`` is missing or falsy
    are skipped without consuming a span. Processing stops as soon as no
    further ``<figure>`` tag is found, or an opening tag has no matching
    ``</figure>`` after it; any remaining text is left untouched.

    The content is scanned in a single pass: text that has already been
    emitted (including inserted ``<figure{id}>`` markers) is never searched
    again, so an unusual id cannot be mistaken for a fresh opening tag.

    Args:
        content (str): The document content containing figure tags.
        figures (list): A list of figure dictionaries with 'id' keys.

    Returns:
        str: The updated content with figure tags replaced by identifiers.
        When nothing is replaced, ``content`` is returned as-is.
    """
    open_tag = "<figure>"
    close_tag = "</figure>"

    pieces = []  # alternating untouched text and replacement markers
    cursor = 0   # offset in `content` up to which output has been emitted

    for fig in figures:
        figure_id = fig.get("id")
        if not figure_id:
            continue

        start_index = content.find(open_tag, cursor)
        if start_index == -1:
            break  # no more <figure> tags

        end_index = content.find(close_tag, start_index)
        if end_index == -1:
            break  # malformed or missing closing </figure>

        # Keep the text before the tag, then emit the marker in place of
        # everything from <figure> through </figure>.
        pieces.append(content[cursor:start_index])
        pieces.append(f"<figure{figure_id}>")
        cursor = end_index + len(close_tag)

    if not pieces:
        # Nothing was replaced; hand back the input unchanged.
        return content

    pieces.append(content[cursor:])
    return "".join(pieces)