in build_obelics/13_final_processing.py [0:0]
def final_cleaning_node_level(texts, images, metadata):
    """Drop empty nodes and merge runs of text nodes of one document.

    The three arguments are parallel iterables; position ``i`` of each one
    describes the same node. A node is expected to be exactly one of:

    * a text node: ``text`` is set, ``image`` and ``meta`` are ``None``;
    * an image node: ``image`` and ``meta`` are set, ``text`` is ``None``;
    * an empty slot: all three values are ``None``.

    Iteration uses ``zip``, so it stops at the shortest of the three inputs.

    Args:
        texts: Per-node text, or ``None`` for nodes that are not text.
        images: Per-node image entry, or ``None`` for nodes that are not
            images.
        metadata: Per-node image metadata, or ``None`` for nodes that are
            not images.

    Returns:
        A tuple ``(new_texts, new_images, new_metadata)`` of three lists of
        equal length. Empty-string texts and all-``None`` slots are removed,
        and all text nodes that are not separated by an image node are
        concatenated into a single text node, joined by two newlines.
        Image nodes are copied through unchanged.

    Raises:
        AssertionError: If a text node also carries an image or metadata,
            or if an image node has no metadata.
        ValueError: If a node has metadata but neither text nor image.
    """
    new_texts = []
    new_images = []
    new_metadata = []

    # True while the most recently emitted node is a text node, meaning the
    # next non-empty text is merged into it instead of starting a new node.
    previous_is_text = False
    for text, image, meta in zip(texts, images, metadata):
        if text is not None:
            # Raised explicitly (not via ``assert``) so the validation is
            # still performed when Python runs with -O.
            if image is not None:
                raise AssertionError("a text node must not carry an image")
            if meta is not None:
                raise AssertionError("a text node must not carry metadata")
            if text == "":
                # Empty texts are dropped; they do not interrupt a text run.
                continue
            if previous_is_text:
                new_texts[-1] = new_texts[-1] + "\n\n" + text
            else:
                new_texts.append(text)
                new_images.append(None)
                new_metadata.append(None)
                previous_is_text = True
        elif image is not None:
            if meta is None:
                raise AssertionError("an image node must come with metadata")
            new_texts.append(None)
            new_images.append(image)
            new_metadata.append(meta)
            previous_is_text = False
        elif meta is not None:
            raise ValueError("metadata cannot be != None if text and image are None")
        # Remaining case: all three are None. The slot is skipped and, like
        # an empty text, does not interrupt a run of text nodes.

    # Internal invariant: the three output lists are appended in lockstep.
    assert len(new_texts) == len(new_images) == len(new_metadata)

    return new_texts, new_images, new_metadata