def extract_text_per_page_from_sample()

in generation/llm_swarm_script.py [0:0]


def extract_text_per_page_from_sample(sample: Dict[str, Any]) -> List[str]:
    """
    Build one text string per page of a sample.

    The pages are read from ``sample['json']['pages']``. For every page, the
    entries of ``page['lines']['text']`` are concatenated into a single string,
    separated by a newline character padded with one space on each side.

    Args:
        sample (Dict[str, Any]): Mapping that holds the page data under the
            ``'json'`` -> ``'pages'`` keys; each page must provide its line
            strings under ``'lines'`` -> ``'text'``.

    Returns:
        List[str]: The joined text of each page, in the same order as the
            pages appear in the sample. A page without lines yields an
            empty string.
    """
    # Separator placed between consecutive lines of the same page.
    line_separator = ' \n '
    return [
        line_separator.join(page_entry['lines']['text'])
        for page_entry in sample['json']['pages']
    ]