def parse_table()

in 4-mmrag_tooluse/mmrag_bh.py [0:0]
38 lines of code
3 McCabe index (conditional complexity)

def _table_json_filename(table_title: str, report_date: str) -> str:
    """Return a flat ``<title>_<date>.json`` file name with no path separators."""
    filename = f"{table_title}_{report_date}.json".replace(" ", "_")
    # Titles and dates may contain path separators (e.g. "12/31/2023"). Left in
    # place they would make the joined path point into a sub-directory that
    # does not exist, or outside the output folder, so flatten them.
    return filename.replace("/", "_").replace("\\", "_")


def parse_table(base64_image: str, table_title: str, report_date: str) -> Dict:
    """
    Extract a table from an image into JSON and save it to TABLE_JSON_FOLDER.

    The schema registered for ``table_title`` in ``schema_dict`` is passed to
    the chat-completions call as ``response_format``; the reply is decoded
    with ``json.loads`` and written to
    ``Config.TABLE_JSON_FOLDER/<table_title>_<report_date>.json`` (spaces and
    path separators in the file name are replaced by underscores).

    Args:
        base64_image: Base64-encoded image data; it is embedded in a
            ``data:image/png;base64,...`` URL.
        table_title: Title of the table. Used as the ``schema_dict`` lookup
            key, interpolated into the system prompt, and used in the output
            file name.
        report_date: Report date. Interpolated into the system prompt and used
            in the output file name.

    Returns:
        The decoded JSON reply, or an empty dict when no schema is registered
        for ``table_title`` or when the request, decoding, or saving fails.
        Failures are logged; this function does not raise.
    """
    relevant_schema = schema_dict.get(table_title)

    if relevant_schema is None:
        logger.warning(f"No schema found for table title: {table_title}")
        return {}

    # NOTE(review): the prompt refers to a schema "provided below", but the
    # schema text is never embedded here; it reaches the model only through
    # `response_format` in the request further down.
    system_prompt = f"""
    You are an AI assistant tasked with extracting and structuring data from images containing tables.

    **Instructions:**
    - Extract all data from the provided table image.
    - Format the extracted data according to the JSON schema provided below.
    - Ensure that all fields are correctly populated and adhere strictly to the schema specifications.
    - Use the following values for additional fields:
      - `"title"`: "{table_title}"
      - `"report_date"`: "{report_date}"

    **Output Format:**
    - Provide the output strictly in JSON format without any additional text or explanations.
    """

    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Please extract and format the data from the following table image according to the provided JSON schema."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}",
                        "detail": "high",
                    },
                },
            ],
        },
    ]

    try:
        response = client.chat.completions.create(
            model=Config.GPT_MODEL,
            response_format=relevant_schema,
            messages=messages,
            temperature=0.0,
        )

        content = response.choices[0].message.content
        if content is None:
            # Report an empty reply explicitly instead of letting json.loads
            # fail with an opaque TypeError.
            logger.error(f"Error in parse_table: empty response for table title: {table_title}")
            return {}

        response_json = json.loads(content)

        os.makedirs(Config.TABLE_JSON_FOLDER, exist_ok=True)
        file_path = os.path.join(
            Config.TABLE_JSON_FOLDER,
            _table_json_filename(table_title, report_date),
        )
        # Explicit encoding keeps the file identical across platforms.
        with open(file_path, 'w', encoding='utf-8') as json_file:
            json.dump(response_json, json_file, indent=4)
        logger.info(f"Saved parsed table JSON to {file_path}")

        return response_json
    except Exception as e:
        # Top-level boundary: callers get {} rather than an exception;
        # logger.exception keeps the traceback for diagnosis.
        logger.exception(f"Error in parse_table: {e}")
        return {}