in 4-mmrag_tooluse/mmrag_bh.py [0:0]
def parse_table(base64_image: str, table_title: str, report_date: str) -> Dict:
    """
    Parse a table image into structured JSON and save it under TABLE_JSON_FOLDER.

    The schema registered for ``table_title`` in ``schema_dict`` is passed to the
    chat-completions call as ``response_format``. The message content of the
    first choice is decoded with ``json.loads`` and written to
    ``<table_title>_<report_date>.json`` (spaces and path separators replaced
    by underscores) inside ``Config.TABLE_JSON_FOLDER``, which is created if
    it does not exist.

    Args:
        base64_image: Base64 payload of the table image; it is embedded in a
            ``data:image/png;base64,...`` URL.
        table_title: Key used to look up the schema; also interpolated into
            the prompt and the output filename.
        report_date: Interpolated into the prompt and the output filename.

    Returns:
        The decoded JSON response, or an empty dict when no schema is
        registered for ``table_title`` or when any step (request, decoding,
        saving) raises.
    """
    relevant_schema = schema_dict.get(table_title)
    if relevant_schema is None:
        logger.warning(f"No schema found for table title: {table_title}")
        return {}

    # NOTE(review): the prompt refers to a schema "provided below", but the
    # schema text is never embedded in the prompt; it reaches the model only
    # through `response_format` in the request further down.
    system_prompt = f"""
You are an AI assistant tasked with extracting and structuring data from images containing tables.
**Instructions:**
- Extract all data from the provided table image.
- Format the extracted data according to the JSON schema provided below.
- Ensure that all fields are correctly populated and adhere strictly to the schema specifications.
- Use the following values for additional fields:
- `"title"`: "{table_title}"
- `"report_date"`: "{report_date}"
**Output Format:**
- Provide the output strictly in JSON format without any additional text or explanations.
"""
    messages = [
        {
            "role": "system",
            "content": system_prompt
        },
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Please extract and format the data from the following table image according to the provided JSON schema."},
                {"type": "image_url", "image_url": {
                    "url": f"data:image/png;base64,{base64_image}", "detail": "high"}}
            ]
        }
    ]
    try:
        response = client.chat.completions.create(
            model=Config.GPT_MODEL,
            response_format=relevant_schema,
            messages=messages,
            temperature=0.0
        )
        response_json = json.loads(response.choices[0].message.content)

        os.makedirs(Config.TABLE_JSON_FOLDER, exist_ok=True)
        filename = f"{table_title}_{report_date}.json".replace(" ", "_")
        # A path separator in the title or date (e.g. "01/05/2024") would make
        # os.path.join point into a non-existent subdirectory or outside the
        # target folder, so collapse separators into underscores as well.
        for separator in (os.sep, os.altsep):
            if separator:
                filename = filename.replace(separator, "_")
        file_path = os.path.join(Config.TABLE_JSON_FOLDER, filename)
        with open(file_path, 'w', encoding="utf-8") as json_file:
            json.dump(response_json, json_file, indent=4)
        logger.info(f"Saved parsed table JSON to {file_path}")
        return response_json
    except Exception as e:
        # Best-effort contract: callers receive {} on any failure. Log with
        # the traceback so request, decoding and file errors can be told apart.
        logger.exception(f"Error in parse_table: {e}")
        return {}