in 4-mmrag_tooluse/mmrag_bh.py [0:0]
def analyze_image(base64_image: str, quarter_info: str) -> Dict:
    """Ask the chat model whether an image shows a table or graphs.

    The image is sent as a ``data:image/png;base64,...`` URL together with a
    system prompt, using ``client.chat.completions.create`` with the model
    named by ``Config.GPT_MODEL`` and a strict JSON-schema response format.

    Args:
        base64_image: Base64-encoded image data. It is embedded in a data URL
            that declares the payload as PNG.
        quarter_info: Quarter label interpolated into the prompt; the model is
            instructed to return it under the ``"quarter_info"`` key.

    Returns:
        The model's reply parsed from JSON. The requested schema requires the
        string keys ``"image_category"``, ``"content_output"`` and
        ``"quarter_info"``.

    Raises:
        ValueError: If the reply carries no content (for example because the
            model refused the request). ``json.JSONDecodeError``, a subclass
            of ``ValueError``, is raised when the content is not valid JSON.
    """
    # NOTE: the prompt text is sent to the model verbatim, so its lines are
    # deliberately kept flush-left inside the string literal.
    system_prompt = f"""
Analyze the image below and determine if it contains graphs or tabular data.
- If the image contains a table:
- Shorten the table title to one of ["Free_Cash_Flow_Reconciliation", "Free_Cash_Flow_Less_Principal_Repayments", "Free_Cash_Flow_Less_Equipment_Finance Leases"].
- Transcribe the table's title under the "content_output" key.
- Set "image_category" to "table".
- If the image contains graphs:
- Set "image_category" to "graphs".
- Provide a detailed analysis/summary of the graphs, including:
- **Descriptions** of what each graph represents.
- **Key data points** presented as bullet points or numbered lists.
- **Insights or takeaways or trends** derived from the graphs.
The quarter information is: {quarter_info}. Please use that as the value for the JSON key "quarter_info".
"""
    response = client.chat.completions.create(
        model=Config.GPT_MODEL,
        # Strict JSON schema: the reply must contain exactly these three
        # string fields and nothing else.
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "image_analysis",
                "schema": {
                    "type": "object",
                    "properties": {
                        "image_category": {"type": "string"},
                        "content_output": {"type": "string"},
                        "quarter_info": {"type": "string"}
                    },
                    "required": ["image_category", "content_output", "quarter_info"],
                    "additionalProperties": False
                },
                "strict": True
            }
        },
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/png;base64,{base64_image}", "detail": "high"}}
                ]
            }
        ],
        temperature=0.0,
    )
    message = response.choices[0].message
    if message.content is None:
        # With structured outputs a refusal arrives as content=None plus a
        # separate refusal string; surface it instead of letting json.loads
        # fail with an opaque TypeError on None.
        refusal = getattr(message, "refusal", None)
        detail = f" (refusal: {refusal})" if refusal else ""
        raise ValueError(f"Image analysis returned no content{detail}")
    return json.loads(message.content)