def extract_data_points()

in orchestration/orchestrator.py [0:0]


    def extract_data_points(chat_log) -> list:
        """Collect ``<file>.<ext>: <text>`` entries from tool results in a chat log.

        The log is scanned in order. Every ``ToolCallRequestEvent`` message
        contributes the call ids found in its content items (the text between
        ``id='`` and the next ``',``). Every ``ToolCallExecutionEvent`` item
        whose ``call_id='...')`` matches a previously seen id has its
        ``content='...'`` payload split into entries, one per file reference
        with an allowed extension (matched case-insensitively).

        All items of a message's ``content`` list are processed, so messages
        carrying several tool calls or results are fully covered.

        Args:
            chat_log: Iterable of message mappings with ``message_type`` and
                ``content`` keys, where ``content`` is a non-empty list of
                strings. Messages of any other shape are skipped; messages
                that raise while being parsed are logged and skipped.

        Returns:
            A list of matched entry strings in the order they were found.
            Empty when ``chat_log`` is falsy or nothing matched.
        """
        # Fixed order keeps the generated pattern identical on every run.
        allowed_extensions = (
            'vtt', 'xlsx', 'xls', 'pdf', 'png', 'jpeg', 'jpg', 'bmp', 'tiff', 'docx', 'pptx',
        )
        extension_pattern = "|".join(allowed_extensions)
        # An entry starts at "<name>.<ext>: " and runs lazily up to the next
        # "<name>.<ext>:" or the end of the payload.
        entry_re = re.compile(
            rf'[\w\-.]+\.(?:{extension_pattern}): .*?(?=(?:[\w\-.]+\.(?:{extension_pattern})\:)|$)',
            re.DOTALL | re.IGNORECASE,
        )

        data_points = []
        if not chat_log:
            logging.warning("[orchestrator] Chat log is empty or not provided.")
            return data_points

        requested_call_ids = set()
        for msg in chat_log:
            try:
                if "message_type" not in msg or "content" not in msg:
                    continue
                items = msg["content"]
                if not isinstance(items, list) or not items:
                    continue

                message_type = msg["message_type"]
                if message_type == "ToolCallRequestEvent":
                    for item in items:
                        if "id='" in item:
                            requested_call_ids.add(item.split("id='")[1].split("',")[0])
                elif message_type == "ToolCallExecutionEvent":
                    for item in items:
                        if "call_id='" not in item:
                            continue
                        call_id = item.split("call_id='")[1].split("')")[0]
                        # Only results answering a request seen earlier in the log count.
                        if call_id not in requested_call_ids or "content='" not in item:
                            continue
                        # The payload ends at the last "'," of the item, so
                        # quotes inside the payload itself are kept.
                        data = item.split("content='")[1].rsplit("',", 1)[0]
                        data_points.extend(entry_re.findall(data))
            except Exception as e:
                # Best effort: one malformed message must not abort the scan.
                logging.warning(f"[orchestrator] Error processing message: {e}.")
        return data_points