def validate_json()

in aoai/token_count_utils.py [0:0]


def validate_json(parsed_data) -> "tuple[bool, str]":
    """
    Validate the parsed JSON data to ensure it has the required keys and values.

    Checks run in order and stop at the first failure: the top-level
    'messages' list, then every message according to its 'role', and finally
    the optional top-level 'tools' key. Each failure is logged as a warning.

    Args:
        parsed_data (dict): Parsed JSON data.

    Returns:
        tuple[bool, str]: ``(True, "passed")`` when the data is valid,
        otherwise ``(False, code)`` where ``code`` is one of:

        - ``"missing_messages_list"``: 'messages' is missing or not a list.
        - ``"missing_message_dict"``: a message is not a dictionary.
        - ``"missing_role_key"``: 'role' is missing or not a string.
        - ``"content_key_missing"``: 'content' is missing or not a string
          for 'system'/'user', or is a non-null non-string for 'assistant'.
        - ``"content_empty"``: 'content' is empty or only whitespace.
        - ``"content_or_tool_calls_missing"``: an 'assistant' message has
          neither a string 'content' nor 'tool_calls'.
        - ``"tool_calls_not_list"``: 'tool_calls' is present but not a list.
        - ``"tool_call_id_missing"``: a 'tool' message has no string
          'tool_call_id'.
        - ``"unknown_role"``: 'role' is not one of the handled roles.
        - ``"tools_not_list"``: 'tools' is present but not a list.
        - ``"error_during_validation"``: an unexpected exception occurred
          (for example, ``parsed_data`` is ``None``).
    """
    # NOTE: the return annotation is quoted so it is never evaluated at
    # runtime and therefore needs no additional typing import.
    try:
        # Check if 'messages' key exists and is a list
        if "messages" not in parsed_data or not isinstance(parsed_data["messages"], list):
            logger.warning("Invalid JSON: 'messages' key is missing or not a list.")
            return False, "missing_messages_list"

        # Check if each message has the required keys according to its 'role'
        for message in parsed_data["messages"]:
            if not isinstance(message, dict):
                logger.warning(f"Invalid JSON: Each message should be a dictionary. Found: {type(message)}")
                return False, "missing_message_dict"

            # A missing 'role' yields None, which fails the string check too.
            role = message.get("role")
            if not isinstance(role, str):
                logger.warning("Invalid JSON: Each message should contain a 'role' key of type string.")
                return False, "missing_role_key"

            if role in ("system", "user"):
                # Both roles share the same rule: non-blank string 'content'.
                content = message.get("content")
                if not isinstance(content, str):
                    logger.warning(f"Invalid JSON: '{role}' role must have a 'content' key of type string.")
                    return False, "content_key_missing"
                if not content.strip():
                    logger.warning(f"Invalid JSON: '{role}' role 'content' cannot be empty or only whitespace.")
                    return False, "content_empty"

            elif role == "assistant":
                # The 'assistant' role must have at least one of 'content' or 'tool_calls'
                content = message.get("content")
                has_tool_calls = "tool_calls" in message
                if not isinstance(content, str) and not has_tool_calls:
                    logger.warning("Invalid JSON: 'assistant' role must have either 'content' or 'tool_calls'.")
                    return False, "content_or_tool_calls_missing"

                # A null 'content' next to 'tool_calls' is treated as absent;
                # previously it crashed on .strip() and was misreported as
                # "error_during_validation".
                if content is not None:
                    if not isinstance(content, str):
                        logger.warning("Invalid JSON: 'assistant' role 'content' must be a string if provided.")
                        return False, "content_key_missing"
                    if not content.strip():
                        logger.warning("Invalid JSON: 'assistant' role 'content' cannot be empty or only whitespace.")
                        return False, "content_empty"

                if has_tool_calls and not isinstance(message["tool_calls"], list):
                    logger.warning("Invalid JSON: 'tool_calls' must be a list if provided.")
                    return False, "tool_calls_not_list"

            elif role == "tool":
                # 'tool' role must have a 'tool_call_id'
                if not isinstance(message.get("tool_call_id"), str):
                    logger.warning("Invalid JSON: 'tool' role must have a 'tool_call_id' key of type string.")
                    return False, "tool_call_id_missing"

            else:
                logger.warning(f"Invalid JSON: Unknown role '{role}'.")
                return False, "unknown_role"

        # 'tools' is optional, but must be a list when present. It is checked
        # after the messages so message errors are reported first.
        if "tools" in parsed_data and not isinstance(parsed_data["tools"], list):
            logger.warning("Invalid JSON: 'tools' key must be a list if present.")
            return False, "tools_not_list"

        return True, "passed"

    except Exception as e:
        # Deliberate catch-all: any unexpected input shape is reported as a
        # validation failure instead of propagating to the caller.
        logger.warning(f"Error during validation: {e}")
        return False, "error_during_validation"