in aoai/token_count_utils.py [0:0]
def validate_json(parsed_data) -> "tuple[bool, str]":
    """
    Validate parsed chat JSON data to ensure it has the required keys and values.

    The checks run in order and stop at the first failure, which is logged
    as a warning and reported through the returned reason code:

    * ``messages`` must exist and be a list.
    * Every message must be a dict with a string ``role``.
    * ``system`` / ``user`` messages need a non-blank string ``content``.
    * ``assistant`` messages need a string ``content`` and/or ``tool_calls``.
      A ``content`` of ``None`` is treated as absent; a present ``content``
      must be a non-blank string; ``tool_calls`` must be a list if present.
    * ``tool`` messages need a string ``tool_call_id``.
    * ``tools``, if present at the top level, must be a list.

    Args:
        parsed_data: (dict): Parsed JSON data.

    Returns:
        A ``(is_valid, reason)`` tuple. ``(True, "passed")`` when the data is
        valid; otherwise ``(False, code)`` where ``code`` is one of
        ``"missing_messages_list"``, ``"missing_message_dict"``,
        ``"missing_role_key"``, ``"content_key_missing"``, ``"content_empty"``,
        ``"content_or_tool_calls_missing"``, ``"tool_calls_not_list"``,
        ``"tool_call_id_missing"``, ``"unknown_role"``, ``"tools_not_list"``
        or ``"error_during_validation"``.
    """
    try:
        # Check if 'messages' key exists and is a list
        if "messages" not in parsed_data or not isinstance(parsed_data["messages"], list):
            logger.warning("Invalid JSON: 'messages' key is missing or not a list.")
            return False, "missing_messages_list"

        # Check if each message has the required keys according to its 'role'
        for message in parsed_data["messages"]:
            if not isinstance(message, dict):
                logger.warning(
                    "Invalid JSON: Each message should be a dictionary. Found: %s",
                    type(message),
                )
                return False, "missing_message_dict"

            # Check if 'role' key exists and is of type string
            role = message.get("role")
            if not isinstance(role, str):
                logger.warning("Invalid JSON: Each message should contain a 'role' key of type string.")
                return False, "missing_role_key"

            if role in ("system", "user"):
                # 'system' and 'user' roles must have non-blank string 'content'
                content = message.get("content")
                if not isinstance(content, str):
                    logger.warning(
                        "Invalid JSON: '%s' role must have a 'content' key of type string.",
                        role,
                    )
                    return False, "content_key_missing"
                if not content.strip():
                    logger.warning(
                        "Invalid JSON: '%s' role 'content' cannot be empty or only whitespace.",
                        role,
                    )
                    return False, "content_empty"
            elif role == "assistant":
                # The 'assistant' role must have at least one of 'content' or 'tool_calls'
                content = message.get("content")
                has_tool_calls = "tool_calls" in message
                if not isinstance(content, str) and not has_tool_calls:
                    logger.warning("Invalid JSON: 'assistant' role must have either 'content' or 'tool_calls'.")
                    return False, "content_or_tool_calls_missing"
                # A None 'content' next to 'tool_calls' is treated as absent
                # rather than calling .strip() on it and failing in the
                # generic exception handler.
                if content is not None:
                    if not isinstance(content, str):
                        logger.warning("Invalid JSON: 'assistant' role 'content' must be a string if provided.")
                        return False, "content_key_missing"
                    if not content.strip():
                        logger.warning("Invalid JSON: 'assistant' role 'content' cannot be empty or only whitespace.")
                        return False, "content_empty"
                if has_tool_calls and not isinstance(message["tool_calls"], list):
                    logger.warning("Invalid JSON: 'tool_calls' must be a list if provided.")
                    return False, "tool_calls_not_list"
            elif role == "tool":
                # 'tool' role must have a 'tool_call_id'
                if not isinstance(message.get("tool_call_id"), str):
                    logger.warning("Invalid JSON: 'tool' role must have a 'tool_call_id' key of type string.")
                    return False, "tool_call_id_missing"
            else:
                logger.warning("Invalid JSON: Unknown role '%s'.", role)
                return False, "unknown_role"

        # Validate 'tools' key (if necessary)
        if "tools" in parsed_data and not isinstance(parsed_data["tools"], list):
            logger.warning("Invalid JSON: 'tools' key must be a list if present.")
            return False, "tools_not_list"

        return True, "passed"
    except Exception as e:
        # Deliberate catch-all: the validator reports unexpected input shapes
        # (e.g. parsed_data is None) as a failure instead of raising.
        logger.warning("Error during validation: %s", e)
        return False, "error_during_validation"