in yourbench/utils/parsing_engine.py [0:0]
def extract_content_from_xml_tags(full_content: str, xml_tag: str) -> str:
    """Return the stripped text found inside the first ``<xml_tag>`` element.

    The lookup is regex-based and deliberately lenient:

    1. If a ``<xml_tag>...</xml_tag>`` pair exists, the content of the first
       (shortest) such pair is returned.
    2. Otherwise, if only an opening ``<xml_tag>`` exists, everything from
       the first opening tag to the end of ``full_content`` is returned.
    3. Otherwise an empty string is returned.

    Args:
        full_content: Text to search. Newlines inside the element are kept
            (matching uses ``re.DOTALL``).
        xml_tag: Bare tag name without angle brackets. It is matched
            literally, so regex metacharacters in the name are safe.

    Returns:
        The extracted content with surrounding whitespace stripped, or ``""``
        when the tag is absent or extraction fails.
    """
    try:
        # Escape the tag so characters such as "." or "+" are matched
        # literally instead of being interpreted as regex operators.
        tag = re.escape(str(xml_tag))

        # Preferred case: a properly closed element (non-greedy, first hit).
        closed = re.search(f"<{tag}>(.*?)</{tag}>", full_content, re.DOTALL)
        if closed:
            return closed.group(1).strip()

        # Fallback: no closing tag anywhere, so take everything that follows
        # the first opening tag.
        unclosed = re.search(f"<{tag}>(.*)", full_content, re.DOTALL)
        if unclosed:
            return unclosed.group(1).strip()

        return ""
    except Exception as extraction_error:
        # Best-effort contract: never propagate, report and return empty
        # (e.g. when full_content is not a string).
        logger.error(f"Error extracting content from XML tags: {extraction_error}")
        return ""