def is_valid_question_or_answer()

in clean_and_create/load_data.py [0:0]


def is_valid_question_or_answer(text):
    """Return True if *text* looks like a clean, standalone question or answer.

    The text is rejected when it is missing, is not a string, is blank, or
    contains any of the heuristic markers listed below. Every pattern is
    searched anywhere in the text and matched case-insensitively.

    Args:
        text: Candidate question or answer. Non-string values (``None``,
            numbers, NaN placeholders, bytes, ...) are treated as invalid
            rather than raising.

    Returns:
        bool: ``True`` when the text is a non-blank string and no rejection
        pattern matches; ``False`` otherwise.
    """
    # Non-strings cannot be stripped or regex-searched with str patterns;
    # treat them like missing text instead of raising.
    if not isinstance(text, str) or not text.strip():
        return False

    # Heuristic markers of code/markup or otherwise unusable text.
    # NOTE: '.' does not match a newline here, so the bracket patterns only
    # fire when the opening and closing characters are on the same line.
    patterns = (
        r'\{.*?\}',  # { ... } on a single line
        r'\[.*?\]',  # [ ... ] on a single line
        r'<.*?>',    # < ... > on a single line (e.g. markup tags)
        r'\b\d{1,3}(\.\d{1,3}){3}\b',  # Dotted quad such as an IP address
        r'\w+\.\w+',  # word.word with no space (file names, domains, 3.14)
        r'\n\s*\n',  # A blank (or whitespace-only) line inside the text
        r'unanswerable',  # The word 'unanswerable', in any letter case
        r'Q\d+: ',  # Embedded question label such as "Q1: " (or "q1: ")
        r'A\d+: ',  # Embedded answer label such as "A1: " (or "a1: ")
    )
    return not any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)