in create_only_with_pdfs/load_data.py [0:0]
def is_valid_question_or_answer(text):
    """Return True if *text* looks like a clean question or answer string.

    The text is rejected (False is returned) when it is not a string, is
    empty or whitespace-only, or contains any of the markers listed in
    ``patterns`` below.

    Args:
        text: Candidate question or answer. Any non-``str`` value
            (``None``, numbers, bytes, ...) is treated as invalid instead
            of raising.

    Returns:
        bool: True when none of the rejection rules apply.
    """
    # Guard against non-string values: calling .strip() or re.search() on
    # them would raise instead of simply reporting "not valid".
    if not isinstance(text, str) or not text.strip():
        return False

    # Markers that disqualify the text. Each is searched anywhere in the
    # string. re.IGNORECASE is applied to every pattern, so e.g. "q1: " and
    # "UNANSWERABLE" are rejected too.
    patterns = (
        r'\{.*?\}',  # { ... } on a single line
        r'\[.*?\]',  # [ ... ] on a single line
        r'<.*?>',  # < ... > on a single line
        r'\b\d{1,3}(\.\d{1,3}){3}\b',  # IP-address-like dotted quads
        r'\w+\.\w+',  # word.word with no space after the dot
        r'\n\s*\n',  # two newlines separated only by whitespace
        r'unanswerable',  # the literal word 'unanswerable'
        r'Q\d+: ',  # numbered question label, e.g. "Q2: "
        r'A\d+: ',  # numbered answer label, e.g. "A2: "
    )
    return not any(
        re.search(pattern, text, re.IGNORECASE) for pattern in patterns
    )