in sourcecode/scoring/topic_model.py [0:0]
def _compile_regex(self):
"""Compile a single regex from all seed terms grouped by topic."""
regex_patterns = {}
for topic, patterns in self._seedTerms.items():
mod_patterns = []
for pattern in patterns:
# If the pattern contains an escaped period (i.e. it's a URL), don't enforce the preceding whitespace or start-of-string.
if "\\." in pattern:
mod_patterns.append(pattern)
else:
mod_patterns.append(f"(\s|^){pattern}")
group_name = f"{topic.name}"
regex_patterns[group_name] = f"(?P<{group_name}>{'|'.join(mod_patterns)})"
# Combine all groups into a single regex
full_regex = "|".join(regex_patterns.values())
return re.compile(full_regex)