def _compile_regex()

in sourcecode/scoring/topic_model.py [0:0]


  def _compile_regex(self):
    """Compile a single regex from all seed terms grouped by topic."""
    regex_patterns = {}
    for topic, patterns in self._seedTerms.items():
      mod_patterns = []
      for pattern in patterns:
        # If the pattern contains an escaped period (i.e. it's a URL), don't enforce the preceding whitespace or start-of-string.
        if "\\." in pattern:
          mod_patterns.append(pattern)
        else:
          mod_patterns.append(f"(\s|^){pattern}")
      group_name = f"{topic.name}"
      regex_patterns[group_name] = f"(?P<{group_name}>{'|'.join(mod_patterns)})"
    # Combine all groups into a single regex
    full_regex = "|".join(regex_patterns.values())
    return re.compile(full_regex)