def truncate_stockholm_msa()

in src/analysis/parsers.py [0:0]


def truncate_stockholm_msa(stockholm_msa_path: str, max_sequences: int) -> str:
  """Reads + truncates a Stockholm file while preventing excessive RAM usage."""
  seqnames = set()
  filtered_lines = []

  with open(stockholm_msa_path) as f:
    for line in f:
      if line.strip() and not line.startswith(('#', '//')):
        # Ignore blank lines, markup and end symbols - remainder are alignment
        # sequence parts.
        seqname = line.partition(' ')[0]
        seqnames.add(seqname)
        if len(seqnames) >= max_sequences:
          break

    f.seek(0)
    for line in f:
      if _keep_line(line, seqnames):
        filtered_lines.append(line)

  return ''.join(filtered_lines)