def _parse_hmmsearch_description()

in src/analysis/parsers.py [0:0]


def _parse_hmmsearch_description(description: str) -> HitMetadata:
  """Parses the hmmsearch A3M sequence description line."""
  # Example 1: >4pqx_A/2-217 [subseq from] mol:protein length:217  Free text
  # Example 2: >5g3r_A/1-55 [subseq from] mol:protein length:352
  match = re.match(
      r'^>?([a-z0-9]+)_(\w+)/([0-9]+)-([0-9]+).*protein length:([0-9]+) *(.*)$',
      description.strip())

  if not match:
    raise ValueError(f'Could not parse description: "{description}".')

  return HitMetadata(
      pdb_id=match[1],
      chain=match[2],
      start=int(match[3]),
      end=int(match[4]),
      length=int(match[5]),
      text=match[6])