def create_parser()

in src/protein_structure/embedding_from_esmfold.py [0:0]


def create_parser():
    """Build the command-line parser for the representation-extraction script.

    The parser accepts a model selection (restricted to a fixed list of ESM-2
    checkpoints), an input given either as a single name/sequence pair or as a
    file path, an output directory, and batching / layer / truncation / device
    settings.  ``--include`` is the only option that must be supplied.

    Returns:
        argparse.ArgumentParser: a freshly constructed parser; nothing is
        parsed here, so callers invoke ``parse_args`` themselves.
    """
    cli = argparse.ArgumentParser(
        description="Extract per-token representations and model outputs for sequences in a FASTA file"  # noqa
    )

    # dim len: [640, 1280, 2560, 5120]
    # NOTE(review): presumably the embedding widths of the models listed
    # below, in the same order -- confirm against the model definitions.
    supported_models = [
        "esm2_t30_150M_UR50D",
        "esm2_t33_650M_UR50D",
        "esm2_t36_3B_UR50D",
        "esm2_t48_15B_UR50D",
    ]

    # Each entry is (option strings, add_argument keyword options).  The table
    # is built inside the function so mutable defaults such as ``[-1]`` are
    # recreated for every parser, and the order here is the order shown in
    # ``--help``.
    option_table = (
        (
            ("--model_name",),
            {
                "type": str,
                "default": "esm2_t36_3B_UR50D",
                "help": "PyTorch model file OR name of pretrained model to download (see README for models)",
                "choices": supported_models,
            },
        ),
        (
            ("-name",),
            {"type": str, "default": None, "help": "sequence name."},
        ),
        (
            ("-seq",),
            {"type": str, "default": None, "help": "sequence."},
        ),
        (
            ("-i", "--file"),
            {
                "type": str,
                "help": "FASTA/CSV file on which to extract representations",
            },
        ),
        (
            ("-o", "--output_dir"),
            {
                "type": str,
                "help": "output directory for extracted representations",
            },
        ),
        (
            ("--toks_per_batch",),
            {"type": int, "default": 4096, "help": "maximum batch size"},
        ),
        (
            ("--repr_layers",),
            {
                "type": int,
                "default": [-1],
                "nargs": "+",
                "help": "layers indices from which to extract representations (0 to num_layers, inclusive)",
            },
        ),
        (
            ("--include",),
            {
                "type": str,
                "nargs": "+",
                "choices": ["mean", "per_tok", "bos", "contacts"],
                "help": "specify which representations to return",
                "required": True,
            },
        ),
        (
            ("--truncation_seq_length",),
            {
                "type": int,
                "default": 4094,
                "help": "truncate sequences longer than the given value",
            },
        ),
        (
            ("--try_failure",),
            {
                "action": "store_true",
                "help": "when CUDA Out of Memory, try to reduce the truncation_seq_length",
            },
        ),
        (
            ("--nogpu",),
            {
                "action": "store_true",
                "help": "Do not use GPU even if available",
            },
        ),
    )

    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)

    return cli