in src/protein_structure/embedding_from_esmfold.py [0:0]
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the embedding-extraction script.

    The parser accepts either a single sequence (``-name`` / ``-seq``) or an
    input file (``-i`` / ``--file``), plus options selecting the model, the
    layers to read, and which representations to return.  ``--include`` is
    the only required argument.

    Returns:
        A configured ``argparse.ArgumentParser``; call ``parse_args()`` on it.
    """
    parser = argparse.ArgumentParser(
        description="Extract per-token representations and model outputs for sequences in a FASTA file"  # noqa
    )

    # Original note on the choices below -- dim len: [640, 1280, 2560, 5120]
    # (presumably the embedding width of each model, in listed order).
    parser.add_argument(
        "--model_name",
        type=str,
        default="esm2_t36_3B_UR50D",
        # `choices` restricts this to the names listed, so a path to a model
        # file is rejected by argparse; the help text must not promise one.
        help="name of pretrained model to download (see README for models)",
        choices=[
            "esm2_t30_150M_UR50D",
            "esm2_t33_650M_UR50D",
            "esm2_t36_3B_UR50D",
            "esm2_t48_15B_UR50D",
        ],
    )

    # Single-sequence input.  These are single-dash long options; argparse
    # stores them as ``args.name`` and ``args.seq``.
    parser.add_argument(
        "-name",
        type=str,
        default=None,
        help="sequence name.",
    )
    parser.add_argument(
        "-seq",
        type=str,
        default=None,
        help="sequence.",
    )

    # File-based input and output location.
    parser.add_argument(
        "-i",
        "--file",
        type=str,
        help="FASTA/CSV file on which to extract representations",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        type=str,
        help="output directory for extracted representations",
    )

    parser.add_argument(
        "--toks_per_batch",
        type=int,
        default=4096,
        help="maximum batch size",
    )
    parser.add_argument(
        "--repr_layers",
        type=int,
        # With nargs="+" argparse replaces (never mutates) this default list.
        default=[-1],
        nargs="+",
        help="layers indices from which to extract representations (0 to num_layers, inclusive)",
    )
    parser.add_argument(
        "--include",
        type=str,
        nargs="+",
        choices=["mean", "per_tok", "bos", "contacts"],
        help="specify which representations to return",
        required=True,
    )
    parser.add_argument(
        "--truncation_seq_length",
        type=int,
        default=4094,
        help="truncate sequences longer than the given value",
    )
    parser.add_argument(
        "--try_failure",
        action="store_true",
        help="when CUDA Out of Memory, try to reduce the truncation_seq_length",
    )
    parser.add_argument(
        "--nogpu",
        action="store_true",
        help="Do not use GPU even if available",
    )
    return parser