in vision/amazon-sagemaker-pytorch-detectron2/container_training/sku-110k/training.py [0:0]
def _parse_args() -> argparse.Namespace:
r"""Define training script API according to the argument that are parsed from the CLI
Returns
-------
argparse.Namespace
training script arguments; execute $(python $thisfile --help) for detailed documentation
"""
parser = argparse.ArgumentParser()
# Pretrained model
parser.add_argument(
"--model-type",
type=str,
default="faster_rcnn",
choices=["faster_rcnn", "retinanet"],
metavar="MT",
help=(
"Type of architecture to be used for object detection; "
"two options are supported: 'faster_rccn' and 'retinanet' "
"(default: faster_rcnn)"
),
)
parser.add_argument(
"--backbone",
type=str,
default="R_50_C4",
choices=[
"R_50_C4",
"R_50_DC5",
"R_50_FPN",
"R_101_C4",
"R_101_DC5",
"R_101_FPN",
"X_101_32x8d_FPN",
],
metavar="B",
help=(
"Encoder backbone; how to read this field: "
"R_50 (ResNet-50), R_101 (ResNet-101), X_101 (ResNeXt-101); "
"C4 (use a ResNet conv4 backbone with a conv5 head), "
"DC5 (use a ResNet conv5 backbone with dilations in conv5), "
"FPN (use an FPN on top of the ResNet backbone); "
"Attention! Only some combinations are supported; please refer to the original doc "
"(https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) "
"(default: R_50_C4)"
),
)
parser.add_argument(
"--lr-schedule",
type=int,
default=1,
choices=[1, 3],
metavar="LRS",
help=(
"Length of the training schedule; two values are supported: 1 or 3. "
"1x = 16 images/it * 90,000 iterations in total, with the LR reduced at 60k and 80k; "
"3x = 16 images/it * 270,000 iterations in total, with the LR reduced at 210k and 250k "
"(default: 1)"
),
)
# Hyper-parameters
parser.add_argument(
"--num-workers",
type=int,
default=2,
metavar="NW",
help="Number of workers used to by the data loader (default: 2)",
)
parser.add_argument(
"--lr",
type=float,
default=0.00025,
metavar="LR",
help="Base learning rate value (default: 0.00025)",
)
parser.add_argument(
"--num-iter",
type=int,
default=1000,
metavar="I",
help="Maximum number of iterations (default: 1000)",
)
parser.add_argument(
"--batch-size",
type=int,
default=16,
metavar="B",
help="Number of images per batch across all machines (default: 16)",
)
parser.add_argument(
"--num-rpn",
type=int,
default=100,
metavar="R",
help="Total number of RPN examples per image (default: 100)",
)
parser.add_argument(
"--reg-loss-type",
type=str,
default="smooth_l1",
choices=["smooth_l1", "giou"],
metavar="RLT",
help=("Loss type used for regression subnet " "(default: smooth_l1)"),
)
# RetinaNet Specific
parser.add_argument(
"--focal-loss-gamma",
type=float,
default=2.0,
metavar="FLG",
help="Focal loss gamma, used in RetinaNet (default: 2.0)",
)
parser.add_argument(
"--focal-loss-alpha",
type=float,
default=0.25,
metavar="FLA",
help="Focal loss alpha, used in RetinaNet. It must be in [0.1,1] (default: 0.25)",
)
# Faster-RCNN Specific
parser.add_argument(
"--bbox-reg-loss-weight",
type=float,
default=1.0,
help="Weight regression loss (default: 0.1)",
)
parser.add_argument(
"--bbox-rpn-pos-fraction",
type=float,
default=0.5,
help="Target fraction of foreground (positive) examples per RPN minibatch (default: 0.5)",
)
parser.add_argument(
"--bbox-head-pos-fraction",
type=float,
default=0.25,
help=(
"Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0) "
"(default: 0.25)"
),
)
parser.add_argument(
"--log-period",
type=int,
default=40,
help="Occurence in number of iterations at which loss values are logged",
)
# Inference Parameters
parser.add_argument(
"--det-per-img",
type=int,
default=200,
metavar="R",
help="Maximum number of detections to return per image during inference (default: 200)",
)
parser.add_argument(
"--nms-thr",
type=float,
default=0.5,
metavar="NMS",
help="If IoU is bigger than this value, only more confident pred is kept "
"(default: 0.5)",
)
parser.add_argument(
"--pred-thr",
type=float,
default=0.5,
metavar="PT",
help="Minimum confidence score to retain prediction (default: 0.5)",
)
parser.add_argument(
"--evaluation-type",
choices=["fast", "coco"],
type=str,
default=None,
help=(
"Evaluation to run on the test set after the training loop. "
"Valid options are: `fast` (Detectron2 boosted COCO eval) and "
"`coco` (default COCO evaluation). "
"Defaults to None, which means that no evaluation is executed"
),
)
# Mandatory parameters
parser.add_argument(
"--classes", type=str, metavar="C", help="List of classes of objects"
)
parser.add_argument(
"--dataset-name", type=str, metavar="DS", help="Name of the dataset"
)
parser.add_argument(
"--label-name",
type=str,
metavar="DS",
help="Name of category of objects to detect (e.g. 'object')",
)
# Container Environment
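# The SM_* variables below are injected by the SageMaker training container;
# running this script outside SageMaker without stubbing them raises a KeyError.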
parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"])
parser.add_argument(
"--training-channel",
type=str,
default=os.environ["SM_CHANNEL_TRAINING"],
help="Path folder that contains training images (File mode)",
)
parser.add_argument(
"--validation-channel",
type=str,
default=os.environ["SM_CHANNEL_VALIDATION"],
help="Path folder that contains validation images (File mode)",
)
parser.add_argument(
"--test-channel",
type=str,
default=os.environ["SM_CHANNEL_TEST"],
help=(
"Path to the folder that contains test images; "
"these are used to evaluate the model but not to drive hparam tuning"
),
)
parser.add_argument(
"--annotation-channel",
type=str,
default=os.environ["SM_CHANNEL_ANNOTATION"],
help="Path to folder that contains augumented manifest files with annotations",
)
parser.add_argument("--num-gpus", type=int, default=os.environ["SM_NUM_GPUS"])
parser.add_argument(
"--hosts", type=str, default=ast.literal_eval(os.environ["SM_HOSTS"])
)
parser.add_argument(
"--current-host", type=str, default=os.environ["SM_CURRENT_HOST"]
)
return parser.parse_args()
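
# A minimal local smoke test (hypothetical paths and values; the SM_* variables
# are normally provided by the SageMaker container):
#
#   import os
#   for var in ("SM_MODEL_DIR", "SM_CHANNEL_TRAINING", "SM_CHANNEL_VALIDATION",
#               "SM_CHANNEL_TEST", "SM_CHANNEL_ANNOTATION"):
#       os.environ.setdefault(var, "/tmp/sm")
#   os.environ.setdefault("SM_NUM_GPUS", "0")
#   os.environ.setdefault("SM_HOSTS", '["algo-1"]')
#   os.environ.setdefault("SM_CURRENT_HOST", "algo-1")
#   args = _parse_args()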