in train/comms/pt/commsTraceReplay.py [0:0]
def readArgs(self, parser):
    """Register the trace-replay command-line options and parse them.

    The common/basic options are registered first by delegating to the
    parent class's ``readArgs``. The replay-specific options are then
    added in a fixed order, and the parser is asked to parse the
    command line.

    Args:
        parser: Object exposing ``add_argument`` and ``parse_args``
            (presumably an ``argparse.ArgumentParser`` -- confirm
            against the caller).

    Returns:
        The result of ``parser.parse_args()``.
    """
    # Let the parent class contribute the common/basic arguments first.
    super().readArgs(parser)

    # Ordered table of (option strings, add_argument keyword settings).
    # Several defaults are taken from attributes of this instance.
    replay_options = (
        (
            ("--trace-path",),
            {
                "type": str,
                "default": "./",
                "help": "File path to read the trace. All rank read their own trace file unless `--use-one-trace` is used.",
            },
        ),
        (
            ("--use-one-trace",),
            {
                "action": "store_true",
                "default": False,
                "help": "Toggle to use only one trace for all ranks",
            },
        ),
        (
            ("--dry-run",),
            {
                "action": "store_true",
                "default": self.is_dry_run,
                "help": "Toggle to only analyze trace without actually replaying collectives",
            },
        ),
        (
            ("--auto-shrink",),
            {
                "action": "store_true",
                "default": self.shrink,
                "help": "Toggle to shrink message size when it does not match with the current scale (only for debug purpose)",
            },
        ),
        (
            ("--max-msg-cnt",),
            {
                "type": int,
                "default": self.max_msg_cnt,
                "help": "Only replay first N operations (0 means no limit)",
            },
        ),
        (
            ("--no-warm-up",),
            {
                "action": "store_true",
                "default": False,
                "help": "Toggle to disable performing extra replaying for warm-up",
            },
        ),
        (
            # Two spellings of the same option.
            ("--allow-ops", "--allow-list"),
            {
                "type": str,
                "default": "all",
                "help": "List of desired collectives (separate by comma) to be replayed, e.g., `--allow-ops all_reduce,all_to_allv,wait`, typo or not supported collectives will be ignored.",
            },
        ),
        (
            ("--output-path",),
            {
                "type": str,
                "default": self.out_path,
                # nargs="?" with const="" lets the flag appear with no value.
                "nargs": "?",
                "const": "",
                "help": 'Output path to write the replayed trace for post performance analysis. Set as empty string, i.e., "", to skip output',
            },
        ),
        (
            ("--colls-per-batch",),
            {
                "type": int,
                "default": self.colls_per_batch,
                "help": "Toggle to set number of consecutive collectives in a batch. This also enables per batch latency stats.",
            },
        ),
    )

    # Register in table order so the options are added in the same sequence.
    for option_strings, settings in replay_options:
        parser.add_argument(*option_strings, **settings)

    return parser.parse_args()