scripts/launcher_distributed.py [413:451]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            model_output_dir=args.model_output_dir,
            metric_logger=metric_logger,
        )
    )

    epoch = get_epoch(args.tune_finetune_yaml)

    # Dynamically modify Evaluation yaml file.
    template = jinja_env.from_string(Path(args.tune_eval_yaml).open().read())
    Path(args.tune_eval_yaml).open("w").write(
        template.render(
            model_dir=args.model_dir,
            model_output_dir=os.path.join(args.model_output_dir, f"epoch_{epoch}"),
        )
    )

    # Dynamically modify Quantization yaml file.
    template = jinja_env.from_string(Path(args.tune_quant_yaml).open().read())
    Path(args.tune_quant_yaml).open("w").write(
        template.render(
            model_output_dir=os.path.join(args.model_output_dir, f"epoch_{epoch}")
        )
    )

    try:
        print("Starting training...")
        training_function()

        if report_error == 1:
            sys.exit(1)

        print(f"Training completed with code: {report_error}")

    except Exception as e:
        # Log the error
        print(f"Error occurred during training: {str(e)}")

        # Exit with a non-zero status code
        sys.exit(1)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -