in pipeline/train/train.py [0:0]
def get_marian_cmd(self):
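    """Build the Marian training command for this run.

    Returns the full argv list: the marian binary followed by model and training
    configs, validation settings, embedding-tying options, and any extra Marian
    arguments supplied to this task.
    """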
    all_model_metrics = ["chrf", "ce-mean-words", "bleu-detok"]
    validation_metrics = [
        # Place the best model metric first.
        self.best_model_metric.value,
        # Then the remaining metrics follow.
        *[m for m in all_model_metrics if m != self.best_model_metric.value],
    ]
    # Drop the leading "--" element from the extra arguments list.
    extra_args = self.extra_marian_args[1:]
    # A CPU build of Marian is used in tests, and it does not work with these
    # sharding arguments, so only add them when not running on the CPU.
    if "USE_CPU" not in os.environ:
        extra_args.append("--sharding")
        extra_args.append("local")
    if self.model_type == ModelType.student:
        if self.student_model == StudentModel.none:
            raise ValueError("Student configuration is not provided")
        model_name = f"student.{self.student_model.value}"
    else:
        model_name = self.model_type.value
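    # Compare the vocab files by content to decide how embeddings should be tied.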
    if filecmp.cmp(self.src_vocab, self.trg_vocab, shallow=False):
        emb_args = {"tied-embeddings-all": "true"}
    else:
        # When using separate vocabs, tie only the target embeddings and the output
        # embeddings in the output layer; do not tie source and target embeddings.
        emb_args = {"tied-embeddings-all": "false", "tied-embeddings": "true"}
    return [
        str(self.marian_bin),
        *apply_command_args(
            {
                "model": self.artifacts / "model.npz",
                "config": [
                    train_dir / f"configs/model/{model_name}.yml",
                    train_dir
                    / f"configs/training/{self.model_type.value}.{self.training_type.value}.yml",
                ],
                "tempdir": self.temp_dir / "marian-tmp",
                "vocabs": [self.src_vocab, self.trg_vocab],
                "workspace": self.workspace,
                "devices": self.gpus.split(" "),
                "valid-metrics": validation_metrics,
                "valid-sets": str(self.validation_set),
                "valid-translation-output": self.artifacts / "devset.out",
                "valid-log": self.artifacts / "valid.log",
                "log": self.artifacts / "train.log",
                "shuffle": "batches",
                "seed": str(self.seed),
"no-restore-corpus": None,
"valid-reset-stalled": None,
"sync-sgd": None,
"quiet-translation": None,
"overwrite": None,
"keep-best": None,
"tsv": None,
}
),
*apply_command_args(emb_args),
*extra_args,
]
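
# A minimal sketch of the argument expansion assumed above. `apply_command_args`
# is the project's helper; the implementation below is illustrative only, showing
# how a dict could be flattened into Marian-style CLI tokens: list values expand
# into multiple arguments and None values emit the flag by itself.
def apply_command_args_sketch(args: dict) -> list[str]:
    cmd: list[str] = []
    for key, value in args.items():
        cmd.append(f"--{key}")
        if value is None:
            # Bare switch such as --overwrite or --tsv.
            continue
        if isinstance(value, (list, tuple)):
            # e.g. "devices": ["0", "1"] -> --devices 0 1
            cmd.extend(str(v) for v in value)
        else:
            cmd.append(str(value))
    return cmd

# Example:
#   apply_command_args_sketch({"devices": ["0", "1"], "tsv": None})
#   -> ["--devices", "0", "1", "--tsv"]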