def jobs_for_mono_datasets()

in taskcluster/translations_taskgraph/transforms/from_datasets.py [0:0]


def jobs_for_mono_datasets(config, jobs):
    """Expand each job into one sub-job per configured monolingual dataset.

    For every incoming job the ``dataset-config`` key is popped and read for:

    * ``category`` -- must be ``"mono-src"`` or ``"mono-trg"``; selects which
      list under ``training_config.datasets`` is expanded and whether the
      sub-job's locale is the experiment's ``src`` or ``trg``.
    * ``provider`` -- optional; when non-empty, only datasets whose provider
      prefix (the text before the first ``_``) equals it are kept.
    * ``substitution-fields`` -- optional list of dotted paths into the job;
      each referenced value is passed through ``substitute`` together with
      the provider/dataset/locale values.
    * ``exclude-locales`` -- optional list of ``{"src": ..., "trg": ...}``
      dicts; the job is dropped entirely when the experiment pair is listed.

    Args:
        config: Object exposing ``params["training_config"]`` with the
            ``datasets`` and ``experiment`` (``src``/``trg``) entries.
        jobs: Iterable of job dicts. Each job is mutated: its
            ``dataset-config`` key is removed.

    Yields:
        A deep copy of the job per matching dataset, with the substitution
        fields rewritten and the ``provider``, ``dataset``, ``locale``,
        ``src_locale`` and ``trg_locale`` attributes set.

    Raises:
        Exception: If the category is not ``mono-src`` or ``mono-trg``.
        KeyError: If the category has no entry in the configured datasets,
            or a substitution field path does not exist in the job.
        ValueError: If a dataset name contains no ``_`` separator.
    """
    for job in jobs:
        dataset_config = job.pop("dataset-config", {})
        category = dataset_config.get("category")
        provider = dataset_config.get("provider", "")
        substitution_fields = dataset_config.get("substitution-fields", [])
        exclude_locales = dataset_config.get("exclude-locales", [])

        training_config = config.params["training_config"]
        datasets = training_config["datasets"]
        src = training_config["experiment"]["src"]
        trg = training_config["experiment"]["trg"]

        # Excluded language pairs are skipped before the category is checked.
        if {"src": src, "trg": trg} in exclude_locales:
            continue

        if category not in ("mono-src", "mono-trg"):
            raise Exception(
                "from_datasets:mono can only be used with mono-src and mono-trg categories"
            )

        # The category is validated above, so the locale is fixed for the
        # whole job and does not need to be recomputed per dataset.
        locale = src if category == "mono-src" else trg

        # dict.fromkeys() drops duplicate entries like a set would, but keeps
        # the configured order so sub-jobs are produced deterministically.
        for full_dataset in dict.fromkeys(datasets[category]):
            dataset_provider, dataset = full_dataset.split("_", 1)
            if provider and provider != dataset_provider:
                continue

            subjob = copy.deepcopy(job)

            subs = {
                "provider": dataset_provider,
                "dataset": full_dataset,
                "dataset_sanitized": sanitize_dataset_name(dataset),
                "locale": locale,
                "src_locale": src,
                "trg_locale": trg,
            }
            for field in substitution_fields:
                # "a.b.c" means subjob["a"]["b"]["c"]: walk down to the
                # parent container, then rewrite the final key in place.
                *parents, leaf = field.split(".")
                container = subjob
                for key in parents:
                    container = container[key]

                container[leaf] = substitute(container[leaf], **subs)

            attributes = subjob.setdefault("attributes", {})
            attributes["provider"] = dataset_provider
            attributes["dataset"] = dataset
            attributes["locale"] = locale
            attributes["src_locale"] = src
            attributes["trg_locale"] = trg

            yield subjob