def jobs_from_datasets()

in taskcluster/translations_taskgraph/transforms/from_datasets.py [0:0]


def jobs_from_datasets(config, jobs):
    """Expand each job into one sub-job per dataset in the training config.

    Each incoming job may carry a ``dataset-config`` mapping (removed from the
    job before it is copied) with these optional keys:

    * ``category``: only datasets listed under this key of
      ``training_config["datasets"]`` are used; when empty, the datasets of
      every category are used.
    * ``provider``: only datasets whose name starts with ``<provider>_`` are
      used.
    * ``substitution-fields``: dotted paths into the job whose values are
      passed through ``substitute`` together with the provider, dataset and
      locale values.
    * ``exclude-locales``: list of ``{"src": ..., "trg": ...}`` mappings; when
      the experiment's language pair is listed, no sub-jobs are produced for
      the job.

    Args:
        config: transform config; ``config.params["training_config"]`` must
            provide ``datasets`` and ``experiment`` (``src`` / ``trg``).
        jobs: iterable of job dicts. Each dict is mutated: its
            ``dataset-config`` key is popped.

    Yields:
        A deep copy of the job for every selected dataset, with the requested
        fields substituted and the ``provider``, ``dataset``, ``src_locale``
        and ``trg_locale`` attributes set. Datasets are visited in sorted
        order so the output is reproducible between runs.

    Raises:
        KeyError: if ``category`` is not present in the configured datasets,
            or a substitution path does not exist in the job.
        ValueError: if a dataset name is not of the form
            ``<provider>_<name>``.
    """
    for job in jobs:
        dataset_config = job.pop("dataset-config", {})
        category = dataset_config.get("category", "")
        provider = dataset_config.get("provider", "")
        substitution_fields = dataset_config.get("substitution-fields", [])
        exclude_locales = dataset_config.get("exclude-locales", [])

        training_config = config.params["training_config"]
        src = training_config["experiment"]["src"]
        trg = training_config["experiment"]["trg"]

        # Skip excluded language pairs before doing any dataset work.
        if {"src": src, "trg": trg} in exclude_locales:
            continue

        datasets = training_config["datasets"]
        if category:
            included_datasets = set(datasets[category])
        else:
            # No category requested: take the union of every category.
            included_datasets = set().union(*datasets.values())

        # Set iteration order for strings varies between interpreter runs
        # (hash randomization); sort so sub-jobs come out in a stable order.
        for full_dataset in sorted(included_datasets):
            dataset_provider, separator, dataset = full_dataset.partition("_")
            if not separator:
                raise ValueError(
                    f"dataset name {full_dataset!r} is not of the form "
                    "'<provider>_<name>'"
                )
            if provider and provider != dataset_provider:
                continue

            subjob = copy.deepcopy(job)

            subs = {
                "provider": dataset_provider,
                "dataset": full_dataset,
                "dataset_sanitized": sanitize_dataset_name(dataset),
                "src_locale": src,
                "trg_locale": trg,
            }
            for field in substitution_fields:
                # "a.b.c" -> descend through "a" and "b", then rewrite "c".
                *parents, leaf = field.split(".")
                container = subjob
                for key in parents:
                    container = container[key]
                container[leaf] = substitute(container[leaf], **subs)

            attributes = subjob.setdefault("attributes", {})
            attributes["provider"] = dataset_provider
            # The attribute holds the dataset name without its provider
            # prefix, unlike the "dataset" substitution value above.
            attributes["dataset"] = dataset
            attributes["src_locale"] = src
            attributes["trg_locale"] = trg

            yield subjob