in taskcluster/translations_taskgraph/transforms/from_datasets.py [0:0]
def jobs_for_mono_datasets(config, jobs):
    """Expand each job into one sub-job per configured monolingual dataset.

    Every job may carry a ``dataset-config`` mapping, which is popped off the
    job (the input job is mutated) and may contain:

    * ``category``: must be ``"mono-src"`` or ``"mono-trg"``; selects which
      list under ``training_config["datasets"]`` is expanded and which locale
      (``src`` or ``trg``) the sub-jobs are tagged with.
    * ``provider``: if non-empty, only datasets whose provider prefix (the
      part of the name before the first ``_``) matches are kept.
    * ``substitution-fields``: dotted paths into the job whose values are
      passed through ``substitute`` with the per-dataset substitutions.
    * ``exclude-locales``: list of ``{"src": ..., "trg": ...}`` mappings; when
      the experiment's language pair is listed, the job yields nothing.

    Args:
        config: Transform config; ``config.params["training_config"]`` must
            provide ``datasets`` and ``experiment`` (with ``src`` and ``trg``).
        jobs: Iterable of job dicts.

    Yields:
        A deep copy of the job for each selected dataset, with the configured
        fields substituted and ``provider``, ``dataset``, ``locale``,
        ``src_locale`` and ``trg_locale`` set under ``attributes``.

    Raises:
        Exception: If ``category`` is not ``"mono-src"`` or ``"mono-trg"``
            (only checked when the language pair is not excluded).
    """
    for job in jobs:
        dataset_config = job.pop("dataset-config", {})
        category = dataset_config.get("category")
        provider = dataset_config.get("provider", "")
        substitution_fields = dataset_config.get("substitution-fields", [])
        exclude_locales = dataset_config.get("exclude-locales", [])

        training_config = config.params["training_config"]
        datasets = training_config["datasets"]
        src = training_config["experiment"]["src"]
        trg = training_config["experiment"]["trg"]

        # The exclusion check intentionally runs before category validation,
        # so an excluded language pair is skipped without further checks.
        if {"src": src, "trg": trg} in exclude_locales:
            continue

        if category not in ("mono-src", "mono-trg"):
            raise Exception(
                "from_datasets:mono can only be used with mono-src and mono-trg categories"
            )

        # The category is validated above, so the locale is fixed for every
        # dataset of this job and can be resolved once.
        locale = src if category == "mono-src" else trg

        # De-duplicate, then sort so sub-jobs come out in a stable order
        # instead of depending on per-process string hash randomization.
        for full_dataset in sorted(set(datasets[category])):
            dataset_provider, dataset = full_dataset.split("_", 1)
            if provider and provider != dataset_provider:
                continue

            subjob = copy.deepcopy(job)

            # NOTE: the "dataset" substitution is the full "<provider>_<name>"
            # string, whereas the "dataset" attribute set below is the name
            # with the provider prefix removed.
            subs = {
                "provider": dataset_provider,
                "dataset": full_dataset,
                "dataset_sanitized": sanitize_dataset_name(dataset),
                "locale": locale,
                "src_locale": src,
                "trg_locale": trg,
            }
            for field in substitution_fields:
                # Walk the dotted path down to the container that holds the
                # final key, then substitute that value in place.
                *parents, leaf = field.split(".")
                container = subjob
                for key in parents:
                    container = container[key]
                container[leaf] = substitute(container[leaf], **subs)

            subjob.setdefault("attributes", {}).update(
                {
                    "provider": dataset_provider,
                    "dataset": dataset,
                    "locale": locale,
                    "src_locale": src,
                    "trg_locale": trg,
                }
            )

            yield subjob