in evals/eval/evaluate.py [0:0]
def run_comet_compare(lang_pairs, skip_existing, translators, gpus, models_dir, results_dir):
    """Run ``comet-compare`` across translators for every dataset of each pair.

    For each language pair, and each dataset returned by
    ``find_datasets(pair)``, the command is executed inside
    ``{results_dir}/{source}-{target}/`` and its standard output is saved in
    that folder as ``{dataset}.{source}-{target}.cometcompare``. The decoded
    stdout and stderr of every run are also printed.

    Args:
        lang_pairs: Iterable of ``(source, target)`` pairs. Pairs containing
            ``"nn"`` are skipped with a message.
        skip_existing: When true, a dataset whose output file already exists
            and is non-empty is not re-run.
        translators: Comma-separated translator names. Each one contributes a
            ``{dataset}.{translator}.{target}`` argument after ``-t``.
        gpus: Value forwarded to ``comet-compare --gpus``.
        models_dir: Not used by this function; kept so the signature stays
            the same for existing callers.
        results_dir: Root directory that holds one ``{source}-{target}``
            sub-folder per language pair.
    """
    for pair in lang_pairs:
        if "nn" in pair:
            print(
                "There are no evaluation datasets for Norwegian Nynorsk "
                "and it is not supported by Google and Microsoft API. Skipping comparison"
            )
            continue

        source, target = pair
        # The command runs with this folder as its cwd, so the file names
        # passed to it below are relative to it.
        working_folder = f"{results_dir}/{source}-{target}/"

        for original_dataset_name in find_datasets(pair):
            # File names use "_" where the dataset identifier has "/".
            dataset_name = original_dataset_name.replace("/", "_")
            print(f"Comparison for dataset: {dataset_name}, pair: {source}-{target}")

            output_filename = os.path.join(
                working_folder, f"{dataset_name}.{source}-{target}.cometcompare"
            )
            if (
                skip_existing
                and os.path.isfile(output_filename)
                and os.stat(output_filename).st_size > 0
            ):
                print("Comparison exists. Skipping...")
                continue

            system_outputs = [
                f"{dataset_name}.{translator}.{target}"
                for translator in translators.split(",")
            ]

            # Build argv as a list instead of splitting a formatted string on
            # spaces, so a value that contains whitespace stays one argument.
            command = ["comet-compare", "--gpus", str(gpus)]
            if dataset_name in CUSTOM_DATASETS:
                # Custom datasets pass explicit source and reference files.
                command += [
                    "-s",
                    f"{dataset_name}.{source}",
                    "-t",
                    *system_outputs,
                    "-r",
                    f"{dataset_name}.{target}",
                ]
            else:
                # Other datasets are identified by name and language pair via -d.
                command += [
                    "-d",
                    f"{original_dataset_name}:{source}-{target}",
                    "-t",
                    *system_outputs,
                ]

            res = subprocess.run(
                command,
                cwd=working_folder,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            stdout = res.stdout.decode("utf-8")

            # NOTE(review): the exit status is not checked, so a failed run
            # still leaves whatever stdout it produced in the output file.
            # Write with an explicit encoding: the text was decoded as UTF-8
            # and must not depend on the locale's default encoding.
            with open(output_filename, "w", encoding="utf-8") as f:
                f.write(stdout)

            print("stdout: ", stdout)
            print("stderr: ", res.stderr.decode("utf-8"))