in codegen_sources/model/src/evaluation/evaluator.py [0:0]
def create_reference_files(self):
    """
    Create reference files for BLEU evaluation.

    For each parallel language pair (and optional span) and each
    evaluation split, export the reference sentences, their sentence
    ids, and any spans to text files under `params.hyp_path`.
    """
    params = self.params
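    # keys of self.data["para"] are (lang1, lang2) pairs, optionally
    # extended to (lang1, lang2, span) when span annotations are present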
    for key in self.data["para"].keys():
        span = None
        if len(key) == 3:
            lang1, lang2, span = key
        else:
            assert len(key) == 2
            lang1, lang2 = key
        assert lang1 < lang2, (lang1, lang2)
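        # one set of reference files is produced per evaluation split;
        # some (split, pair) combinations also carry per-sentence ids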
        for data_set in EVAL_DATASET_SPLITS:
            has_sent_ids = (data_set, (lang1, lang2)) in params.has_sentence_ids
            # create the eval-scripts folder for each translation direction
            for l1, l2 in ((lang1, lang2), (lang2, lang1)):
                params.eval_scripts_folders[(l1, l2, data_set)] = os.path.join(
                    params.eval_scripts_root,
                    "{0}-{1}.{2}".format(l1, l2, data_set),
                )
                os.makedirs(
                    params.eval_scripts_folders[(l1, l2, data_set)], exist_ok=True
                )
            # define data paths; reference files are named
            # "ref.<pair>.<split>.txt" and hold the target-side text
            lang1_path = os.path.join(
                params.hyp_path,
                "ref.{0}-{1}.{2}.txt".format(lang2, lang1, data_set),
            )
            lang2_path = os.path.join(
                params.hyp_path,
                "ref.{0}-{1}.{2}.txt".format(lang1, lang2, data_set),
            )
            spans_path = os.path.join(
                params.hyp_path,
                "ref.{0}-{1}-{2}.{3}.txt".format(lang1, lang2, span, data_set),
            )
            id_path = os.path.join(
                params.hyp_path,
                "ids.{0}-{1}.{2}.txt".format(lang1, lang2, data_set),
            )
            # store data paths; the reference for direction (src, tgt) is the
            # tgt-side text, and both directions share the same id file
            params.ref_paths[(lang2, lang1, data_set)] = lang1_path
            params.ref_paths[(lang1, lang2, data_set)] = lang2_path
            params.id_paths[(lang1, lang2, data_set)] = id_path
            params.id_paths[(lang2, lang1, data_set)] = id_path
            # text sentences
            lang1_txt = []
            lang2_txt = []
            id_txt = []
            spans = []
            # convert to text
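            # each batch is a pair (or triple, with spans) of
            # (token tensor, lengths, sentence-id tensor, id lengths) tuples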
            for batch in self.get_iterator(data_set, lang1, lang2, span=span):
                if len(batch) == 2:
                    (sent1, len1, id1, lenid1), (sent2, len2, id2, lenid2) = batch
                else:
                    (
                        (sent1, len1, id1, lenid1),
                        (sent2, len2, id2, lenid2),
                        (span_batch, len_span, _, _),
                    ) = batch
                    spans.extend(list(span_batch.T))
                lang1_txt.extend(convert_to_text(sent1, len1, self.dico, params))
                lang2_txt.extend(convert_to_text(sent2, len2, self.dico, params))
                if has_sent_ids:
                    assert id1.equal(id2) and lenid1.equal(lenid2)
                    id_txt.extend(convert_to_text(id1, lenid1, self.dico, params))
            # replace <unk> by <<unk>>, as these tokens cannot be counted in BLEU
            lang1_txt = [x.replace("<unk>", "<<unk>>") for x in lang1_txt]
            lang2_txt = [x.replace("<unk>", "<<unk>>") for x in lang2_txt]
            # export references
            with open(lang1_path, "w", encoding="utf-8") as f:
                f.write("\n".join(lang1_txt) + "\n")
            with open(lang2_path, "w", encoding="utf-8") as f:
                f.write("\n".join(lang2_txt) + "\n")
            if len(spans) > 0:
                with open(spans_path, "w", encoding="utf-8") as f:
                    f.write("\n".join([str(s) for s in spans]) + "\n")
            # restore original segmentation (undo BPE subword splitting so
            # BLEU is computed on detokenized text)
            restore_segmentation(
                lang1_path, roberta_mode=params.roberta_mode, single_line=True
            )
            restore_segmentation(
                lang2_path, roberta_mode=params.roberta_mode, single_line=True
            )
            if has_sent_ids:
                with open(id_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(id_txt) + "\n")
                restore_segmentation(
                    id_path, roberta_mode=params.roberta_mode, single_line=True
                )
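
# Illustrative sketch (assumed language names, not from the source): for the
# pair ("cpp", "java") on the "test" split, the method would write, under
# params.hyp_path:
#   ref.java-cpp.test.txt  - C++ reference text (target of java -> cpp)
#   ref.cpp-java.test.txt  - Java reference text (target of cpp -> java)
#   ids.cpp-java.test.txt  - shared sentence ids, when available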