in codegen_sources/model/src/utils.py [0:0]
def vizualize_do_files(lang1, src_file, ref_file, hyp_file):
    """Write a three-column SOURCE / REF / HYPO text view of an evaluation run.

    Three temporary column files are produced (one each for sources,
    references and hypotheses), padded with blank lines so that matching
    entries start on the same row, and then merged side by side with the
    ``pr`` command-line utility.  The temporary files are deleted afterwards.

    The merged file is written next to the first hypothesis file: its name is
    ``hyp_file[0]`` with every ``beam<digit>`` occurrence removed and its last
    suffix replaced by ``.vizualize.txt``.

    Args:
        lang1: Language identifier; the part before the first ``_`` is used
            as the key into ``LangProcessor.processors``.
        src_file: Path of the source file, one example per line.  Each line
            is passed to the processor's ``detokenize_code``; if that raises,
            the raw line is hard-wrapped every 50 characters instead.
        ref_file: Path of the reference file, one example per line.
        hyp_file: Non-empty sequence of hypothesis file paths (presumably
            one per beam -- confirm against callers).  Line ``k`` of every
            file belongs to example ``k``.

    Notes:
        * ``|`` characters in references and hypotheses are turned into line
          breaks before writing.
        * Examples are paired with ``zip``, so extra lines in the longer
          inputs are silently ignored.
        * The exit status of ``pr`` is not checked.
    """
    import shlex  # function-scope import: only needed to quote shell arguments

    lang1_processor = LangProcessor.processors[lang1.split("_")[0]](
        root_folder=TREE_SITTER_ROOT
    )
    src_viz = str(Path(src_file).with_suffix(".vizualize.txt"))
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    hyp_viz = str(
        Path(re.sub(r"beam\d", "", hyp_file[0])).with_suffix(".vizualize.txt.tmp")
    )
    ref_viz = str(Path(ref_file).with_suffix(".vizualize.txt"))
    merged_viz = hyp_viz[: -len(".tmp")]  # final output: hyp_viz minus ".tmp"

    hyp_lines = list(
        zip(*[read_file_lines(path) for path in hyp_file])
    )  # test_size * beam_size
    with open(src_file, encoding="utf-8") as f:
        src_lines = f.readlines()  # test_size
    with open(ref_file, encoding="utf-8") as f:
        ref_lines = f.readlines()  # test_size

    separator = "=========================================================\n"

    with open(src_viz, "w", encoding="utf-8") as src_vizf, open(
        hyp_viz, "w", encoding="utf-8"
    ) as hyp_vizf, open(ref_viz, "w", encoding="utf-8") as ref_vizf:
        src_vizf.write(
            "========================SOURCE============================\n"
        )
        hyp_vizf.write(
            "=========================HYPO=============================\n"
        )
        ref_vizf.write(
            "==========================REF=============================\n"
        )

        for src, hyps, ref in zip(src_lines, hyp_lines, ref_lines):
            src_vizf.write(separator)
            hyp_vizf.write(separator)
            ref_vizf.write(separator)

            # Only the detokenizer call is guarded; `except Exception` keeps
            # the best-effort fallback without swallowing KeyboardInterrupt
            # or SystemExit.
            try:
                src = lang1_processor.detokenize_code(src)
            except Exception:
                # Fallback: hard-wrap the raw line every 50 characters.
                src = "".join(
                    c if (pos + 1) % 50 != 0 else c + "\n"
                    for pos, c in enumerate(src)
                )
            src_vizf.write(src)

            ref = ref.replace("|", "\n").strip()
            ref_vizf.write(ref)

            # Row counts as seen by `pr`: number of "\n"-separated pieces.
            src_rows = src.count("\n") + 1
            ref_rows = ref.count("\n") + 1

            # Iterating over `hyps` directly (instead of indexing with a
            # precomputed beam size) also copes with empty hypothesis files.
            for beam_idx, hyp in enumerate(hyps):
                hyp = hyp.replace("|", "\n").strip()
                hyp_vizf.write(hyp)
                hyp_rows = hyp.count("\n") + 1

                maximum = max(src_rows, hyp_rows, ref_rows)
                if beam_idx == 0:
                    # Source and reference text were just written: top each
                    # column up to the tallest of the three.
                    src_padding = maximum - src_rows
                    ref_padding = maximum - ref_rows
                else:
                    # Further beams have no source/reference text of their
                    # own: fill those columns with blank rows only.
                    src_padding = ref_padding = maximum - 1

                src_vizf.write("\n" * src_padding)
                hyp_vizf.write("\n" * (maximum - hyp_rows))
                ref_vizf.write("\n" * ref_padding)

                # End-of-beam marker, on the same row in all three columns.
                src_vizf.write("-\n")
                hyp_vizf.write("-\n")
                ref_vizf.write("-\n")

            # End-of-example marker.
            src_vizf.write("--\n\n")
            hyp_vizf.write("--\n\n")
            ref_vizf.write("--\n\n")

    # pr: -w 250 = page width, -m = merge files side by side, -t = no
    # headers/trailers.  Paths are quoted so that spaces or shell
    # metacharacters in file names cannot break (or inject into) the command.
    command = (
        f"pr -w 250 -m -t {shlex.quote(src_viz)} {shlex.quote(ref_viz)} "
        f"{shlex.quote(hyp_viz)} > {shlex.quote(merged_viz)}"
    )
    try:
        # DEVNULL rather than PIPE: nothing reads the pipes, and waiting on a
        # child with unread PIPEs can deadlock once a pipe buffer fills up.
        subprocess.run(
            command,
            shell=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    finally:
        # Always clean up the intermediate column files.
        os.remove(src_viz)
        os.remove(ref_viz)
        os.remove(hyp_viz)