# in utils_multiple_choice.py [0:0]
def __init__(self):
    """Load every example used by this object into ``self.D``.

    ``self.D`` is a list of three lists:

    * ``self.D[0]`` / ``self.D[1]`` come from the ``"train"`` / ``"dev"``
      entries of ``arranger_input.json``. For each row the first three
      fields are lower-cased and the fourth is stored unchanged.
    * ``self.D[2]`` is built from two triples of files (dev, then test),
      one entry per item of the inference JSON list:
      ``[input_line, response, generator_line, 0]``. The trailing ``0`` is
      a constant; presumably a placeholder label -- confirm with callers.

    All paths are relative to the current working directory and are read
    as UTF-8.

    Raises:
        OSError: If one of the input files cannot be opened.
        IndexError: If an ``lm.input``/``lm.output`` file yields fewer
            entries than the matching inference JSON list.
    """
    generator_tag = "[TransformerGenerator]:"
    response_tag = "<|response|>"

    self.D = [[], [], []]

    with open("arranger_input.json", "r", encoding="utf-8") as f:
        splits = json.load(f)
    for sid, split_name in enumerate(["train", "dev"]):
        for row in splits[split_name]:
            self.D[sid].append(
                [row[0].lower(), row[1].lower(), row[2].lower(), row[3]]
            )

    file_triples = [
        [
            "lm.input.dev.cc.txt",
            "lm.output.dev.cc.txt",
            "dev.inference.gpt2_10epoch_1e-3_fp16.json",
        ],
        [
            "lm.input.test.cc.txt",
            "lm.output.test.cc.txt",
            "test.inference.gpt2_10epoch_1e-3_fp16.json",
        ],
    ]
    for input_path, output_path, inference_path in file_triples:
        with open(input_path, "r", encoding="utf-8") as f:
            # Take elements 0, 2, 4, ... of the newline split and stop
            # before the last element (the text after the final "\n").
            # Presumably the skipped odd lines are separators -- confirm.
            input_lines = f.read().split("\n")[0:-1:2]

        with open(output_path, "r", encoding="utf-8") as f:
            # Everything before the first tag is discarded.
            tagged_chunks = f.read().split(generator_tag)[1:]
        # Only the remainder of the line carrying the tag is kept.
        generator_lines = [
            chunk.split("\n")[0].strip() for chunk in tagged_chunks
        ]

        with open(inference_path, "r", encoding="utf-8") as f:
            generations = json.load(f)

        for i, generated in enumerate(generations):
            # The response is whatever follows the LAST response tag; when
            # the tag is missing altogether the response is empty.
            _, found_tag, tail = generated.rpartition(response_tag)
            response = tail if found_tag else ""
            # Indexing (not zip) on purpose: a short input/output file must
            # fail loudly instead of silently truncating the examples.
            self.D[2].append(
                [
                    input_lines[i].strip(),
                    response,
                    generator_lines[i].strip(),
                    0,
                ]
            )