in scripts/adapet/ADAPET/src/data/COPAReader.py [0:0]
def read_dataset(self, split=None, is_eval=False):
    '''
    Read the original dataset from a JSON-lines file.

    Each non-blank line is parsed as one JSON object with the keys
    "premise", "choice1", "choice2", "question" and "idx", plus an
    optional "label". Examples without a "label" get the label -1.

    For the 'train' and 'unlabeled' splits every example is additionally
    mirrored: the two choices are swapped and a 0/1 label is flipped.
    A missing label (-1) stays -1 in the mirrored copy. Mirrored examples
    are appended after all original examples and reuse the original "idx".

    :param split: partition of the dataset; passed to self._get_file to
                  locate the file to read
    :param is_eval: not used by this reader
    :return: NumPy array of dicts of the form
             {"input": {...}, "output": {"lbl": ...}}
    '''
    missing_lbl = -1  # label assigned to examples that have no "label" key

    file = self._get_file(split)
    data = []
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(file, 'r', encoding='utf-8') as f_in:
        for line in f_in:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            dict_input = {
                "premise": record["premise"],
                "choice1": record["choice1"],
                "idx": record["idx"],
                "choice2": record["choice2"],
                "question": record["question"],
            }
            dict_output = {"lbl": record.get("label", missing_lbl)}
            data.append({"input": dict_input, "output": dict_output})

    if split in ('train', 'unlabeled'):
        mirror_data = []
        for example in data:
            dict_input = example["input"]
            lbl = example["output"]["lbl"]
            if lbl == missing_lbl:
                # No gold label: swapping the choices must not invent one.
                mirror_lbl = missing_lbl
            else:
                mirror_lbl = 1 if lbl == 0 else 0
            mirror_data.append({
                "input": {
                    "premise": dict_input["premise"],
                    "choice1": dict_input["choice2"],
                    "choice2": dict_input["choice1"],
                    "idx": dict_input["idx"],
                    "question": dict_input["question"],
                },
                "output": {"lbl": mirror_lbl},
            })
        data.extend(mirror_data)

    data = np.asarray(data)
    return data