in model_code/process_data_to_source_target.py [0:0]
def form_multitask_source_target(questions, documents, answers, output, dataset_name, valid=False):
    """
    Build shuffled multitask source/target pairs and write them to disk.

    Original summary: constructs question + document to answer, and
    question to answer, with multitasking.

    When ``valid`` is false, the pairs returned by ``form_multitask`` are
    extended with the source/target examples that ``masking_tokens``
    returns for each document. When ``valid`` is true, only the pairs from
    ``form_multitask_valid`` are used and no masking examples are added.

    The aligned (source, target) pairs are shuffled together with the
    module-level ``random`` generator, then written via ``write_output`` to
    ``<output>/<dataset_name>.multitask_source`` and
    ``<output>/<dataset_name>.multitask_target``.

    Args:
        questions: forwarded unchanged to the multitask helper.
        documents: forwarded to the multitask helper; also iterated, one
            ``masking_tokens`` call per element, when ``valid`` is false.
        answers: forwarded unchanged to the multitask helper.
        output: output directory as a string (concatenated with "/").
        dataset_name: file-name stem for the two output files.
        valid: selects the validation variant (no masking examples).

    Returns:
        None. The results are only written through ``write_output``.

    Raises:
        ValueError: if the combined sources and targets differ in length.
    """
    mask_source = []
    mask_target = []
    if valid:
        multitask_source, multitask_target = form_multitask_valid(questions, documents, answers)
    else:
        multitask_source, multitask_target = form_multitask(questions, documents, answers)
        for doc in documents:
            # NOTE(review): masking_tokens presumably yields aligned
            # source/target examples for one document -- confirm in helper.
            ms, mt = masking_tokens(doc)
            mask_source.extend(ms)
            mask_target.extend(mt)

    qd_source = multitask_source + mask_source
    targets = multitask_target + mask_target

    # Explicit check instead of `assert`: assertions are removed under
    # `python -O`, and zip() below would then silently drop unpaired items.
    if len(qd_source) != len(targets):
        raise ValueError(
            "source/target length mismatch: %d sources vs %d targets"
            % (len(qd_source), len(targets))
        )

    # Shuffle the pairs together so each source stays aligned with its target.
    combined = list(zip(qd_source, targets))
    random.shuffle(combined)

    if combined:
        sources, targets = zip(*combined)
    else:
        # zip(*[]) yields nothing to unpack; fall back to empty outputs.
        sources, targets = (), ()

    write_output(output + "/" + dataset_name + ".multitask_source", sources)
    write_output(output + "/" + dataset_name + ".multitask_target", targets)