in model_code/process_data_to_source_target.py [0:0]
def form_multitask(questions, documents, answers):
    """Expand aligned (question, document, answer) triples into tagged pairs.

    For every question, nine (source, target) examples are produced in a
    fixed order: five language-modeling examples, whose source is only a
    task tag, followed by four seq2seq examples, whose source is a task tag
    plus the question (and, for ``<s2s_qd_a>``, the document).

    Args:
        questions: Iterable of question strings.
        documents: Indexable collection of document strings; entry ``i``
            is paired with the ``i``-th question.
        answers: Indexable collection of answer strings; entry ``i`` is
            paired with the ``i``-th question.

    Returns:
        A ``(source, target)`` tuple of two lists of equal length, holding
        nine entries per question.

    Raises:
        IndexError: If ``documents`` or ``answers`` has fewer entries than
            ``questions``.
    """
    examples = []
    for idx, question in enumerate(questions):
        # Pieces shared by several of the tasks below.
        marked_question = "<startQuestion> " + question
        doc = documents[idx]
        question_and_doc = marked_question + " " + doc
        answer = answers[idx]

        examples.extend(
            (
                # Language-modeling tasks: the source is just the task tag.
                ("<lm_qda>", question_and_doc + " <startAnswer> " + answer),
                ("<lm_qd>", question_and_doc),
                ("<lm_a>", answer),
                ("<lm_q>", question),
                ("<lm_d>", doc),
                # Seq2seq tasks: the source carries the conditioning text.
                (
                    "<s2s_q_da> <startQuestion> " + question,
                    doc + " <startAnswer> " + answer,
                ),
                (
                    "<s2s_qd_a> <startQuestion> " + question + " " + doc,
                    answer,
                ),
                ("<s2s_q_a> <startQuestion> " + question, answer),
                ("<s2s_q_d> <startQuestion> " + question, doc),
            )
        )

    # Split the collected pairs back into the two parallel lists.
    source = [src for src, _ in examples]
    target = [tgt for _, tgt in examples]
    return source, target