in dvd_codebase/data/data_handler.py [0:0]
def create_dials(dials, vocab, answer_list, segment_map, vft_data, args):
    """Flatten dialogs into one sample per question turn.

    For every turn ``n`` of every dialog a sample is emitted that pairs the
    current question with all earlier question/answer pairs of the same
    dialog and with the video features sliced up to that turn's cutoff.

    Args:
        dials: Iterable of dialogs. Each dialog is a non-empty sequence of
            turn dicts; the code reads ``'question'``, ``'answer'``,
            ``'final_all_program'``, ``'template'`` (with ``'cutoff'``,
            ``'used_periods'`` and ``'used_objects'``) from every turn and
            ``'image'`` / ``'split'`` from the first turn.
        vocab: Vocabulary handed to ``words2ids`` / ``program2ids`` /
            ``state2ids``.
        answer_list: Sequence of candidate answers; the target of a turn is
            the position of ``str(turn['answer'])`` in this sequence.
        segment_map: Passed through to ``get_vft_size_by_timestamp`` and
            ``get_vft_range_by_period``.
        vft_data: Mapping from ``'<split>-<video>'`` keys to the per-video
            feature array (must support slicing and expose ``.shape``).
        args: Unused here; kept so existing callers keep working.

    Returns:
        dict with keys ``'dialogs'`` (list of samples), ``'vocab'``,
        ``'answer'`` (the given ``answer_list``) and ``'features'`` (always
        an empty list). Each sample is the list::

            [vid_split, vid, qa_id, history, question, answer, turns,
             curr_vft, vft_size, gt_period, program, state]

    Raises:
        ValueError: If a turn's answer is not present in ``answer_list``.
        KeyError: If a video key is missing from ``vft_data``.
        IndexError: If a dialog has no turns (``dialog[0]`` is read).
    """
    dialog_list = []
    qa_id = 0  # running sample id across all dialogs
    for dialog in tqdm(dials, total=len(dials)):
        questions = [words2ids(t['question'], vocab) for t in dialog]
        answers = [words2ids(str(t['answer']), vocab) for t in dialog]
        # Classification target: index of the answer string, wrapped in a list.
        answer_output = [[answer_list.index(str(t['answer']))] for t in dialog]
        qa_pair = [
            np.concatenate((q, a)).astype(np.int32)
            for q, a in zip(questions, answers)
        ]
        vid_cutoffs = [t['template']['cutoff'] for t in dialog]
        vft_sizes = [get_vft_size_by_timestamp(c, segment_map) for c in vid_cutoffs]
        # Only the most recent period of each turn is kept as ground truth.
        gt_vid_periods = [t['template']['used_periods'][-1] for t in dialog]
        # NOTE(review): the result is never read in this function. The call is
        # kept because the helper is not visible here; drop it once confirmed
        # to be side-effect free.
        gt_vft_ranges = [
            get_vft_range_by_period(p, segment_map, vft_sizes[p_idx])
            for p_idx, p in enumerate(gt_vid_periods)
        ]
        programs = [program2ids(t['final_all_program'], vocab) for t in dialog]
        states = [state2ids(t['template']['used_objects'], vocab) for t in dialog]

        # Feature keys use the CATER naming while the dialogs carry CLEVR names.
        vid = dialog[0]['image'].replace('CLEVR', 'CATER')
        vid_split = dialog[0]['split']
        vid_key = '{}-{}'.format(vid_split, vid)
        whole_vft_fea = vft_data[vid_key]

        # The history of turn n is the history of turn n-1 plus the previous
        # QA pair, so it is carried forward instead of being rebuilt from
        # scratch for every turn. np.append returns a new array each time,
        # hence samples never share a history buffer. Starting from
        # np.asarray([]) keeps the dtype the original code produced.
        history = np.asarray([])
        for n, question in enumerate(questions):
            if n:
                history = np.append(history, qa_pair[n - 1])
            # Earlier QA pairs followed by the current (unanswered) question.
            turns = qa_pair[:n] + [question]
            # Features from the start of the video up to this turn's cutoff.
            curr_vft = whole_vft_fea[:vft_sizes[n] + 1]
            vft_size = curr_vft.shape[0]
            item = [
                vid_split, vid, qa_id, history, question, answer_output[n],
                turns, curr_vft, vft_size, gt_vid_periods[n],
                programs[n], states[n],
            ]
            dialog_list.append(item)
            qa_id += 1
    data = {'dialogs': dialog_list, 'vocab': vocab, 'answer': answer_list, 'features': []}
    return data