in ttw/data_loader.py [0:0]
def __init__(self, data_dir, set, last_turns=1, min_freq=3, min_sent_len=2, orientation_aware=False,
             include_guide_utterances=True):
    """Load the dialogues of one split and turn them into parallel sample lists.

    Every tourist message that is not an action and is longer than
    ``min_sent_len`` produces one sample. A sample consists of one entry in
    each of the lists stored under ``self.data``:

    * ``'utterance'``    -- start token + the last ``last_turns`` encoded
      messages of the dialogue context (flattened) + end token.
    * ``'landmarks'`` / ``'target'`` -- the pair returned by
      ``self.map.get_landmarks`` for the tourist's current location.
    * ``'actions'``      -- actions recorded since the previous sample.
    * ``'goldstandard'`` -- features at the location where the segment
      started, followed by features after each recorded action.

    Args:
        data_dir: Directory holding ``talkthewalk.<set>.json`` and
            ``dict.txt``; also handed to ``Map``.
        set: Name substituted into the dialogue file name
            (``talkthewalk.{set}.json``). Shadows the builtin, but is kept
            because callers may pass it by keyword.
        last_turns: How many of the most recent context messages are
            concatenated into each utterance sample.
        min_freq: Forwarded to ``Dictionary`` as ``min_freq``.
        min_sent_len: A tourist message is used only if splitting it on
            single spaces yields strictly more than this many pieces;
            shorter messages are dropped and never enter the context.
        orientation_aware: If true, every recognised action id is recorded
            as-is. Otherwise only ``'ACTION:FORWARD'`` is recorded, encoded
            from the (old location, new location) pair.
        include_guide_utterances: If true, messages whose ``id`` is not
            ``'Tourist'`` are appended to the dialogue context.
    """
    dialogue_path = os.path.join(data_dir, 'talkthewalk.{}.json'.format(set))
    # Context manager so the file handle is closed deterministically instead
    # of being left for the garbage collector.
    with open(dialogue_path) as dialogue_file:
        self.dialogues = json.load(dialogue_file)

    self.dict = Dictionary(file=os.path.join(data_dir, 'dict.txt'), min_freq=min_freq)
    self.map = Map(data_dir, neighborhoods, include_empty_corners=True)
    self.act_dict = ActionAgnosticDictionary()
    self.act_aware_dict = ActionAwareDictionary()
    self.feature_loader = GoldstandardFeatures(self.map)

    # The five lists are always appended to together, so index i in each of
    # them describes the same sample.
    self.data = {
        'actions': [],
        'goldstandard': [],
        'landmarks': [],
        'target': [],
        'utterance': [],
    }

    for config in self.dialogues:
        loc = config['start_location']
        neighborhood = config['neighborhood']
        boundaries = config['boundaries']

        # Per-segment memories; both are reset after every emitted sample.
        act_memory = []
        obs_memory = [self.feature_loader.get(neighborhood, loc)]
        dialogue_context = []

        for msg in config['dialog']:
            if msg['id'] != 'Tourist':
                # Any non-tourist message only ever contributes context.
                if include_guide_utterances:
                    dialogue_context.append(self.dict.encode(msg['text']))
                continue

            act = msg['text']
            act_id = self.act_aware_dict.encode(act)

            # NOTE(review): presumably encode() returns a negative id for
            # text that is not an action -- confirm in ActionAwareDictionary.
            if act_id >= 0:
                old_loc = loc
                loc = step_aware(act, old_loc, boundaries)

                if orientation_aware:
                    act_memory.append(act_id)
                    obs_memory.append(self.feature_loader.get(neighborhood, loc))
                elif act == 'ACTION:FORWARD':  # went forward
                    # Other actions are neither recorded nor observed in
                    # this mode; only the forward step is stored.
                    act_dir = self.act_dict.encode_from_location(old_loc, loc)
                    act_memory.append(act_dir)
                    obs_memory.append(self.feature_loader.get(neighborhood, loc))
            elif len(msg['text'].split(' ')) > min_sent_len:
                dialogue_context.append(self.dict.encode(msg['text']))

                # NOTE(review): with last_turns == 0 the slice [-0:] selects
                # the whole context, not an empty one.
                recent_tokens = [tok for turn in dialogue_context[-last_turns:] for tok in turn]
                utt = self.dict.encode(START_TOKEN) + recent_tokens + self.dict.encode(END_TOKEN)
                self.data['utterance'].append(utt)

                landmarks, tgt = self.map.get_landmarks(neighborhood, boundaries, loc)
                self.data['landmarks'].append(landmarks)
                self.data['target'].append(tgt)

                self.data['actions'].append(act_memory)
                self.data['goldstandard'].append(obs_memory)

                # Start a fresh segment at the current location; rebinding
                # (not clearing) keeps the lists just stored in self.data intact.
                act_memory = []
                obs_memory = [self.feature_loader.get(neighborhood, loc)]