in baselines/her/ddpg.py [0:0]
def init_demo_buffer(self, demoDataFile, update_stats=True): #function that initializes the demo buffer
demoData = np.load(demoDataFile) #load the demonstration data from data file
info_keys = [key.replace('info_', '') for key in self.input_dims.keys() if key.startswith('info_')]
info_values = [np.empty((self.T - 1, 1, self.input_dims['info_' + key]), np.float32) for key in info_keys]
demo_data_obs = demoData['obs']
demo_data_acs = demoData['acs']
demo_data_info = demoData['info']
for epsd in range(self.num_demo): # we initialize the whole demo buffer at the start of the training
obs, acts, goals, achieved_goals = [], [] ,[] ,[]
i = 0
for transition in range(self.T - 1):
obs.append([demo_data_obs[epsd][transition].get('observation')])
acts.append([demo_data_acs[epsd][transition]])
goals.append([demo_data_obs[epsd][transition].get('desired_goal')])
achieved_goals.append([demo_data_obs[epsd][transition].get('achieved_goal')])
for idx, key in enumerate(info_keys):
info_values[idx][transition, i] = demo_data_info[epsd][transition][key]
obs.append([demo_data_obs[epsd][self.T - 1].get('observation')])
achieved_goals.append([demo_data_obs[epsd][self.T - 1].get('achieved_goal')])
episode = dict(o=obs,
u=acts,
g=goals,
ag=achieved_goals)
for key, value in zip(info_keys, info_values):
episode['info_{}'.format(key)] = value
episode = convert_episode_to_batch_major(episode)
global DEMO_BUFFER
DEMO_BUFFER.store_episode(episode) # create the observation dict and append them into the demonstration buffer
logger.debug("Demo buffer size currently ", DEMO_BUFFER.get_current_size()) #print out the demonstration buffer size
if update_stats:
# add transitions to normalizer to normalize the demo data as well
episode['o_2'] = episode['o'][:, 1:, :]
episode['ag_2'] = episode['ag'][:, 1:, :]
num_normalizing_transitions = transitions_in_episode_batch(episode)
transitions = self.sample_transitions(episode, num_normalizing_transitions)
o, g, ag = transitions['o'], transitions['g'], transitions['ag']
transitions['o'], transitions['g'] = self._preprocess_og(o, ag, g)
# No need to preprocess the o_2 and g_2 since this is only used for stats
self.o_stats.update(transitions['o'])
self.g_stats.update(transitions['g'])
self.o_stats.recompute_stats()
self.g_stats.recompute_stats()
episode.clear()
logger.info("Demo buffer size: ", DEMO_BUFFER.get_current_size()) #print out the demonstration buffer size