in coinrun/main_utils.py [0:0]
def process_ep_buf(epinfobuf, tb_writer=None, suffix='', step=0):
    """Compute the mean episode reward of a buffer and optionally log it.

    Args:
        epinfobuf: Indexable, sized collection of per-episode info mappings.
            Each entry must provide an ``'r'`` value; entries may also carry
            an ``'aux_dict'`` mapping of named values.
        tb_writer: Optional object exposing ``log_scalar(value, tag, step)``.
            Nothing is logged when it is ``None``.
        suffix: Text appended to the ``'rew_mean'`` tag. It is not applied
            to the auxiliary tags, which are logged under their bare key.
        step: Step index forwarded to every ``log_scalar`` call.

    Returns:
        The NaN-ignoring mean of the ``'r'`` values. When
        ``Config.SYNC_FROM_ROOT`` is truthy, the returned value is instead
        the first element of ``mpi_average_train_test([mean])``.
    """
    should_log = tb_writer is not None

    mean_reward = np.nanmean([info['r'] for info in epinfobuf])
    if Config.SYNC_FROM_ROOT:
        # NOTE(review): presumably combines the value across MPI workers;
        # confirm against mpi_average_train_test's definition.
        mean_reward = mpi_average_train_test([mean_reward])[0]
    if should_log:
        tb_writer.log_scalar(mean_reward, 'rew_mean' + suffix, step)

    # Only the first entry is inspected to decide whether auxiliary data is
    # present; when it is, every entry is expected to carry 'aux_dict'.
    has_aux = len(epinfobuf) > 0 and 'aux_dict' in epinfobuf[0]
    if not has_aux:
        return mean_reward

    aux_records = [info['aux_dict'] for info in epinfobuf]

    # The key set of the first record drives the loop, so every record must
    # contain each of those keys.
    for name in aux_records[0].keys():
        component_mean = np.nanmean([record[name] for record in aux_records])
        if should_log:
            tb_writer.log_scalar(component_mean, name, step)

    return mean_reward