in gym/gym/benchmarks/scoring.py [0:0]
def score_evaluation(self, benchmark, env_id, data_sources, initial_reset_timestamps,
                     episode_lengths, episode_rewards, episode_types, timestamps):
    """Score one evaluation of ``env_id`` against each of the benchmark's tasks.

    For every task returned by ``benchmark.task_specs(env_id)`` the episodes
    are truncated at the cutoff reported by ``_find_cutoffs_for_task`` and
    the remaining rewards are handed to ``self.score_and_solved_func``.

    Args:
        benchmark: Object providing ``task_specs(env_id)``.
        env_id: Environment id, passed to ``benchmark.task_specs`` and
            ``envs.spec``.
        data_sources: Forwarded unchanged to ``_compute_episode_durations``.
        initial_reset_timestamps: Sequence of initial reset timestamps; may
            be empty, in which case the reported start timestamp is ``0``.
        episode_lengths: Length of each episode, in timesteps.
        episode_rewards: Reward of each episode.
        episode_types: Not read by this method; kept so the signature stays
            compatible with existing callers.
        timestamps: Timestamp of each episode (presumably Unix time at
            episode end -- TODO confirm against the caller).

    Returns:
        A dict whose list-valued entries hold one element per task, in task
        order:

        * ``'rewards'``: episode rewards up to the task's cutoff.
        * ``'lengths'``: episode lengths up to the task's cutoff.
        * ``'scores'`` / ``'solves'``: the pair returned by
          ``self.score_and_solved_func`` for the task.
        * ``'timestamps'``: timestamp of the last episode before the cutoff,
          or the initial reset timestamp when no episode qualifies.
        * ``'elapsed_times'``: cumulative seconds up to that last episode,
          or ``0.0`` when no episode qualifies.
        * ``'initial_reset_timestamp'``: a single value, the minimum of
          ``initial_reset_timestamps`` (``0`` if that sequence is empty).
    """
    tasks = benchmark.task_specs(env_id)
    # The returned spec is not used below. The lookup is kept because it may
    # raise for an unknown env_id -- NOTE(review): confirm it is still wanted.
    envs.spec(env_id)

    #### 0. Compute timing stats
    if len(initial_reset_timestamps) > 0:
        initial_reset_timestamp = min(initial_reset_timestamps)
    else:
        initial_reset_timestamp = 0

    # How long each episode actually took
    timestamps = np.array(timestamps)
    durations = _compute_episode_durations(initial_reset_timestamps, data_sources, timestamps)

    #### Convert the per-episode data to arrays once, up front.
    # The per-task results below are slices (views) of these arrays.
    lengths = np.array(episode_lengths)
    all_rewards = np.array(episode_rewards)

    #### Calculate the total elapsed time (in various units)
    #### for each episode

    # How many training timesteps have elapsed by the end of each
    # episode. Not to be confused with Unix timestamps.
    elapsed_timesteps = np.cumsum(lengths)

    # Total number of seconds elapsed by the end of each
    # episode. Note that with n parallel workers each running for
    # m seconds, we want to count the total time as n * m.
    elapsed_seconds = np.cumsum(durations)

    # List of score for each task
    scores = []
    # List of lists of solved episodes for each task
    solves = []
    # List of lists of episode rewards for each task
    rewards = []
    # List of lists of relevant episode lengths for each task
    cutoff_lengths = []
    # Timestamp of the last counted episode for each task
    _timestamps = []
    # Cumulative seconds at the last counted episode for each task
    elapsed_times = []

    for task in tasks:
        # Find the first episode where we're over the allotted
        # training timesteps.
        cutoff_idx = _find_cutoffs_for_task(task, elapsed_timesteps, elapsed_seconds)
        if not np.isfinite(cutoff_idx):
            # A non-finite cutoff means no limit applies:
            # all episodes are fair game.
            cutoff_idx = len(lengths)

        reward = all_rewards[:cutoff_idx]

        score, solved = self.score_and_solved_func(task, reward, elapsed_seconds[:cutoff_idx])

        scores.append(score)
        solves.append(solved)
        rewards.append(reward)
        cutoff_lengths.append(lengths[:cutoff_idx])

        # Test for emptiness explicitly. A truthiness test such as np.any
        # would also treat a non-empty window of all-zero timestamps as
        # "no episodes".
        if timestamps[:cutoff_idx].size > 0:
            last_timestamp = timestamps[cutoff_idx - 1]
            elapsed_time = elapsed_seconds[cutoff_idx - 1]
        else:
            # If we don't have any valid episodes, then the
            # last valid timestamp is when we started.
            last_timestamp = initial_reset_timestamp
            elapsed_time = 0.0

        # Record the timestamp of the last episode
        _timestamps.append(last_timestamp)
        elapsed_times.append(elapsed_time)

    return {
        'rewards': rewards,
        'lengths': cutoff_lengths,
        'scores': scores,
        'solves': solves,
        'timestamps': _timestamps,
        'elapsed_times': elapsed_times,
        'initial_reset_timestamp': initial_reset_timestamp,
    }