in understanding_rl_vision/rl_clarity/interface.py [0:0]
def get_bookmarks(trajectories, *, sign, num):
    """Pick the highest-scoring step of each segment and return the top ones.

    The score of a step is ``advantages[trajectory, step] * sign``. Each
    trajectory (row) is cut into segments that end at every step whose
    ``dones`` entry is truthy; the final step of each row is always treated
    as a segment end. Within a segment, the step with the largest strictly
    positive score is recorded (the earliest step wins ties). Segments with
    no strictly positive score contribute nothing.

    Args:
        trajectories: Mapping with two-dimensional array entries
            ``"advantages"`` and ``"dones"``, both indexed as
            ``[trajectory, step]``. Neither array is modified.
        sign: Multiplier applied to every advantage before comparison
            (e.g. ``1`` to favour large values, ``-1`` to favour small ones).
        num: Maximum number of coordinates to return.

    Returns:
        A list of at most ``num`` ``(trajectory, step)`` tuples, ordered by
        descending score. Equal scores keep their discovery order.
    """
    advantages = trajectories["advantages"]
    # Work on a copy so that forcing the final step to "done" below does not
    # leak into the caller's data.
    dones = trajectories["dones"].copy()

    num_trajectories, num_steps = advantages.shape[0], advantages.shape[1]
    if num_steps == 0:
        # No steps at all: nothing can be bookmarked, and indexing the last
        # column below would raise IndexError.
        return []

    # Make sure the trailing segment of every trajectory gets flushed.
    dones[:, -1] = np.ones_like(dones[:, -1])

    best_per_segment = []
    for trajectory in range(num_trajectories):
        trajectory_advantages = advantages[trajectory]
        trajectory_dones = dones[trajectory]
        # Starting at 0 means only strictly positive scores can qualify.
        best_score = 0
        best_coords = None
        for step in range(num_steps):
            score = trajectory_advantages[step] * sign
            if score > best_score:
                best_score = score
                best_coords = (trajectory, step)
            # The done step itself belongs to the segment it terminates, so
            # the flush happens after the score update.
            if trajectory_dones[step] and best_coords is not None:
                best_per_segment.append((best_score, best_coords))
                best_score = 0
                best_coords = None

    # sorted() is stable even with reverse=True, so ties keep insertion order.
    ranked = sorted(best_per_segment, key=lambda entry: entry[0], reverse=True)
    return [coords for _, coords in ranked[:num]]