in understanding_rl_vision/rl_clarity/training.py [0:0]
def step_async(self, actions):
    """Forward a step to the wrapped env, randomising some of the actions.

    One uniform random number is drawn per environment. Wherever that
    draw is below ``self.epsilon``, the caller's action for that slot is
    replaced by a fresh ``self.action_space.sample()``; otherwise the
    caller's action is kept. The assembled array is then handed to
    ``self.venv.step_async``.

    Args:
        actions: Indexable collection holding one action per environment
            (indexed ``0 .. self.num_envs - 1``).
    """
    # Boolean flag per environment: True means "replace with a random action".
    explore = np.random.uniform(size=self.num_envs) < self.epsilon

    chosen = []
    for env_idx in range(self.num_envs):
        if explore[env_idx]:
            # Sample lazily so the action space's RNG is only consumed for
            # the slots that are actually overridden.
            chosen.append(self.action_space.sample())
        else:
            chosen.append(actions[env_idx])

    self.venv.step_async(np.array(chosen))