def sample()

in MTRF/algorithms/softlearning/samplers/simple_sampler.py

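Collects a single environment transition: the policy selects an action, the environment is stepped, and the processed sample is appended to the current path. When the episode terminates or reaches the maximum path length, the finished path is written to the replay pool and the per-episode state is reset; otherwise the next observation is carried forward. Returns the (next_observation, reward, terminal, info) tuple from the environment step.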

    def sample(self):
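        # Reset the environment lazily at the start of a new episode.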
        if self._current_observation is None:
            self._current_observation = self.env.reset()

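        # When training videos are being saved this epoch, record a rendered frame.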
        if (self._algorithm is not None
                and self._algorithm._save_training_video_frequency > 0
                and self._algorithm._epoch % self._algorithm._save_training_video_frequency == 0):
            if not hasattr(self, "_images"):
                self._images = []
            self._images.append(
                self.env.render(mode='rgb_array', width=256, height=256))

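        # Query the policy for an action and advance the environment by one step.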
        action = self.policy.actions_np(self._policy_input)[0]
        next_observation, reward, terminal, info = self.env.step(action)

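        # Update path-level and global sample counters.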
        self._path_length += 1
        self._path_return += reward
        self._total_samples += 1

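        # Package the transition into the sampler's processed-sample format.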
        processed_sample = self._process_sample(
            observation=self._current_observation,
            action=action,
            reward=reward,
            terminal=terminal,
            next_observation=next_observation,
            info=info,
        )

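        # Append each flattened field of the sample to the current path.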
        for key, value in flatten(processed_sample).items():
            self._current_path[key].append(value)

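        # The episode is over (terminal or max path length reached): flush the path.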
        if terminal or self._path_length >= self._max_path_length:
            last_path = unflatten({
                field_name: np.array(values)
                for field_name, values in self._current_path.items()
            })

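            # Store the completed path in the replay pool.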
            self.pool.add_path({
                key: value
                for key, value in last_path.items()
            })

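            # Attach the recorded frames to the saved path when videos are being captured.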
            if (self._algorithm is not None
                    and self._algorithm._save_training_video_frequency > 0
                    and self._algorithm._epoch % self._algorithm._save_training_video_frequency == 0):
                self._last_n_paths.appendleft({
                    'images': self._images,
                    **last_path,
                })
            else:
                self._last_n_paths.appendleft(last_path)

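            # Track per-episode return statistics.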
            self._max_path_return = max(self._max_path_return,
                                        self._path_return)
            self._last_path_return = self._path_return

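            # Reset the policy, terminate the pool episode, and clear per-path bookkeeping.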
            self.policy.reset()
            self.pool.terminate_episode()
            self._current_observation = None
            self._path_length = 0
            self._path_return = 0
            self._current_path = defaultdict(list)
            self._images = []

            self._n_episodes += 1
        else:
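            # The episode continues; carry the observation into the next call.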
            self._current_observation = next_observation

        return next_observation, reward, terminal, info
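
For context, a minimal sketch of how sample() is typically driven. The sampler, env, policy, and pool are assumed to be set up elsewhere; the loop below is illustrative only, not code from this repository:

    # Hypothetical driver loop (illustrative only): sample() is called once per
    # environment step. Episode termination, pool insertion, and per-episode
    # resets are all handled inside sample(), so the caller simply keeps
    # invoking it.
    for step in range(total_steps):
        next_observation, reward, terminal, info = sampler.sample()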