# Extracted method: _state_value()
# From: reagent/ope/trainers/rl_tabular_trainers.py [0:0]


    def _state_value(self, state: State):
        """Estimate the value of ``state`` via Monte-Carlo rollouts.

        Repeatedly generates logged episodes starting from ``state`` until the
        state has been visited at least ``self._count_threshold`` times (or
        ``self._max_iteration`` rollouts have been generated), folding each
        episode's discounted returns into the tabular estimates via
        ``self._update_state_value``, then delegates to the superclass for the
        final value lookup.

        Args:
            state: the state whose value is being estimated.

        Returns:
            The value estimate produced by ``super()._state_value(state)``.
        """
        i = 0
        state_count = self._state_counts.get(state, 0)
        while state_count < self._count_threshold and i < self._max_iteration:
            i += 1
            mdp = self._log_generator.generate_log(state)
            if self._first_visit:
                # First-visit MC: each state's estimate is updated only with
                # the return following its *first* occurrence in the episode.
                # Count occurrences of each state in the episode first.
                state_counts = {}
                for t in mdp:
                    if t.last_state is None:
                        continue
                    state_counts[t.last_state] = (
                        state_counts.get(t.last_state, 0) + 1
                    )
                g = 0
                for t in reversed(mdp):
                    if t.last_state is None:
                        continue
                    g = self._gamma * g + t.reward
                    counts = state_counts[t.last_state]
                    # Walking the episode in reverse while decrementing the
                    # per-state count, counts == 1 marks the first
                    # (forward-order) visit of the state.  BUG FIX: the
                    # original tested counts > 1, which updated on every
                    # visit *except* the first -- the inverse of
                    # first-visit Monte-Carlo.
                    if counts == 1:
                        self._update_state_value(t.last_state, g)
                    counts -= 1
                    if counts == 0:
                        del state_counts[t.last_state]
                    else:
                        state_counts[t.last_state] = counts
            else:
                # Every-visit MC: fold the discounted return from every
                # occurrence of a state into its estimate.
                g = 0
                for t in reversed(mdp):
                    if t.last_state is None:
                        continue
                    g = self._gamma * g + t.reward
                    self._update_state_value(t.last_state, g)
            state_count = self._state_counts.get(state, 0)
        return super()._state_value(state)