// Excerpt: void step(int traverser) override
// from csrc/liars_dice/subgame_solving.cc [577:664]

  void step(int traverser) override {
    update_regrets(traverser);
    root_values[traverser] = traverser_values[0];
    {
      const double alpha = params.linear_update
                               ? 2. / (num_steps[traverser] + 2)
                               : 1. / (num_steps[traverser] + 1);
      root_values_means[traverser].resize(root_values[traverser].size());
      for (size_t i = 0; i < root_values[traverser].size(); ++i) {
        root_values_means[traverser][i] +=
            (root_values[traverser][i] - root_values_means[traverser][i]) *
            alpha;
      }
    }

    double pos_discount = 1;
    double neg_discount = 1;
    double strat_discount = 1;
    {
      // We always have uniform strategy, hence +1.
      const double num_strategies = num_steps[traverser] + 1;
      if (params.linear_update) {
        pos_discount = neg_discount = strat_discount =
            num_strategies / (num_strategies + 1);
      } else if (params.dcfr) {
        if (params.dcfr_alpha >= 5) {
          pos_discount = 1;
        } else {
          pos_discount = pow(num_strategies, params.dcfr_alpha) /
                         (pow(num_strategies, params.dcfr_alpha) + 1.);
        }
        if (params.dcfr_beta <= -5) {
          neg_discount = 0;
        } else {
          neg_discount = pow(num_strategies, params.dcfr_beta) /
                         (pow(num_strategies, params.dcfr_beta) + 1.);
        }
        strat_discount =
            pow(num_strategies / (num_strategies + 1), params.dcfr_gamma);
      }
    }

    for (size_t node = 0; node < tree.size(); ++node) {
      if (!tree[node].num_children() ||
          tree[node].state.player_id != traverser) {
        continue;
      }
      const auto [start, end] = game.get_bid_range(tree[node].state);
      for (int i = 0; i < game.num_hands(); i++) {
        for (int action = start; action < end; ++action) {
          // TODO(akhti): remove magic constant.
          last_strategies[node][i][action] =
              std::max<double>(regrets[node][i][action], kRegretSmoothingEps);
        }
        normalize_probabilities(last_strategies[node][i],
                                &last_strategies[node][i]);
      }
    }

    compute_reach_probabilities(tree, last_strategies,
                                initial_beliefs[traverser], traverser,
                                &reach_probabilities_buffer);
    for (size_t node = 0; node < tree.size(); ++node) {
      if (!tree[node].num_children() ||
          tree[node].state.player_id != traverser) {
        continue;
      }
      const auto [action_begin, action_end] =
          game.get_bid_range(tree[node].state);
      for (int i = 0; i < game.num_hands(); i++) {
        for (Action a = action_begin; a < action_end; ++a) {
          regrets[node][i][a] *=
              regrets[node][i][a] > 0 ? pos_discount : neg_discount;
        }
        for (Action a = action_begin; a < action_end; ++a) {
          sum_strategies[node][i][a] *= strat_discount;
        }
        for (Action a = action_begin; a < action_end; ++a) {
          sum_strategies[node][i][a] +=
              reach_probabilities_buffer[node][i] * last_strategies[node][i][a];
        }
        normalize_probabilities(sum_strategies[node][i],
                                &average_strategies[node][i]);
      }
    }

    ++num_steps[traverser];
  }