in csrc/liars_dice/subgame_solving.cc [577:664]
void step(int traverser) override {
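  // One CFR step for `traverser`: update regrets from the latest traversal,
  // fold the root values into a running mean, discount the accumulated
  // regrets and strategy sums (LCFR/DCFR), recompute the current strategy
  // with regret matching, and add its reach-weighted contribution to the
  // average strategy.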
  update_regrets(traverser);
  root_values[traverser] = traverser_values[0];
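  // Running mean of the root values. With linear updates the t-th iterate
  // gets weight proportional to t, i.e., alpha = 2 / (t + 2); otherwise all
  // iterates are weighted equally, i.e., alpha = 1 / (t + 1).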
  {
    const double alpha = params.linear_update
                             ? 2. / (num_steps[traverser] + 2)
                             : 1. / (num_steps[traverser] + 1);
    root_values_means[traverser].resize(root_values[traverser].size());
    for (size_t i = 0; i < root_values[traverser].size(); ++i) {
      root_values_means[traverser][i] +=
          (root_values[traverser][i] - root_values_means[traverser][i]) *
          alpha;
    }
  }
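  // Discount factors for previously accumulated regrets (positive and
  // negative separately) and for the strategy sums, following Linear CFR
  // or Discounted CFR depending on the solver parameters.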
  double pos_discount = 1;
  double neg_discount = 1;
  double strat_discount = 1;
  {
    // We always have a uniform initial strategy, hence the +1.
    const double num_strategies = num_steps[traverser] + 1;
    if (params.linear_update) {
      pos_discount = neg_discount = strat_discount =
          num_strategies / (num_strategies + 1);
    } else if (params.dcfr) {
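      // For large positive exponents the discount t^x / (t^x + 1) is close
      // to 1 once t > 1, and close to 0 for large negative exponents, so
      // extreme values are clamped rather than computed via pow.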
      if (params.dcfr_alpha >= 5) {
        pos_discount = 1;
      } else {
        pos_discount = pow(num_strategies, params.dcfr_alpha) /
                       (pow(num_strategies, params.dcfr_alpha) + 1.);
      }
      if (params.dcfr_beta <= -5) {
        neg_discount = 0;
      } else {
        neg_discount = pow(num_strategies, params.dcfr_beta) /
                       (pow(num_strategies, params.dcfr_beta) + 1.);
      }
      strat_discount =
          pow(num_strategies / (num_strategies + 1), params.dcfr_gamma);
    }
  }
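  // Regret matching: at every node where the traverser acts, the current
  // strategy over legal bids is proportional to the clamped regrets, with
  // kRegretSmoothingEps keeping every probability strictly positive.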
  for (size_t node = 0; node < tree.size(); ++node) {
    if (!tree[node].num_children() ||
        tree[node].state.player_id != traverser) {
      continue;
    }
    const auto [start, end] = game.get_bid_range(tree[node].state);
    for (int i = 0; i < game.num_hands(); ++i) {
      for (int action = start; action < end; ++action) {
        // TODO(akhti): remove magic constant.
        last_strategies[node][i][action] =
            std::max<double>(regrets[node][i][action], kRegretSmoothingEps);
      }
      normalize_probabilities(last_strategies[node][i],
                              &last_strategies[node][i]);
    }
  }
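  // Reach probabilities of the traverser under the freshly computed
  // strategy, seeded with the traverser's beliefs at the subgame root.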
  compute_reach_probabilities(tree, last_strategies,
                              initial_beliefs[traverser], traverser,
                              &reach_probabilities_buffer);
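  // Discount the accumulated regrets and strategy sums, then add the
  // reach-weighted current strategy; the normalized sum is the average
  // strategy.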
  for (size_t node = 0; node < tree.size(); ++node) {
    if (!tree[node].num_children() ||
        tree[node].state.player_id != traverser) {
      continue;
    }
    const auto [action_begin, action_end] =
        game.get_bid_range(tree[node].state);
    for (int i = 0; i < game.num_hands(); ++i) {
      for (Action a = action_begin; a < action_end; ++a) {
        regrets[node][i][a] *=
            regrets[node][i][a] > 0 ? pos_discount : neg_discount;
      }
      for (Action a = action_begin; a < action_end; ++a) {
        sum_strategies[node][i][a] *= strat_discount;
      }
      for (Action a = action_begin; a < action_end; ++a) {
        sum_strategies[node][i][a] +=
            reach_probabilities_buffer[node][i] * last_strategies[node][i][a];
      }
      normalize_probabilities(sum_strategies[node][i],
                              &average_strategies[node][i]);
    }
  }
  ++num_steps[traverser];
}