in pachi_py/pachi/uct/policy/ucb1amaf.c [181:218]
URAVE_DEBUG fprintf(stderr, "\t%s value = %f (prior %f)\n",
coord2sstr(node_coord(node), tree->board), n.value, node->prior.value);
}
} else if (r.playouts) {
value = r.value;
URAVE_DEBUG fprintf(stderr, "\t%s value = rave %f (prior %f)\n",
coord2sstr(node_coord(node), tree->board), r.value, node->prior.value);
}
descent->value.playouts = r.playouts + n.playouts;
descent->value.value = value;
return tree_node_get_value(tree, parity, value);
}
void
ucb1rave_descend(struct uct_policy *p, struct tree *tree, struct uct_descent *descent, int parity, bool allow_pass)
{
struct ucb1_policy_amaf *b = p->data;
floating_t nconf = 1.f;
if (b->explore_p > 0)
nconf = sqrt(log(descent->node->u.playouts + descent->node->prior.playouts));
struct uct *u = p->uct;
int vwin = 0;
if (u->max_slaves > 0 && u->slave_index >= 0)
vwin = descent->node == tree->root ? b->root_virtual_win : b->virtual_win;
int child = 0;
uctd_try_node_children(tree, descent, allow_pass, parity, u->tenuki_d, di, urgency) {
struct tree_node *ni = di.node;
urgency = ucb1rave_evaluate(p, tree, &di, parity);
/* In distributed mode, encourage different slaves to work on different
* parts of the tree. We rely on the fact that children (if they exist)
* are the same and in the same order in all slaves. */
if (vwin > 0 && ni->u.playouts > b->vwin_min_playouts && (child - u->slave_index) % u->max_slaves == 0)
urgency += vwin / (ni->u.playouts + vwin);
if (ni->u.playouts > 0 && b->explore_p > 0) {