pachi_py/pachi/uct/search.c

#include <assert.h> #include <limits.h> #include <math.h> #include <pthread.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <time.h> #define DEBUG #include "debug.h" #include "distributed/distributed.h" #include "move.h" #include "random.h" #include "timeinfo.h" #include "uct/dynkomi.h" #include "uct/internal.h" #include "uct/search.h" #include "uct/tree.h" #include "uct/uct.h" #include "uct/walk.h" /* Default time settings for the UCT engine. In distributed mode, slaves are * unlimited by default and all control is done on the master, either in time * or with total number of playouts over all slaves. (It is also possible but * not recommended to limit only the slaves; the master then decides the move * when a majority of slaves have made their choice.) */ static struct time_info default_ti; static __attribute__((constructor)) void default_ti_init(void) { time_parse(&default_ti, "15"); } static const struct time_info unlimited_ti = { .period = TT_MOVE, .dim = TD_GAMES, .len = { .games = INT_MAX }, }; /* When terminating UCT search early, the safety margin to add to the * remaining playout number estimate when deciding whether the result can * still change. */ #define PLAYOUT_DELTA_SAFEMARGIN 1000 /* Minimal number of simulations to consider early break. */ #define PLAYOUT_EARLY_BREAK_MIN 5000 /* Minimal time to consider early break (in seconds). */ #define TIME_EARLY_BREAK_MIN 1.0 /* Pachi threading structure: * * main thread * | main(), GTP communication, ... * | starts and stops the search managed by thread_manager * | * thread_manager * | spawns and collects worker threads * | * worker0 * worker1 * ... * workerK * uct_playouts() loop, doing descend-playout until uct_halt * * Another way to look at it is by functions (lines denote thread boundaries): * * | uct_genmove() * | uct_search() (uct_search_start() .. uct_search_stop()) * | ----------------------- * | spawn_thread_manager() * | ----------------------- * | spawn_worker() * V uct_playouts() */ /* Set in thread manager in case the workers should stop. */ volatile sig_atomic_t uct_halt = 0; /* ID of the thread manager. */ static pthread_t thread_manager; bool thread_manager_running; static pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER; static volatile int finish_thread; static pthread_mutex_t finish_serializer = PTHREAD_MUTEX_INITIALIZER; static void * spawn_worker(void *ctx_) { struct uct_thread_ctx *ctx = ctx_; /* Setup */ fast_srandom(ctx->seed); /* Run */ ctx->games = uct_playouts(ctx->u, ctx->b, ctx->color, ctx->t, ctx->ti); /* Finish */ pthread_mutex_lock(&finish_serializer); pthread_mutex_lock(&finish_mutex); finish_thread = ctx->tid; pthread_cond_signal(&finish_cond); pthread_mutex_unlock(&finish_mutex); return ctx; } /* Thread manager, controlling worker threads. It must be called with * finish_mutex lock held, but it will unlock it itself before exiting; * this is necessary to be completely deadlock-free. */ /* The finish_cond can be signalled for it to stop; in that case, * the caller should set finish_thread = -1. */ /* After it is started, it will update mctx->t to point at some tree * used for the actual search, on return * it will set mctx->games to the number of performed simulations. */ static void * spawn_thread_manager(void *ctx_) { /* In thread_manager, we use only some of the ctx fields. */ struct uct_thread_ctx *mctx = ctx_; struct uct *u = mctx->u; struct tree *t = mctx->t; fast_srandom(mctx->seed); int played_games = 0; pthread_t threads[u->threads]; int joined = 0; uct_halt = 0; /* Garbage collect the tree by preference when pondering. */ if (u->pondering && t->nodes && t->nodes_size >= t->pruning_threshold) { t->root = tree_garbage_collect(t, t->root); } /* Spawn threads... */ for (int ti = 0; ti < u->threads; ti++) { struct uct_thread_ctx *ctx = malloc2(sizeof(*ctx)); ctx->u = u; ctx->b = mctx->b; ctx->color = mctx->color; mctx->t = ctx->t = t; ctx->tid = ti; ctx->seed = fast_random(65536) + ti; ctx->ti = mctx->ti; pthread_attr_t a; pthread_attr_init(&a); pthread_attr_setstacksize(&a, 1048576); pthread_create(&threads[ti], &a, spawn_worker, ctx); if (UDEBUGL(4)) fprintf(stderr, "Spawned worker %d\n", ti); } /* ...and collect them back: */ while (joined < u->threads) { /* Wait for some thread to finish... */ pthread_cond_wait(&finish_cond, &finish_mutex); if (finish_thread < 0) { /* Stop-by-caller. Tell the workers to wrap up * and unblock them from terminating. */ uct_halt = 1; /* We need to make sure the workers do not complete * the termination sequence before we get officially * stopped - their wake and the stop wake could get * coalesced. */ pthread_mutex_unlock(&finish_serializer); continue; } /* ...and gather its remnants. */ struct uct_thread_ctx *ctx; pthread_join(threads[finish_thread], (void **) &ctx); played_games += ctx->games; joined++; free(ctx); if (UDEBUGL(4)) fprintf(stderr, "Joined worker %d\n", finish_thread); pthread_mutex_unlock(&finish_serializer); } pthread_mutex_unlock(&finish_mutex); mctx->games = played_games; return mctx; } /*** THREAD MANAGER end */ /*** Search infrastructure: */ int uct_search_games(struct uct_search_state *s) { return s->ctx->t->root->u.playouts; } void uct_search_start(struct uct *u, struct board *b, enum stone color, struct tree *t, struct time_info *ti, struct uct_search_state *s) { /* Set up search state. */ s->base_playouts = s->last_dynkomi = s->last_print = t->root->u.playouts; s->print_interval = u->reportfreq * u->threads; s->fullmem = false; if (ti) { if (ti->period == TT_NULL) { if (u->slave) { *ti = unlimited_ti; } else { *ti = default_ti; time_start_timer(ti); } } time_stop_conditions(ti, b, u->fuseki_end, u->yose_start, u->max_maintime_ratio, &s->stop); } /* Fire up the tree search thread manager, which will in turn * spawn the searching threads. */ assert(u->threads > 0); assert(!thread_manager_running); static struct uct_thread_ctx mctx; mctx = (struct uct_thread_ctx) { .u = u, .b = b, .color = color, .t = t, .seed = fast_random(65536), .ti = ti }; s->ctx = &mctx; pthread_mutex_lock(&finish_serializer); pthread_mutex_lock(&finish_mutex); pthread_create(&thread_manager, NULL, spawn_thread_manager, s->ctx); thread_manager_running = true; } struct uct_thread_ctx * uct_search_stop(void) { assert(thread_manager_running); /* Signal thread manager to stop the workers. */ pthread_mutex_lock(&finish_mutex); finish_thread = -1; pthread_cond_signal(&finish_cond); pthread_mutex_unlock(&finish_mutex); /* Collect the thread manager. */ struct uct_thread_ctx *pctx; thread_manager_running = false; pthread_join(thread_manager, (void **) &pctx); return pctx; } void uct_search_progress(struct uct *u, struct board *b, enum stone color, struct tree *t, struct time_info *ti, struct uct_search_state *s, int i) { struct uct_thread_ctx *ctx = s->ctx; /* Adjust dynkomi? */ int di = u->dynkomi_interval * u->threads; if (ctx->t->use_extra_komi && u->dynkomi->permove && !u->pondering && di && i > s->last_dynkomi + di) { s->last_dynkomi += di; floating_t old_dynkomi = ctx->t->extra_komi; ctx->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, ctx->t); if (UDEBUGL(3) && old_dynkomi != ctx->t->extra_komi) fprintf(stderr, "dynkomi adjusted (%f -> %f)\n", old_dynkomi, ctx->t->extra_komi); } /* Print progress? */ if (i - s->last_print > s->print_interval) { s->last_print += s->print_interval; // keep the numbers tidy uct_progress_status(u, ctx->t, color, s->last_print, NULL); } if (!s->fullmem && ctx->t->nodes_size > u->max_tree_size) { if (UDEBUGL(2)) fprintf(stderr, "memory limit hit (%lu > %lu)\n", ctx->t->nodes_size, u->max_tree_size); s->fullmem = true; } } /* Determine whether we should terminate the search early. */ static bool uct_search_stop_early(struct uct *u, struct tree *t, struct board *b, struct time_info *ti, struct time_stop *stop, struct tree_node *best, struct tree_node *best2, int played, bool fullmem) { /* If the memory is full, stop immediately. Since the tree * cannot grow anymore, some non-well-expanded nodes will * quickly take over with extremely high ratio since the * counters are not properly simulated (just as if we use * non-UCT MonteCarlo). */ /* (XXX: A proper solution would be to prune the tree * on the spot.) */ if (fullmem) return true; /* Think at least 100ms to avoid a random move. This is particularly * important in distributed mode, where this function is called frequently. */ double elapsed = 0.0; if (ti->dim == TD_WALLTIME) { elapsed = time_now() - ti->len.t.timer_start; if (elapsed < TREE_BUSYWAIT_INTERVAL) return false; } /* Break early if we estimate the second-best move cannot * catch up in assigned time anymore. We use all our time * if we are in byoyomi with single stone remaining in our * period, however - it's better to pre-ponder. */ bool time_indulgent = (!ti->len.t.main_time && ti->len.t.byoyomi_stones == 1); if (best2 && ti->dim == TD_WALLTIME && played >= PLAYOUT_EARLY_BREAK_MIN && !time_indulgent) { double remaining = stop->worst.time - elapsed; double pps = ((double)played) / elapsed; double estplayouts = remaining * pps + PLAYOUT_DELTA_SAFEMARGIN; if (best->u.playouts > best2->u.playouts + estplayouts) { if (UDEBUGL(2)) fprintf(stderr, "Early stop, result cannot change: " "best %d, best2 %d, estimated %f simulations to go (%d/%f=%f pps)\n", best->u.playouts, best2->u.playouts, estplayouts, played, elapsed, pps); return true; } } /* Early break in won situation. */ if (best->u.playouts >= PLAYOUT_EARLY_BREAK_MIN && (ti->dim != TD_WALLTIME || elapsed > TIME_EARLY_BREAK_MIN) && tree_node_get_value(t, 1, best->u.value) >= u->sure_win_threshold) { return true; } return false; } /* Determine whether we should terminate the search later than expected. */ static bool uct_search_keep_looking(struct uct *u, struct tree *t, struct board *b, struct time_info *ti, struct time_stop *stop, struct tree_node *best, struct tree_node *best2, struct tree_node *bestr, struct tree_node *winner, int i) { if (!best) { if (UDEBUGL(2)) fprintf(stderr, "Did not find best move, still trying...\n"); return true; } /* Do not waste time if we are winning. Spend up to worst time if * we are unsure, but only desired time if we are sure of winning. */ floating_t beta = 2 * (tree_node_get_value(t, 1, best->u.value) - 0.5); if (ti->dim == TD_WALLTIME && beta > 0) { double good_enough = stop->desired.time * beta + stop->worst.time * (1 - beta); double elapsed = time_now() - ti->len.t.timer_start; if (elapsed > good_enough) return false; } if (u->best2_ratio > 0) { /* Check best/best2 simulations ratio. If the * two best moves give very similar results, * keep simulating. */ if (best2 && best2->u.playouts && (double)best->u.playouts / best2->u.playouts < u->best2_ratio) { if (UDEBUGL(2)) fprintf(stderr, "Best2 ratio %f < threshold %f\n", (double)best->u.playouts / best2->u.playouts, u->best2_ratio); return true; } } if (u->bestr_ratio > 0) { /* Check best, best_best value difference. If the best move * and its best child do not give similar enough results, * keep simulating. */ if (bestr && bestr->u.playouts && fabs((double)best->u.value - bestr->u.value) > u->bestr_ratio) { if (UDEBUGL(2)) fprintf(stderr, "Bestr delta %f > threshold %f\n", fabs((double)best->u.value - bestr->u.value), u->bestr_ratio); return true; } } if (winner && winner != best) { /* Keep simulating if best explored * does not have also highest value. */ if (UDEBUGL(2)) fprintf(stderr, "[%d] best %3s [%d] %f != winner %3s [%d] %f\n", i, coord2sstr(node_coord(best), t->board), best->u.playouts, tree_node_get_value(t, 1, best->u.value), coord2sstr(node_coord(winner), t->board), winner->u.playouts, tree_node_get_value(t, 1, winner->u.value)); return true; } /* No reason to keep simulating, bye. */ return false; } bool uct_search_check_stop(struct uct *u, struct board *b, enum stone color, struct tree *t, struct time_info *ti, struct uct_search_state *s, int i) { struct uct_thread_ctx *ctx = s->ctx; /* Never consider stopping if we played too few simulations. * Maybe we risk losing on time when playing in super-extreme * time pressure but the tree is going to be just too messed * up otherwise - we might even play invalid suicides or pass * when we mustn't. */ assert(!(ti->dim == TD_GAMES && ti->len.games < GJ_MINGAMES)); if (i < GJ_MINGAMES) return false; struct tree_node *best = NULL; struct tree_node *best2 = NULL; // Second-best move. struct tree_node *bestr = NULL; // best's best child. struct tree_node *winner = NULL; best = u->policy->choose(u->policy, ctx->t->root, b, color, resign); if (best) best2 = u->policy->choose(u->policy, ctx->t->root, b, color, node_coord(best)); /* Possibly stop search early if it's no use to try on. */ int played = u->played_all + i - s->base_playouts; if (best && uct_search_stop_early(u, ctx->t, b, ti, &s->stop, best, best2, played, s->fullmem)) return true; /* Check against time settings. */ bool desired_done; if (ti->dim == TD_WALLTIME) { double elapsed = time_now() - ti->len.t.timer_start; if (elapsed > s->stop.worst.time) return true; desired_done = elapsed > s->stop.desired.time; } else { assert(ti->dim == TD_GAMES); if (i > s->stop.worst.playouts) return true; desired_done = i > s->stop.desired.playouts; } /* We want to stop simulating, but are willing to keep trying * if we aren't completely sure about the winner yet. */ if (desired_done) { if (u->policy->winner && u->policy->evaluate) { struct uct_descent descent = { .node = ctx->t->root }; u->policy->winner(u->policy, ctx->t, &descent); winner = descent.node; } if (best) bestr = u->policy->choose(u->policy, best, b, stone_other(color), resign); if (!uct_search_keep_looking(u, ctx->t, b, ti, &s->stop, best, best2, bestr, winner, i)) return true; } /* TODO: Early break if best->variance goes under threshold * and we already have enough playouts (possibly thanks to tbook * or to pondering)? */ return false; } struct tree_node * uct_search_result(struct uct *u, struct board *b, enum stone color, bool pass_all_alive, int played_games, int base_playouts, coord_t *best_coord) { /* Choose the best move from the tree. */ struct tree_node *best = u->policy->choose(u->policy, u->t->root, b, color, resign); if (!best) { *best_coord = pass; return NULL; } *best_coord = node_coord(best); if (UDEBUGL(1)) fprintf(stderr, "*** WINNER is %s (%d,%d) with score %1.4f (%d/%d:%d/%d games), extra komi %f\n", coord2sstr(node_coord(best), b), coord_x(node_coord(best), b), coord_y(node_coord(best), b), tree_node_get_value(u->t, 1, best->u.value), best->u.playouts, u->t->root->u.playouts, u->t->root->u.playouts - base_playouts, played_games, u->t->extra_komi); /* Do not resign if we're so short of time that evaluation of best * move is completely unreliable, we might be winning actually. * In this case best is almost random but still better than resign. * Also do not resign if we are getting bad results while actually * giving away extra komi points (dynkomi). */ if (tree_node_get_value(u->t, 1, best->u.value) < u->resign_threshold && !is_pass(node_coord(best)) // If only simulated node has been a pass and no other node has // been simulated but pass won't win, an unsimulated node has // been returned; test therefore also for #simulations at root. && (best->u.playouts > GJ_MINGAMES || u->t->root->u.playouts > GJ_MINGAMES * 2) && (!u->t->use_extra_komi || komi_by_color(u->t->extra_komi, color) < 0.5) && !u->t->untrustworthy_tree) { *best_coord = resign; return NULL; } /* If the opponent just passed and we win counting, always * pass as well. For option stones_only, we pass only when there * there is nothing else to do, to show how to maximize score. */ if (b->moves > 1 && is_pass(b->last_move.coord) && b->rules != RULES_STONES_ONLY) { if (uct_pass_is_safe(u, b, color, pass_all_alive)) { if (UDEBUGL(0)) fprintf(stderr, "<Will rather pass, looks safe enough; score %f>\n", board_official_score(b, NULL) / 2); *best_coord = pass; best = u->t->root->children; // pass is the first child assert(is_pass(node_coord(best))); return best; } else { if (UDEBUGL(3)) fprintf(stderr, "Refusing to pass, unsafe; pass_all_alive %d, ownermap #playouts %d, raw score %f\n", pass_all_alive, u->ownermap.playouts, board_official_score(b, NULL) / 2); } } return best; }

pachi_py/pachi/uct/search.c (342 lines of code) (raw):