pachi_py/pachi/playout.c (118 lines of code) (raw):

#define DEBUG #include <assert.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "board.h" #include "debug.h" #include "engine.h" #include "move.h" #include "ownermap.h" #include "playout.h" /* Whether to set global debug level to the same as the playout * has, in case it is different. This can make sure e.g. tactical * reading produces proper level of debug prints during simulations. * But it is safe to enable this only in single-threaded instances! */ //#define DEBUGL_BY_PLAYOUT #define PLDEBUGL(n) DEBUGL_(policy->debug_level, n) coord_t play_random_move(struct playout_setup *setup, struct board *b, enum stone color, struct playout_policy *policy) { coord_t coord = pass; if (setup->prepolicy_hook) { coord = setup->prepolicy_hook(policy, setup, b, color); // fprintf(stderr, "prehook: %s\n", coord2sstr(coord, b)); } if (is_pass(coord)) { coord = policy->choose(policy, setup, b, color); // fprintf(stderr, "policy: %s\n", coord2sstr(coord, b)); } if (is_pass(coord) && setup->postpolicy_hook) { coord = setup->postpolicy_hook(policy, setup, b, color); // fprintf(stderr, "posthook: %s\n", coord2sstr(coord, b)); } if (is_pass(coord)) { play_random: /* Defer to uniformly random move choice. */ /* This must never happen if the policy is tracking * internal board state, obviously. */ assert(!policy->setboard || policy->setboard_randomok); board_play_random(b, color, &coord, (ppr_permit) policy->permit, policy); } else { struct move m; m.coord = coord; m.color = color; if (board_play(b, &m) < 0) { if (PLDEBUGL(4)) { fprintf(stderr, "Pre-picked move %d,%d is ILLEGAL:\n", coord_x(coord, b), coord_y(coord, b)); board_print(b, stderr); } goto play_random; } } return coord; } int play_random_game(struct playout_setup *setup, struct board *b, enum stone starting_color, struct playout_amafmap *amafmap, struct board_ownermap *ownermap, struct playout_policy *policy) { assert(setup && policy); int gamelen = setup->gamelen - b->moves; if (policy->setboard) policy->setboard(policy, b); #ifdef DEBUGL_BY_PLAYOUT int debug_level_orig = debug_level; debug_level = policy->debug_level; #endif enum stone color = starting_color; int passes = is_pass(b->last_move.coord) && b->moves > 0; while (gamelen-- && passes < 2) { coord_t coord = play_random_move(setup, b, color, policy); #if 0 /* For UCT, superko test here is downright harmful since * in superko-likely situation we throw away literally * 95% of our playouts; UCT will deal with this fine by * itself. */ if (unlikely(b->superko_violation)) { /* We ignore superko violations that are suicides. These * are common only at the end of the game and are * rather harmless. (They will not go through as a root * move anyway.) */ if (group_at(b, coord)) { if (DEBUGL(3)) { fprintf(stderr, "Superko fun at %d,%d in\n", coord_x(coord, b), coord_y(coord, b)); if (DEBUGL(4)) board_print(b, stderr); } return 0; } else { if (DEBUGL(6)) { fprintf(stderr, "Ignoring superko at %d,%d in\n", coord_x(coord, b), coord_y(coord, b)); board_print(b, stderr); } b->superko_violation = false; } } #endif if (PLDEBUGL(7)) { fprintf(stderr, "%s %s\n", stone2str(color), coord2sstr(coord, b)); if (PLDEBUGL(8)) board_print(b, stderr); } if (unlikely(is_pass(coord))) { passes++; } else { passes = 0; } if (amafmap) { assert(amafmap->gamelen < MAX_GAMELEN); amafmap->is_ko_capture[amafmap->gamelen] = board_playing_ko_threat(b); amafmap->game[amafmap->gamelen++] = coord; } if (setup->mercymin && abs(b->captures[S_BLACK] - b->captures[S_WHITE]) > setup->mercymin) break; color = stone_other(color); } floating_t score = board_fast_score(b); int result = (starting_color == S_WHITE ? score * 2 : - (score * 2)); if (DEBUGL(6)) { fprintf(stderr, "Random playout result: %d (W %f)\n", result, score); if (DEBUGL(7)) board_print(b, stderr); } if (ownermap) board_ownermap_fill(ownermap, b); if (b->ps) free(b->ps); #ifdef DEBUGL_BY_PLAYOUT debug_level = debug_level_orig; #endif return result; }