in simple_game/search.h [1304:1494]
ResultAgg _search2(const std::vector<Entry>& prefix,
const InfoSets& infoSets,
int playerIdx,
Analysis* analysis,
Stats* stats) const {
// From seed, iteratively add new infosets until we reach terminal.
//
// Preprocessing.
std::vector<std::vector<float>> f(infoSets.size());
auto start = std::chrono::high_resolution_clock::now();
for (int k = 0; k < (int)infoSets.size(); ++k) {
const auto& info = infoSets[k];
f[k].resize(info->numAction(), 0);
// if the policy didn't change, what would be the j1 term?
// Note this is dependent on upstream policies so we have to compute it
// here (otherwise we could precompute it)
if (options_.verbose == VERBOSE) {
std::cout << "=== " << printPrefix(prefix) << ", " << info->key()
<< " === " << std::endl;
}
for (const auto& s : info->states()) {
if ((options_.numSample > 0 || options_.numSampleTotal > 0) &&
!s->sampleActive()) {
// std::cout << "Skipping sample..." << std::endl;
continue;
}
float alterReach;
std::string comment;
const State* ss = s.get();
const State* p;
float prob = 1.0f;
int hop = 0;
while (true) {
p = ss->parent();
if (p == nullptr || p->infoSet().phi() >= 0)
break;
prob *= p->infoSet().strategy()[ss->parentActionIdx()];
ss = p;
hop++;
}
// Policy of active nodes is skipped since it is aways 1.
if (p == nullptr) {
// s doesn't connect to any active infoSet so the reach of s doesn't
// change.
alterReach = s->totalReach();
comment = "traceBackOriginal";
} else {
bool inActivePath = (p->infoSet().phi() == ss->parentActionIdx());
if (inActivePath) {
alterReach = p->alterReach() * prob;
} else {
alterReach = 0;
}
comment = "traceBackHop-" + std::to_string(hop);
}
s->setAlterReach(alterReach);
if (alterReach > 0) {
for (int a = 0; a < info->numAction(); ++a) {
float adv = s->child(a).u()[playerIdx] - s->u()[playerIdx];
f[k][a] += adv * alterReach;
}
}
if (analysis != nullptr) {
auto prefix2 = prefix;
prefix2.push_back(std::make_pair(s->key(), -1));
analysis->reachability.append(Result(prefix2, alterReach, comment));
}
if (options_.verbose == VERBOSE) {
std::cout << " " << s->key() << ", alterReach: " << alterReach
<< ", reach: " << s->totalReach()
<< ", u: " << s->u()[playerIdx];
}
}
if (options_.verbose == VERBOSE) {
std::cout << "J2[" << info->key() << "]: reach: " << info->totalReach()
<< ", u: " << info->u() << ", q: " << info->q()
<< ", pi: " << info->strategy() << std::endl;
}
}
if (options_.verbose == VERBOSE) {
std::cout << "_search2() summary: " << printPrefix(prefix)
<< " #infoSets(): " << infoSets.size() << std::endl;
for (int k = 0; k < (int)infoSets.size(); ++k) {
const auto& info = infoSets[k];
std::cout << " " << info->key()
<< ", #state: " << info->states().size() << std::endl;
}
}
if (stats != nullptr) {
auto stop = std::chrono::high_resolution_clock::now();
stats->time_states +=
std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
.count() /
1e6;
}
ResultAgg result;
if (options_.maxDepth <= 0 || options_.maxDepth > (int)prefix.size()) {
for (int k = 0; k < (int)infoSets.size(); ++k) {
const auto& info = infoSets[k];
for (int a = 0; a < info->numAction(); ++a) {
auto currPrefix = std::make_pair(info->key(), a);
if (options_.skipSameDeltaPolicy && info->isDeltaStrategy(a))
continue;
if (options_.use2ndOrder) {
for (int k2 = 0; k2 < k; ++k2) {
const auto& info2 = infoSets[k2];
for (int b = 0; b < info2->numAction(); ++b) {
auto currPrefix2 = std::make_pair(info2->key(), b);
if (options_.skipSameDeltaPolicy && info2->isDeltaStrategy(b))
continue;
float edge = f[k2][b] + f[k][a];
auto prefix2 = prefix;
prefix2.push_back(currPrefix);
prefix2.push_back(currPrefix2);
info->setPhi(a);
info2->setPhi(b);
auto nextInfoSets =
combineInfoSets(info->succ(a), info2->succ(b));
if (analysis != nullptr) {
auto prefix3 = prefix2;
prefix3.push_back(std::make_pair("edge2", -1));
analysis->terms.append(Result(prefix3, edge));
}
ResultAgg res =
_search2(prefix2, nextInfoSets, playerIdx, analysis, stats);
result.append(
res.attach(currPrefix, edge).attach(currPrefix2, 0));
info->setPhi(-1);
info2->setPhi(-1);
}
}
}
// What if we only improve one strategy?
if (!options_.skipSingleInfoSetOpt) {
float edge = f[k][a];
auto prefix2 = prefix;
prefix2.push_back(currPrefix);
if (analysis != nullptr) {
auto prefix3 = prefix2;
prefix3.push_back(std::make_pair("edge1", -1));
analysis->terms.append(Result(prefix3, edge));
}
info->setPhi(a);
ResultAgg res =
_search2(prefix2, info->succ(a), playerIdx, analysis, stats);
result.append(res.attach(currPrefix, edge));
info->setPhi(-1);
}
}
}
}
// Finally if no phi was set, what would be the performance?
result.append(Result(0));
for (int k = 0; k < (int)infoSets.size(); ++k) {
const auto& info = infoSets[k];
for (const auto& s : info->states()) {
s->clearAlterReach();
}
}
return result;
}