ResultAgg _search2()

in simple_game/search.h [1304:1494]


  ResultAgg _search2(const std::vector<Entry>& prefix,
                     const InfoSets& infoSets,
                     int playerIdx,
                     Analysis* analysis,
                     Stats* stats) const {
    // From seed, iteratively add new infosets until we reach terminal.
    //
    // Preprocessing.
    std::vector<std::vector<float>> f(infoSets.size());

    auto start = std::chrono::high_resolution_clock::now();
    for (int k = 0; k < (int)infoSets.size(); ++k) {
      const auto& info = infoSets[k];
      f[k].resize(info->numAction(), 0);
      // if the policy didn't change, what would be the j1 term?
      // Note this is dependent on upstream policies so we have to compute it
      // here (otherwise we could precompute it)
      if (options_.verbose == VERBOSE) {
        std::cout << "=== " << printPrefix(prefix) << ", " << info->key()
                  << " === " << std::endl;
      }

      for (const auto& s : info->states()) {
        if ((options_.numSample > 0 || options_.numSampleTotal > 0) &&
            !s->sampleActive()) {
          // std::cout << "Skipping sample..." << std::endl;
          continue;
        }

        float alterReach;
        std::string comment;

        const State* ss = s.get();
        const State* p;
        float prob = 1.0f;
        int hop = 0;
        while (true) {
          p = ss->parent();
          if (p == nullptr || p->infoSet().phi() >= 0)
            break;
          prob *= p->infoSet().strategy()[ss->parentActionIdx()];
          ss = p;
          hop++;
        }

        // Policy of active nodes is skipped since it is aways 1.
        if (p == nullptr) {
          // s doesn't connect to any active infoSet so the reach of s doesn't
          // change.
          alterReach = s->totalReach();
          comment = "traceBackOriginal";
        } else {
          bool inActivePath = (p->infoSet().phi() == ss->parentActionIdx());
          if (inActivePath) {
            alterReach = p->alterReach() * prob;
          } else {
            alterReach = 0;
          }
          comment = "traceBackHop-" + std::to_string(hop);
        }

        s->setAlterReach(alterReach);

        if (alterReach > 0) {
          for (int a = 0; a < info->numAction(); ++a) {
            float adv = s->child(a).u()[playerIdx] - s->u()[playerIdx];
            f[k][a] += adv * alterReach;
          }
        }

        if (analysis != nullptr) {
          auto prefix2 = prefix;
          prefix2.push_back(std::make_pair(s->key(), -1));
          analysis->reachability.append(Result(prefix2, alterReach, comment));
        }

        if (options_.verbose == VERBOSE) {
          std::cout << "  " << s->key() << ", alterReach: " << alterReach
                    << ", reach: " << s->totalReach()
                    << ", u: " << s->u()[playerIdx];
        }
      }

      if (options_.verbose == VERBOSE) {
        std::cout << "J2[" << info->key() << "]: reach: " << info->totalReach()
                  << ", u: " << info->u() << ", q: " << info->q()
                  << ", pi: " << info->strategy() << std::endl;
      }
    }

    if (options_.verbose == VERBOSE) {
      std::cout << "_search2() summary: " << printPrefix(prefix)
                << " #infoSets(): " << infoSets.size() << std::endl;
      for (int k = 0; k < (int)infoSets.size(); ++k) {
        const auto& info = infoSets[k];
        std::cout << "  " << info->key()
                  << ", #state: " << info->states().size() << std::endl;
      }
    }

    if (stats != nullptr) {
      auto stop = std::chrono::high_resolution_clock::now();
      stats->time_states +=
          std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
              .count() /
          1e6;
    }

    ResultAgg result;

    if (options_.maxDepth <= 0 || options_.maxDepth > (int)prefix.size()) {
      for (int k = 0; k < (int)infoSets.size(); ++k) {
        const auto& info = infoSets[k];

        for (int a = 0; a < info->numAction(); ++a) {
          auto currPrefix = std::make_pair(info->key(), a);

          if (options_.skipSameDeltaPolicy && info->isDeltaStrategy(a))
            continue;

          if (options_.use2ndOrder) {
            for (int k2 = 0; k2 < k; ++k2) {
              const auto& info2 = infoSets[k2];

              for (int b = 0; b < info2->numAction(); ++b) {
                auto currPrefix2 = std::make_pair(info2->key(), b);

                if (options_.skipSameDeltaPolicy && info2->isDeltaStrategy(b))
                  continue;

                float edge = f[k2][b] + f[k][a];

                auto prefix2 = prefix;
                prefix2.push_back(currPrefix);
                prefix2.push_back(currPrefix2);

                info->setPhi(a);
                info2->setPhi(b);

                auto nextInfoSets =
                    combineInfoSets(info->succ(a), info2->succ(b));

                if (analysis != nullptr) {
                  auto prefix3 = prefix2;
                  prefix3.push_back(std::make_pair("edge2", -1));
                  analysis->terms.append(Result(prefix3, edge));
                }

                ResultAgg res =
                    _search2(prefix2, nextInfoSets, playerIdx, analysis, stats);
                result.append(
                    res.attach(currPrefix, edge).attach(currPrefix2, 0));
                info->setPhi(-1);
                info2->setPhi(-1);
              }
            }
          }

          // What if we only improve one strategy?
          if (!options_.skipSingleInfoSetOpt) {
            float edge = f[k][a];

            auto prefix2 = prefix;
            prefix2.push_back(currPrefix);

            if (analysis != nullptr) {
              auto prefix3 = prefix2;
              prefix3.push_back(std::make_pair("edge1", -1));
              analysis->terms.append(Result(prefix3, edge));
            }

            info->setPhi(a);
            ResultAgg res =
                _search2(prefix2, info->succ(a), playerIdx, analysis, stats);
            result.append(res.attach(currPrefix, edge));
            info->setPhi(-1);
          }
        }
      }
    }
    // Finally if no phi was set, what would be the performance?
    result.append(Result(0));
    for (int k = 0; k < (int)infoSets.size(); ++k) {
      const auto& info = infoSets[k];
      for (const auto& s : info->states()) {
        s->clearAlterReach();
      }
    }

    return result;
  }