ResultAgg _search()

in simple_game/search.h [1496:1687]


  ResultAgg _search(const std::vector<Entry>& prefix,
                    const InfoSets& infoSets,
                    int playerIdx,
                    Analysis* analysis) const {
    // From seed, iteratively add new infosets until we reach terminal.
    //
    // Preprocessing.
    std::vector<float> j1s(infoSets.size(), 0.0f);
    std::vector<float> j3s(infoSets.size(), 0.0f);

    for (int k = 0; k < (int)infoSets.size(); ++k) {
      const auto& info = infoSets[k];
      // if the policy didn't change, what would be the j1 term?
      // Note this is dependent on upstream policies so we have to compute it
      // here (otherwise we could precompute it)
      if (options_.verbose == VERBOSE) {
        std::cout << "=== " << printPrefix(prefix) << ", " << info->key()
                  << " === " << std::endl;
      }

      for (const auto& s : info->states()) {
        // For each state, compute J1, which is purely due to analysis change.
        // Trace analysis.
        float alterReach;
        std::string comment;

        const State* ss = s.get();
        const State* p;
        float prob = 1.0f;
        int hop = 0;
        while (true) {
          p = ss->parent();
          if (p == nullptr || p->infoSet().phi() >= 0)
            break;
          prob *= p->infoSet().strategy()[ss->parentActionIdx()];
          ss = p;
          hop++;
        }

        // Policy of active nodes is skipped since it is aways 1.
        if (p == nullptr) {
          // s doesn't connect to any active infoSet so the reach of s doesn't
          // change.
          alterReach = s->totalReach();
          comment = "traceBackOriginal";
        } else {
          bool inActivePath = (p->infoSet().phi() == ss->parentActionIdx());
          if (inActivePath) {
            alterReach = p->alterReach() * prob;
          } else {
            alterReach = 0;
          }
          float term = (alterReach - s->totalReach()) * s->u()[playerIdx];
          if (hop == 0 && inActivePath) {
            // Then s is an immediate descent of active infoSets
            j1s[k] += term;
          } else {
            // Then s connect to some active infoSet so its reachability has
            // changed. Note that s has been accounted by one infoSet I' aside
            // to the active infoSet,
            //      assuming v(s) won't change once it leaves I'.
            // Now s comes back and we want to make corrections.
            j3s[k] += term;
          }
          comment = "traceBackHop-" + std::to_string(hop);
        }

        s->setAlterReach(alterReach);

        if (analysis != nullptr) {
          auto prefix2 = prefix;
          prefix2.push_back(std::make_pair(s->key(), -1));
          analysis->reachability.append(Result(prefix2, alterReach, comment));
        }

        if (options_.verbose == VERBOSE) {
          std::cout << "  " << s->key() << ", alterReach: " << alterReach
                    << ", reach: " << s->totalReach()
                    << ", u: " << s->u()[playerIdx] << ", j1: " << j1s[k]
                    << ", j3: " << j3s[k] << std::endl;
        }
      }

      if (options_.verbose == VERBOSE) {
        std::cout << "J2[" << info->key() << "]: reach: " << info->totalReach()
                  << ", u: " << info->u() << ", q: " << info->q()
                  << ", pi: " << info->strategy() << std::endl;
      }
    }

    float sumJ1 = std::accumulate(j1s.begin(), j1s.end(), 0.0f);

    if (options_.verbose == VERBOSE) {
      std::cout << "_search() summary: " << printPrefix(prefix)
                << " #infoSets(): " << infoSets.size() << std::endl;
      for (int k = 0; k < (int)infoSets.size(); ++k) {
        const auto& info = infoSets[k];
        std::cout << "  " << info->key()
                  << ", #state: " << info->states().size() << std::endl;
      }
    }

    ResultAgg result;

    if (options_.maxDepth <= 0 || options_.maxDepth > (int)prefix.size()) {
      for (int k = 0; k < (int)infoSets.size(); ++k) {
        const auto& info = infoSets[k];

        for (int a = 0; a < info->numAction(); ++a) {
          auto currPrefix = std::make_pair(info->key(), a);

          if (options_.skipSameDeltaPolicy && info->isDeltaStrategy(a))
            continue;

          if (options_.use2ndOrder) {
            for (int k2 = 0; k2 < k; ++k2) {
              const auto& info2 = infoSets[k2];

              for (int b = 0; b < info2->numAction(); ++b) {
                auto currPrefix2 = std::make_pair(info2->key(), b);

                if (options_.skipSameDeltaPolicy && info2->isDeltaStrategy(b))
                  continue;

                float edge = sumJ1 - j1s[k] - j3s[k] - j1s[k2] - j3s[k2] +
                             info->residue(a) + info2->residue(b);

                auto prefix2 = prefix;
                prefix2.push_back(currPrefix);
                prefix2.push_back(currPrefix2);

                info->setPhi(a);
                info2->setPhi(b);

                auto nextInfoSets =
                    combineInfoSets(info->succ(a), info2->succ(b));

                if (analysis != nullptr) {
                  auto prefix3 = prefix2;
                  prefix3.push_back(std::make_pair("edge2", -1));
                  analysis->terms.append(Result(prefix3, edge));
                }

                ResultAgg res =
                    _search(prefix2, nextInfoSets, playerIdx, analysis);
                result.append(
                    res.attach(currPrefix, edge).attach(currPrefix2, 0));
                info->setPhi(-1);
                info2->setPhi(-1);
              }
            }
          }

          // What if we only improve one strategy?
          if (!options_.skipSingleInfoSetOpt) {
            float edge = sumJ1 - j1s[k] - j3s[k] + info->residue(a);

            auto prefix2 = prefix;
            prefix2.push_back(currPrefix);

            if (analysis != nullptr) {
              auto prefix3 = prefix2;
              prefix3.push_back(std::make_pair("edge1", -1));
              analysis->terms.append(Result(prefix3, edge));
            }

            info->setPhi(a);
            ResultAgg res =
                _search(prefix2, info->succ(a), playerIdx, analysis);
            result.append(res.attach(currPrefix, edge));
            info->setPhi(-1);
          }
        }
      }
    }
    // Finally if no phi was set, what would be the performance?
    result.append(Result(sumJ1));
    if (analysis != nullptr) {
      auto prefix3 = prefix;
      prefix3.push_back(std::make_pair("sumJ1", -1));
      analysis->terms.append(Result(prefix3, sumJ1));
    }

    for (int k = 0; k < (int)infoSets.size(); ++k) {
      const auto& info = infoSets[k];
      for (const auto& s : info->states()) {
        s->clearAlterReach();
      }
    }

    return result;
  }