in simple_game/comm.h [188:233]
// Returns the hand-coded optimal policy for the information set named by `key`.
//
// Key handling, as implemented below:
//   * "s" and any key starting with "done-" have no policy: an empty vector
//     is returned.
//   * Otherwise the first four characters ("P1-r" or "P2-r") are skipped and
//     the following characters, up to the first '-' or the end of the key,
//     are read as one action per character (character minus '0').
//   * Key ends right after the actions => Player 2. The actions are decoded
//     as a little-endian binary number n. The result has
//     commOptions_.possibleCards entries: one-hot at n when n is in range,
//     uniform otherwise.
//   * A '-' follows the actions => Player 1. The integer after the '-' is
//     expanded into commOptions_.numRound little-endian bits, and the bit at
//     index actions.size() selects the result: {1, 0} for bit 0, {0, 1}
//     otherwise.
//
// Malformed keys raise the exceptions thrown by std::string::substr,
// std::stoi and std::vector::at (std::out_of_range / std::invalid_argument)
// rather than reading out of bounds.
std::vector<float> getOptimalStrategy(const std::string &key) const override {
  // compare() tests the prefix without allocating a temporary substring.
  if (key == "s" || key.compare(0, 5, "done-") == 0) return {};

  // Skip "P1-r" or "P2-r".
  std::string::size_type pos = 4;
  std::vector<int> actions;
  while (pos < key.size() && key[pos] != '-') {
    actions.push_back(key[pos] - '0');
    ++pos;
  }

  if (pos == key.size()) {
    // Player 2
    assert(static_cast<int>(actions.size()) == commOptions_.numRound);
    int n = 0;
    for (int round = commOptions_.numRound - 1; round >= 0; --round) {
      // Little Endian: the last action is the most significant bit.
      // at() keeps the access bounds-checked even when the assert above is
      // compiled out (NDEBUG).
      n = ((n << 1) | actions.at(static_cast<std::vector<int>::size_type>(round)));
    }
    std::vector<float> policy(commOptions_.possibleCards, 0.0f);
    if (n < static_cast<int>(policy.size())) {
      policy[n] = 1.0f;
    } else {
      // Reachability should be zero here.
      // But since the policy of any infoSet is requested, we need to return
      // something here.
      std::fill(policy.begin(), policy.end(), 1.0f / commOptions_.possibleCards);
    }
    return policy;
  }

  // Player 1. Binary encoding of the number that follows the '-'.
  int n = std::stoi(key.substr(pos + 1));
  std::vector<int> action_seq;
  for (int k = 0; k < commOptions_.numRound; ++k) {
    // Little Endian. 8 = 1000 => Action sequence [0, 0, 0, 1].
    action_seq.push_back(n % 2);
    n /= 2;
  }

  // The number of actions already taken is the index of the next bit to send.
  // at() throws instead of reading past the end when the key already holds
  // numRound (or more) actions.
  if (action_seq.at(actions.size()) == 0) return {1.0f, 0.0f};
  return {0.0f, 1.0f};
}