in simple_game/game_utils.h [44:153]
// Rebuilds a deterministic tabular policy from a textual training log.
//
// The log is scanned for lines that begin with "eval iter"; the integer that
// follows "regret:" on each such line (up to an optional '(') is recorded
// together with the line's index. The entry with the lowest regret is chosen
// among all entries except the first, and the '|'-delimited table that starts
// three lines after the *preceding* "eval iter" line is parsed.
// NOTE(review): presumably the regret on an eval line scores the table printed
// after the previous eval line -- confirm against the code that writes the log.
//
// Each table row is split on '|'; fields 2 .. size-2 are treated as cells, and
// every cell is a space-separated sequence of action strings. For the cell at
// (row, column) the environment is reset, stepped with `row + column * N`, and
// then replayed action by action. A one-hot distribution over the legal
// actions is stored under `env.infoSet()` before each action is applied.
//
// env:      environment used to replay the action sequences; it is reset and
//           stepped repeatedly, so its state is modified.
// filename: path of the log file to read.
//
// Returns the info-set -> distribution table. An empty table is returned (after
// printing a message and tripping an assert in debug builds) when the file
// cannot be opened or contains fewer than two "eval iter" lines.
// std::stoi may throw if the text after "regret:" is not an integer.
inline tabular::Policies loadPolicy(rela::Env &env, const std::string& filename) {
  tabular::Policies policy;

  std::ifstream iFile(filename);
  if (!iFile) {
    std::cout << "Error!! Cannot open policy file [" << filename << "]" << std::endl;
    assert(false);
    return policy;
  }

  std::vector<std::string> lines;
  std::string line;
  while (std::getline(iFile, line)) {
    lines.push_back(line);
  }

  // Collect (regret, line index) for every "eval iter" line, in file order.
  const std::string kStart = "eval iter";
  const std::string kRegret = "regret:";
  std::vector<std::pair<int, int>> regrets;
  for (size_t i = 0; i < lines.size(); ++i) {
    const std::string& l = lines[i];
    // compare() only looks at the first kStart.size() characters and copes
    // with shorter lines, so no temporary substring is needed.
    if (l.compare(0, kStart.size(), kStart) != 0) {
      continue;
    }
    auto pos = l.find(kRegret);
    assert(pos != std::string::npos);
    pos += kRegret.size();
    // The number ends at an opening parenthesis, or at the end of the line.
    auto end = l.find('(', pos);
    if (end == std::string::npos) {
      end = l.length();
    }
    const int regret = std::stoi(l.substr(pos, end - pos));
    regrets.emplace_back(regret, static_cast<int>(i));
  }

  // The search below skips the first entry and then steps one entry back, so
  // at least two entries are required; otherwise the iterators are invalid.
  if (regrets.size() < 2) {
    std::cout << "Error!! Need at least two \"" << kStart << "\" lines in ["
              << filename << "], found " << regrets.size() << std::endl;
    assert(false);
    return policy;
  }

  // Find lowest regret (pairs compare by regret first, then by line index).
  auto it = std::min_element(regrets.begin() + 1, regrets.end());
  std::cout << "Found min regret: " << it->first << std::endl;
  // Step back to the preceding "eval iter" entry; the table is 3 lines below it.
  --it;
  int start = it->second + 3;

  int j = 0;   // Row index inside the table.
  int N = -1;  // Number of cells per row, fixed by the first row.
  std::cout << "Start parsing at line: " << start << std::endl;
  // Parse consecutive rows that begin with '|', stopping at the end of the file.
  while (start < static_cast<int>(lines.size()) && !lines[start].empty() &&
         lines[start][0] == '|') {
    auto items = split(lines[start], '|');
    /*
    for (const auto& item : items) {
      std::cout << "\"" << item << "\"" << " ";
    }
    std::cout << std::endl;
    */
    if (j == 0) {
      N = static_cast<int>(items.size()) - 3;
      std::cout << "N: " << N << std::endl;
    }
    // Every row must have the same number of fields as the first one.
    assert(static_cast<int>(items.size()) == N + 3);
    for (int i = 2; i < static_cast<int>(items.size()) - 1; ++i) {
      auto bids = split(trim(items[i]), ' ');
      const int c1 = j;
      const int c2 = i - 2;
      // std::cout << "c1=" << c1 << ",c2=" << c2 << ": " << bids << std::endl;
      env.reset();
      // First action: a single index built from the row and column.
      env.step(c1 + c2 * N);
      // Replay the sequence, recording a one-hot policy at every info set.
      for (size_t k = 0; k < bids.size(); ++k) {
        const int action = env.str2action(bids[k]);
        auto legalActions = env.legalActions();
        std::vector<float> pi(legalActions.size(), 0.0f);
        int legalActionLoc = -1;
        for (size_t kk = 0; kk < legalActions.size(); ++kk) {
          if (legalActions[kk].first == action) {
            pi[kk] = 1.0f;
            legalActionLoc = static_cast<int>(kk);
            break;
          }
        }
        // The logged action must be legal in the current state.
        assert(legalActionLoc != -1);
        auto key = env.infoSet();
        auto res = policy.emplace(key, pi);
        if (!res.second) {
          // There is already a policy. Make sure they are the same.
          const auto& prevPi = res.first->second;
          const int prevAction = static_cast<int>(
              std::max_element(prevPi.begin(), prevPi.end()) - prevPi.begin());
          if (prevAction != legalActionLoc) {
            std::cout << "Error!! Same infoSet [" << key << "], different action ["
                      << legalActionLoc << ", " << prevAction << "], legalActions: " << legalActions << std::endl;
            assert(false);
          }
        }
        env.step(action);
      }
      // A complete sequence is expected to end the episode.
      assert(env.terminated());
    }
    ++start;
    ++j;
  }
  std::cout << "#Policy: " << policy.size() << std::endl;
  return policy;
}