in simple_game/simple_hanabi.h [327:406]
/// Advances the game by one action.
///
/// First call (while allCards_ is still empty): sets the game up. `action` is
/// used as an index into options_.seeds to pick the shuffle seed; the deck is
/// built from options_.cards, shuffled, and options_.numHold cards are dealt to
/// each of the options_.numPlayer players. No player action is applied.
///
/// Later calls: `action` is decoded by parser_ and applied on behalf of
/// currPlayer_ (1-based). Each applied action is appended to publicActions_
/// and the turn is passed on via _nextPlayer().
///
/// Preconditions are checked with assert and are therefore only enforced in
/// builds without NDEBUG.
void step(int action) override {
  if (allCards_.empty()) {
    // Build the deck: options_.cards[suit][rank] copies of every (suit, rank).
    const int numSuits = static_cast<int>(options_.cards.size());
    for (int suit = 0; suit < numSuits; ++suit) {
      const int numRanks = static_cast<int>(options_.cards[suit].size());
      for (int rank = 0; rank < numRanks; ++rank) {
        for (int k = 0; k < options_.cards[suit][rank]; ++k) {
          allCards_.emplace_back(suit, rank);
        }
      }
    }

    // The seed lookup is unchecked indexing, so reject out-of-range actions.
    assert(action >= 0 && action < static_cast<int>(options_.seeds.size()));
    std::mt19937 rng(options_.seeds[action]);
    std::shuffle(allCards_.begin(), allCards_.end(), rng);

    // The deck must be large enough to fill every hand, otherwise the deal
    // below would read past the end of allCards_.
    assert(options_.numPlayer >= 0 && options_.numHold >= 0);
    assert(options_.numPlayer * options_.numHold <=
           static_cast<int>(allCards_.size()));

    // Deal from the top of the shuffled deck; cardIdx_ tracks the next card
    // to be drawn.
    cardIdx_ = 0;
    hands_.resize(options_.numPlayer);
    for (int i = 0; i < options_.numPlayer; ++i) {
      hands_[i].resize(options_.numHold);
      for (int j = 0; j < options_.numHold; ++j) {
        hands_[i][j] = allCards_[cardIdx_++];
      }
    }

    cardUsed_ = 0;
    currPlayer_ = 1;
    hints_ = options_.initHints;
    lives_ = options_.initLives;
    // -1 means that no card of the suit has been played yet, so the first
    // playable rank of every suit is 0.
    currTops_ = std::vector<int>(options_.cards.size(), -1);
    return;
  }

  // During the game play.
  assert(currPlayer_ >= 1 && currPlayer_ <= options_.numPlayer);
  Action a = parser_.decode(action);
  std::string pubAct = "p" + std::to_string(currPlayer_) + "_";

  if (a.type == HINT) {
    assert(hints_ > 0);
    // hintPlayer is 1-based, like currPlayer_.
    assert(a.hintPlayer >= 1 &&
           a.hintPlayer <= static_cast<int>(hands_.size()));
    assert(a.cardIdx >= 0 &&
           a.cardIdx < static_cast<int>(hands_[a.hintPlayer - 1].size()));
    auto hintCard = hands_[a.hintPlayer - 1][a.cardIdx];
    // The public record holds the action description plus only the hinted
    // attribute of the card, as rendered by hintStr(a.hintType).
    pubAct += a.info() + "(" + hintCard.hintStr(a.hintType) + ")";
    publicActions_.push_back(pubAct);
    hints_--;
    _nextPlayer();
    return;
  }

  // Play or discard: both act on a card from the current player's own hand.
  assert(a.cardIdx >= 0 &&
         a.cardIdx < static_cast<int>(hands_[currPlayer_ - 1].size()));
  auto& card = hands_[currPlayer_ - 1][a.cardIdx];
  assert(card.valid());
  // The card is revealed after the action, so it goes into the public record.
  pubAct += a.info() + "(" + card.info(false) + ")";
  publicActions_.push_back(pubAct);

  if (a.type == PLAY) {
    if (card.rank == currTops_[card.suit] + 1) {
      // The card extends its suit's pile by exactly one rank.
      currTops_[card.suit]++;
    } else {
      // Any other rank is a misplay and costs a life.
      lives_--;
    }
  } else {
    // Discard card.
    hints_++;
  }
  cardUsed_++;

  // Refill the slot from the deck; `card` is a reference into hands_, so the
  // assignment replaces the card that was just used.
  if (cardIdx_ < static_cast<int>(allCards_.size())) {
    card = allCards_[cardIdx_++];
  } else {
    // Deck exhausted: leave the slot marked as empty.
    card.setInvalid();
  }
  _nextPlayer();
}