in hanabi-learning-environment/hanabi_lib/canonical_encoders.cc [293:422]
int EncodeLastAction_(const HanabiGame& game,
const HanabiObservation& obs,
int start_offset,
const std::vector<int>& order,
bool shuffle_color,
const std::vector<int>& color_permute,
std::vector<float>* encoding) {
int num_colors = game.NumColors();
int num_ranks = game.NumRanks();
int num_players = game.NumPlayers();
int hand_size = game.HandSize();
int offset = start_offset;
const HanabiHistoryItem* last_move = GetLastNonDealMove(obs.LastMoves());
if (last_move == nullptr) {
offset += LastActionSectionLength(game);
} else {
HanabiMove::Type last_move_type = last_move->move.MoveType();
// player_id
// Note: no assertion here. At a terminal state, the last player could have
// been me (player id 0).
(*encoding)[offset + last_move->player] = 1;
offset += num_players;
// move type
switch (last_move_type) {
case HanabiMove::Type::kPlay:
(*encoding)[offset] = 1;
break;
case HanabiMove::Type::kDiscard:
(*encoding)[offset + 1] = 1;
break;
case HanabiMove::Type::kRevealColor:
(*encoding)[offset + 2] = 1;
break;
case HanabiMove::Type::kRevealRank:
(*encoding)[offset + 3] = 1;
break;
default:
std::abort();
}
offset += 4;
// target player (if hint action)
if (last_move_type == HanabiMove::Type::kRevealColor ||
last_move_type == HanabiMove::Type::kRevealRank) {
int8_t observer_relative_target =
(last_move->player + last_move->move.TargetOffset()) % num_players;
(*encoding)[offset + observer_relative_target] = 1;
}
offset += num_players;
// color (if hint action)
if (last_move_type == HanabiMove::Type::kRevealColor) {
int color = last_move->move.Color();
if (shuffle_color) {
color = color_permute[color];
}
(*encoding)[offset + color] = 1;
}
offset += num_colors;
// rank (if hint action)
if (last_move_type == HanabiMove::Type::kRevealRank) {
(*encoding)[offset + last_move->move.Rank()] = 1;
}
offset += num_ranks;
if (!order.empty()) {
// when there are 2 players, we do not need to permute outcome
// as hinted cards are always our cards
// if there are more than 1 players, then this may not be true
// we have not implemented the case for >2 players, so assert here
assert(num_players == 2);
}
// outcome (if hinted action)
if (last_move_type == HanabiMove::Type::kRevealColor ||
last_move_type == HanabiMove::Type::kRevealRank) {
for (int i = 0, mask = 1; i < hand_size; ++i, mask <<= 1) {
if ((last_move->reveal_bitmask & mask) > 0) {
(*encoding)[offset + i] = 1;
}
}
}
offset += hand_size;
// position (if play or discard action)
// play & discard should always be permuted,
// because it is always partner's action, revealing info on partner's hand
if (last_move_type == HanabiMove::Type::kPlay ||
last_move_type == HanabiMove::Type::kDiscard) {
if (order.size() > 0) {
// does nothing
// std::cout << "hand idx" << hand_idx << std::endl;
// hand_idx = order[hand_idx];
} else {
// in normal mode, tells you which card was played/discarded
int hand_idx = last_move->move.CardIndex();
(*encoding)[offset + hand_idx] = 1;
}
}
offset += hand_size;
// card (if play or discard action)
if (last_move_type == HanabiMove::Type::kPlay ||
last_move_type == HanabiMove::Type::kDiscard) {
assert(last_move->color >= 0);
assert(last_move->rank >= 0);
int card_idx = CardIndex(
last_move->color, last_move->rank, num_ranks, shuffle_color, color_permute);
(*encoding)[offset + card_idx] = 1;
}
offset += BitsPerCard(game);
// was successful and/or added information token (if play action)
if (last_move_type == HanabiMove::Type::kPlay) {
if (last_move->scored) {
(*encoding)[offset] = 1;
}
if (last_move->information_token) {
(*encoding)[offset + 1] = 1;
}
}
offset += 2;
}
assert(offset - start_offset == LastActionSectionLength(game));
return offset - start_offset;
}