int EncodeLastAction_()

in hanabi-learning-environment/hanabi_lib/canonical_encoders.cc [293:422]


int EncodeLastAction_(const HanabiGame& game,
                      const HanabiObservation& obs,
                      int start_offset,
                      const std::vector<int>& order,
                      bool shuffle_color,
                      const std::vector<int>& color_permute,
                      std::vector<float>* encoding) {
  int num_colors = game.NumColors();
  int num_ranks = game.NumRanks();
  int num_players = game.NumPlayers();
  int hand_size = game.HandSize();

  int offset = start_offset;
  const HanabiHistoryItem* last_move = GetLastNonDealMove(obs.LastMoves());
  if (last_move == nullptr) {
    offset += LastActionSectionLength(game);
  } else {
    HanabiMove::Type last_move_type = last_move->move.MoveType();

    // player_id
    // Note: no assertion here. At a terminal state, the last player could have
    // been me (player id 0).
    (*encoding)[offset + last_move->player] = 1;
    offset += num_players;

    // move type
    switch (last_move_type) {
      case HanabiMove::Type::kPlay:
        (*encoding)[offset] = 1;
        break;
      case HanabiMove::Type::kDiscard:
        (*encoding)[offset + 1] = 1;
        break;
      case HanabiMove::Type::kRevealColor:
        (*encoding)[offset + 2] = 1;
        break;
      case HanabiMove::Type::kRevealRank:
        (*encoding)[offset + 3] = 1;
        break;
      default:
        std::abort();
    }
    offset += 4;

    // target player (if hint action)
    if (last_move_type == HanabiMove::Type::kRevealColor ||
        last_move_type == HanabiMove::Type::kRevealRank) {
      int8_t observer_relative_target =
          (last_move->player + last_move->move.TargetOffset()) % num_players;
      (*encoding)[offset + observer_relative_target] = 1;
    }
    offset += num_players;

    // color (if hint action)
    if (last_move_type == HanabiMove::Type::kRevealColor) {
      int color = last_move->move.Color();
      if (shuffle_color) {
        color = color_permute[color];
      }
      (*encoding)[offset + color] = 1;
    }
    offset += num_colors;

    // rank (if hint action)
    if (last_move_type == HanabiMove::Type::kRevealRank) {
      (*encoding)[offset + last_move->move.Rank()] = 1;
    }
    offset += num_ranks;

    if (!order.empty()) {
      // when there are 2 players, we do not need to permute outcome
      // as hinted cards are always our cards
      // if there are more than 1 players, then this may not be true
      // we have not implemented the case for >2 players, so assert here
      assert(num_players == 2);
    }
    // outcome (if hinted action)
    if (last_move_type == HanabiMove::Type::kRevealColor ||
        last_move_type == HanabiMove::Type::kRevealRank) {
      for (int i = 0, mask = 1; i < hand_size; ++i, mask <<= 1) {
        if ((last_move->reveal_bitmask & mask) > 0) {
          (*encoding)[offset + i] = 1;
        }
      }
    }
    offset += hand_size;

    // position (if play or discard action)
    // play & discard should always be permuted,
    // because it is always partner's action, revealing info on partner's hand
    if (last_move_type == HanabiMove::Type::kPlay ||
        last_move_type == HanabiMove::Type::kDiscard) {
      if (order.size() > 0) {
        // does nothing
        // std::cout << "hand idx" << hand_idx << std::endl;
        // hand_idx = order[hand_idx];
      } else {
        // in normal mode, tells you which card was played/discarded
        int hand_idx = last_move->move.CardIndex();
        (*encoding)[offset + hand_idx] = 1;
      }
    }
    offset += hand_size;

    // card (if play or discard action)
    if (last_move_type == HanabiMove::Type::kPlay ||
        last_move_type == HanabiMove::Type::kDiscard) {
      assert(last_move->color >= 0);
      assert(last_move->rank >= 0);
      int card_idx = CardIndex(
          last_move->color, last_move->rank, num_ranks, shuffle_color, color_permute);
      (*encoding)[offset + card_idx] = 1;
    }
    offset += BitsPerCard(game);

    // was successful and/or added information token (if play action)
    if (last_move_type == HanabiMove::Type::kPlay) {
      if (last_move->scored) {
        (*encoding)[offset] = 1;
      }
      if (last_move->information_token) {
        (*encoding)[offset + 1] = 1;
      }
    }
    offset += 2;
  }

  assert(offset - start_offset == LastActionSectionLength(game));
  return offset - start_offset;
}