in hanabi-learning-environment/hanabi_lib/canonical_encoders.cc [521:615]
// Writes card knowledge into `encoding` and then turns the leading
// num_colors * num_ranks entries of every card slot into a normalized belief:
// each entry is multiplied by the matching entry of the card count returned
// by ComputeCardCount and the slot is divided by its sum.
//
// Parameters:
//   game, obs        - source of the game dimensions and the observed hands.
//   start_offset     - index in `encoding` where the card-knowledge section
//                      begins (as passed to EncodeCardKnowledge).
//   order, shuffle_color, color_permute
//                    - forwarded unchanged to the helper encoders; their exact
//                      semantics are defined there.
//   encoding         - output buffer, modified in place; must not be null.
//   ret_card_count   - optional; if non-null it receives a copy of the card
//                      count used for the re-weighting.
//   publ             - forwarded to ComputeCardCount. When true every player's
//                      hand is processed; when false only hands[0] is.
//
// Returns the length reported by EncodeCardKnowledge.
//
// If the weighted sum for a card is not positive, diagnostics are printed to
// stdout and assert(false) fires. When asserts are compiled out (NDEBUG) the
// card slot is left un-normalized rather than divided by a non-positive total,
// so no NaN/Inf values are written into `encoding`.
int EncodeV0Belief_(const HanabiGame& game,
                    const HanabiObservation& obs,
                    int start_offset,
                    const std::vector<int>& order,
                    bool shuffle_color,
                    const std::vector<int>& color_permute,
                    std::vector<float>* encoding,
                    std::vector<int>* ret_card_count,
                    bool publ) {
  const int num_colors = game.NumColors();
  const int num_ranks = game.NumRanks();
  const int num_players = game.NumPlayers();
  const int hand_size = game.HandSize();
  const int num_card_types = num_colors * num_ranks;

  // Card count used to weight the per-card knowledge entries.
  const std::vector<int> card_count = ComputeCardCount(
      game, obs, shuffle_color, color_permute, publ);
  if (ret_card_count != nullptr) {
    *ret_card_count = card_count;
  }

  // Card knowledge; the section is split evenly per player and per card slot.
  const int len = EncodeCardKnowledge(
      game, obs, start_offset, order, shuffle_color, color_permute, encoding);
  const int player_offset = len / num_players;
  const int per_card_offset = len / hand_size / num_players;
  assert(per_card_offset == num_card_types + num_colors + num_ranks);

  const std::vector<HanabiHand>& hands = obs.Hands();

  // Un-weighted knowledge of the card currently being processed. It is kept
  // only for the failure dump, which avoids copying the whole encoding on
  // every call.
  std::vector<float> raw_knowledge(num_card_types);

  // Prints everything needed to reproduce a card whose belief has no mass.
  // std::endl is used on purpose: the output must be flushed before the
  // assert aborts the process.
  auto dump_failure = [&](int player_id, int card_idx) {
    std::cout << "publ? " << publ << std::endl;
    std::cout << "encoding size: " << encoding->size() << std::endl;
    std::cout << hands[0].Cards().size() << std::endl;
    std::cout << hands[1].Cards().size() << std::endl;
    std::cout << "player idx: " << player_id
              << ", card idx: " << card_idx
              << ", hand: " << std::endl;
    std::cout << hands[player_id].ToString() << std::endl;
    std::cout << "total = 0 " << std::endl;
    std::cout << "card count" << std::endl;
    // One output row per num_ranks entries (previously hard-coded to 5).
    for (int x = 0; x < num_card_types; ++x) {
      std::cout << card_count[x] << ", ";
      if ((x + 1) % num_ranks == 0) {
        std::cout << std::endl;
      }
    }
    std::cout << "ck" << std::endl;
    for (int x = 0; x < num_card_types; ++x) {
      std::cout << raw_knowledge[x] << ", ";
      if ((x + 1) % num_ranks == 0) {
        std::cout << std::endl;
      }
    }
    std::cout << "Game Seed: " << game.Seed() << std::endl;
    std::cout << "OBS:" << std::endl;
    std::cout << obs.ToString() << std::endl;
  };

  for (int player_id = 0; player_id < num_players; ++player_id) {
    const int num_cards = static_cast<int>(hands[player_id].Cards().size());
    for (int card_idx = 0; card_idx < num_cards; ++card_idx) {
      const int card_base = start_offset
                            + player_offset * player_id
                            + card_idx * per_card_offset;
      // The whole belief slice of this card must lie inside the section.
      assert(card_base - start_offset + num_card_types <= len);

      // Weight each entry by its card count and accumulate the mass.
      float total = 0;
      for (int i = 0; i < num_card_types; ++i) {
        float& entry = (*encoding)[card_base + i];
        raw_knowledge[i] = entry;
        entry *= card_count[i];
        total += entry;
      }

      if (total <= 0) {
        dump_failure(player_id, card_idx);
        assert(false);
        // Asserts disabled: do not divide by a non-positive total.
        continue;
      }

      for (int i = 0; i < num_card_types; ++i) {
        (*encoding)[card_base + i] /= total;
      }
    }
    // In the non-public case only the first hand is converted.
    if (!publ) {
      break;
    }
  }
  return len;
}