in src/loss.cc [203:245]
void HierarchicalSoftmaxLoss::buildTree(const std::vector<int64_t>& counts) {
tree_.resize(2 * osz_ - 1);
for (int32_t i = 0; i < 2 * osz_ - 1; i++) {
tree_[i].parent = -1;
tree_[i].left = -1;
tree_[i].right = -1;
tree_[i].count = 1e15;
tree_[i].binary = false;
}
for (int32_t i = 0; i < osz_; i++) {
tree_[i].count = counts[i];
}
int32_t leaf = osz_ - 1;
int32_t node = osz_;
for (int32_t i = osz_; i < 2 * osz_ - 1; i++) {
int32_t mini[2] = {0};
for (int32_t j = 0; j < 2; j++) {
if (leaf >= 0 && tree_[leaf].count < tree_[node].count) {
mini[j] = leaf--;
} else {
mini[j] = node++;
}
}
tree_[i].left = mini[0];
tree_[i].right = mini[1];
tree_[i].count = tree_[mini[0]].count + tree_[mini[1]].count;
tree_[mini[0]].parent = i;
tree_[mini[1]].parent = i;
tree_[mini[1]].binary = true;
}
for (int32_t i = 0; i < osz_; i++) {
std::vector<int32_t> path;
std::vector<bool> code;
int32_t j = i;
while (tree_[j].parent != -1) {
path.push_back(tree_[j].parent - osz_);
code.push_back(tree_[j].binary);
j = tree_[j].parent;
}
paths_.push_back(path);
codes_.push_back(code);
}
}