in src/io/train_share_states.cpp [251:385]
void TrainingShareStates::CalcBinOffsets(const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
std::vector<uint32_t>* offsets, bool in_is_col_wise) {
offsets->clear();
feature_hist_offsets_.clear();
if (in_is_col_wise) {
uint32_t cur_num_bin = 0;
uint32_t hist_cur_num_bin = 0;
for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
if (feature_group->is_multi_val_) {
if (feature_group->is_dense_multi_val_) {
for (int i = 0; i < feature_group->num_feature_; ++i) {
const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) {
cur_num_bin += 1;
hist_cur_num_bin += 1;
}
offsets->push_back(cur_num_bin);
feature_hist_offsets_.push_back(hist_cur_num_bin);
int num_bin = bin_mapper->num_bin();
hist_cur_num_bin += num_bin;
if (bin_mapper->GetMostFreqBin() == 0) {
feature_hist_offsets_.back() += 1;
}
cur_num_bin += num_bin;
}
offsets->push_back(cur_num_bin);
CHECK(cur_num_bin == feature_group->bin_offsets_.back());
} else {
cur_num_bin += 1;
hist_cur_num_bin += 1;
for (int i = 0; i < feature_group->num_feature_; ++i) {
offsets->push_back(cur_num_bin);
feature_hist_offsets_.push_back(hist_cur_num_bin);
const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
int num_bin = bin_mapper->num_bin();
if (bin_mapper->GetMostFreqBin() == 0) {
num_bin -= 1;
}
hist_cur_num_bin += num_bin;
cur_num_bin += num_bin;
}
offsets->push_back(cur_num_bin);
CHECK(cur_num_bin == feature_group->bin_offsets_.back());
}
} else {
for (int i = 0; i < feature_group->num_feature_; ++i) {
feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]);
}
hist_cur_num_bin += feature_group->bin_offsets_.back();
}
}
feature_hist_offsets_.push_back(hist_cur_num_bin);
num_hist_total_bin_ = static_cast<int>(feature_hist_offsets_.back());
} else {
double sum_dense_ratio = 0.0f;
int ncol = 0;
for (int gid = 0; gid < static_cast<int>(feature_groups.size()); ++gid) {
if (feature_groups[gid]->is_multi_val_) {
ncol += feature_groups[gid]->num_feature_;
} else {
++ncol;
}
for (int fid = 0; fid < feature_groups[gid]->num_feature_; ++fid) {
const auto& bin_mapper = feature_groups[gid]->bin_mappers_[fid];
sum_dense_ratio += 1.0f - bin_mapper->sparse_rate();
}
}
sum_dense_ratio /= ncol;
const bool is_sparse_row_wise = (1.0f - sum_dense_ratio) >=
MultiValBin::multi_val_bin_sparse_threshold ? 1 : 0;
if (is_sparse_row_wise) {
int cur_num_bin = 1;
uint32_t hist_cur_num_bin = 1;
for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
if (feature_group->is_multi_val_) {
for (int i = 0; i < feature_group->num_feature_; ++i) {
offsets->push_back(cur_num_bin);
feature_hist_offsets_.push_back(hist_cur_num_bin);
const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
int num_bin = bin_mapper->num_bin();
if (bin_mapper->GetMostFreqBin() == 0) {
num_bin -= 1;
}
cur_num_bin += num_bin;
hist_cur_num_bin += num_bin;
}
} else {
offsets->push_back(cur_num_bin);
cur_num_bin += feature_group->bin_offsets_.back() - 1;
for (int i = 0; i < feature_group->num_feature_; ++i) {
feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i] - 1);
}
hist_cur_num_bin += feature_group->bin_offsets_.back() - 1;
}
}
offsets->push_back(cur_num_bin);
feature_hist_offsets_.push_back(hist_cur_num_bin);
} else {
int cur_num_bin = 0;
uint32_t hist_cur_num_bin = 0;
for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
if (feature_group->is_multi_val_) {
for (int i = 0; i < feature_group->num_feature_; ++i) {
const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) {
cur_num_bin += 1;
hist_cur_num_bin += 1;
}
offsets->push_back(cur_num_bin);
feature_hist_offsets_.push_back(hist_cur_num_bin);
int num_bin = bin_mapper->num_bin();
cur_num_bin += num_bin;
hist_cur_num_bin += num_bin;
if (bin_mapper->GetMostFreqBin() == 0) {
feature_hist_offsets_.back() += 1;
}
}
} else {
offsets->push_back(cur_num_bin);
cur_num_bin += feature_group->bin_offsets_.back();
for (int i = 0; i < feature_group->num_feature_; ++i) {
feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]);
}
hist_cur_num_bin += feature_group->bin_offsets_.back();
}
}
offsets->push_back(cur_num_bin);
feature_hist_offsets_.push_back(hist_cur_num_bin);
}
num_hist_total_bin_ = static_cast<int>(feature_hist_offsets_.back());
}
}