void TrainingShareStates::CalcBinOffsets()

in src/io/train_share_states.cpp [251:385]


void TrainingShareStates::CalcBinOffsets(const std::vector<std::unique_ptr<FeatureGroup>>& feature_groups,
  std::vector<uint32_t>* offsets, bool in_is_col_wise) {
  offsets->clear();
  feature_hist_offsets_.clear();
  if (in_is_col_wise) {
    uint32_t cur_num_bin = 0;
    uint32_t hist_cur_num_bin = 0;
    for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
      const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
      if (feature_group->is_multi_val_) {
        if (feature_group->is_dense_multi_val_) {
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) {
              cur_num_bin += 1;
              hist_cur_num_bin += 1;
            }
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            int num_bin = bin_mapper->num_bin();
            hist_cur_num_bin += num_bin;
            if (bin_mapper->GetMostFreqBin() == 0) {
              feature_hist_offsets_.back() += 1;
            }
            cur_num_bin += num_bin;
          }
          offsets->push_back(cur_num_bin);
          CHECK(cur_num_bin == feature_group->bin_offsets_.back());
        } else {
          cur_num_bin += 1;
          hist_cur_num_bin += 1;
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            int num_bin = bin_mapper->num_bin();
            if (bin_mapper->GetMostFreqBin() == 0) {
              num_bin -= 1;
            }
            hist_cur_num_bin += num_bin;
            cur_num_bin += num_bin;
          }
          offsets->push_back(cur_num_bin);
          CHECK(cur_num_bin == feature_group->bin_offsets_.back());
        }
      } else {
        for (int i = 0; i < feature_group->num_feature_; ++i) {
          feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]);
        }
        hist_cur_num_bin += feature_group->bin_offsets_.back();
      }
    }
    feature_hist_offsets_.push_back(hist_cur_num_bin);
    num_hist_total_bin_ = static_cast<int>(feature_hist_offsets_.back());
  } else {
    double sum_dense_ratio = 0.0f;
    int ncol = 0;
    for (int gid = 0; gid < static_cast<int>(feature_groups.size()); ++gid) {
      if (feature_groups[gid]->is_multi_val_) {
        ncol += feature_groups[gid]->num_feature_;
      } else {
        ++ncol;
      }
      for (int fid = 0; fid < feature_groups[gid]->num_feature_; ++fid) {
        const auto& bin_mapper = feature_groups[gid]->bin_mappers_[fid];
        sum_dense_ratio += 1.0f - bin_mapper->sparse_rate();
      }
    }
    sum_dense_ratio /= ncol;
    const bool is_sparse_row_wise = (1.0f - sum_dense_ratio) >=
      MultiValBin::multi_val_bin_sparse_threshold ? 1 : 0;
    if (is_sparse_row_wise) {
      int cur_num_bin = 1;
      uint32_t hist_cur_num_bin = 1;
      for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
        const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
        if (feature_group->is_multi_val_) {
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            int num_bin = bin_mapper->num_bin();
            if (bin_mapper->GetMostFreqBin() == 0) {
              num_bin -= 1;
            }
            cur_num_bin += num_bin;
            hist_cur_num_bin += num_bin;
          }
        } else {
          offsets->push_back(cur_num_bin);
          cur_num_bin += feature_group->bin_offsets_.back() - 1;
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i] - 1);
          }
          hist_cur_num_bin += feature_group->bin_offsets_.back() - 1;
        }
      }
      offsets->push_back(cur_num_bin);
      feature_hist_offsets_.push_back(hist_cur_num_bin);
    } else {
      int cur_num_bin = 0;
      uint32_t hist_cur_num_bin = 0;
      for (int group = 0; group < static_cast<int>(feature_groups.size()); ++group) {
        const std::unique_ptr<FeatureGroup>& feature_group = feature_groups[group];
        if (feature_group->is_multi_val_) {
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            const std::unique_ptr<BinMapper>& bin_mapper = feature_group->bin_mappers_[i];
            if (group == 0 && i == 0 && bin_mapper->GetMostFreqBin() > 0) {
              cur_num_bin += 1;
              hist_cur_num_bin += 1;
            }
            offsets->push_back(cur_num_bin);
            feature_hist_offsets_.push_back(hist_cur_num_bin);
            int num_bin = bin_mapper->num_bin();
            cur_num_bin += num_bin;
            hist_cur_num_bin += num_bin;
            if (bin_mapper->GetMostFreqBin() == 0) {
              feature_hist_offsets_.back() += 1;
            }
          }
        } else {
          offsets->push_back(cur_num_bin);
          cur_num_bin += feature_group->bin_offsets_.back();
          for (int i = 0; i < feature_group->num_feature_; ++i) {
            feature_hist_offsets_.push_back(hist_cur_num_bin + feature_group->bin_offsets_[i]);
          }
          hist_cur_num_bin += feature_group->bin_offsets_.back();
        }
      }
      offsets->push_back(cur_num_bin);
      feature_hist_offsets_.push_back(hist_cur_num_bin);
    }
    num_hist_total_bin_ = static_cast<int>(feature_hist_offsets_.back());
  }
}