void ParallelSampler::preproc_ppr_approximate()

in para_graph_sampler/graph_engine/backend/ParallelSampler.cpp [237:344]


void ParallelSampler::preproc_ppr_approximate(std::vector<NodeType>& preproc_target,
      int k, float alpha, float epsilon, std::string fname_neighs, std::string fname_scores) {
  auto n = graph_full.num_nodes;
  top_ppr_neighs.resize(n);
  top_ppr_scores.resize(n);
  alpha = 1 - alpha;
  // check if ppr vec / neighs have been computed and stored
  if (read_PPR_from_binary_file(fname_neighs, fname_scores, k, alpha, epsilon)) {
    std::cout << "LOADING PPR INFO FROM EXTERNAL FILE: " << fname_neighs << " and " << fname_scores << std::endl;
    return;
  }
  std::vector<NodeType> degree_vec = graph_full.get_degrees();
  // setup top_ppr_neighs
  std::cout << "START COMPUTING PPR SCORE FOR " << preproc_target.size() << " NODES" << std::endl << std::flush;
  double t1 = omp_get_wtime();
  bool use_map = n > 5000000 ? true : false;
  if (use_map) {std::cout << "-- USING MAP for PPR comp" << std::endl;} 
  else {std::cout << "== USING VEC for PPR comp" << std::endl;}
  #pragma omp parallel for
  for (int64_t i_para = 0; i_para < preproc_target.size(); i_para++) {    // some omp version will complain on unsigned counter
    auto target = preproc_target[i_para];
    std::unordered_map<NodeType, PPRType> touched_neigh_map;
    std::map<NodeType, PPRType> pi_eps_m;
    std::map<NodeType, PPRType> residue_m;
    std::vector<PPRType> pi_eps_v;
    std::vector<PPRType> residue_v;
    if (use_map) {
      pi_eps_m[target] = 0.;
      residue_m[target] = 1.;
    } else {
      pi_eps_v.resize(n, 0.);
      residue_v.resize(n, 0.);
      residue_v[target] = 1.;
    }
    std::set<NodeType> prop_set {target};
    while (prop_set.size() > 0) {
      auto v_prop = *(prop_set.begin());
      PPRType res_target_orig;
      if (use_map) {
        res_target_orig = residue_m[v_prop];
        if (pi_eps_m.find(v_prop) != pi_eps_m.end()) {
          pi_eps_m[v_prop] += alpha * res_target_orig;
        } else {
          pi_eps_m[v_prop] = alpha * res_target_orig;
        }
      } else {
        res_target_orig = residue_v[v_prop];
        pi_eps_v[v_prop] += alpha * res_target_orig;
      }
      auto m = (1 - alpha) * res_target_orig / (2 * degree_vec[v_prop]);
      for (NodeType i = graph_full.indptr[v_prop]; i < graph_full.indptr[v_prop + 1]; i ++) {
        auto u = graph_full.indices[i];
        if (use_map) {
          if (residue_m.find(u) != residue_m.end()) {
            residue_m[u] += m;
          } else {
            residue_m[u] = m;
          }
          if (residue_m[u] > epsilon * degree_vec[u]) {
            prop_set.insert(u);
          }
        } else {
          residue_v[u] += m;
          if (residue_v[u] > epsilon * degree_vec[u]) {
            prop_set.insert(u);
          }
        }
      }
      if (use_map) {
        residue_m[v_prop] = res_target_orig * (1 - alpha) / 2;
        if (residue_m[v_prop] <= epsilon * degree_vec[v_prop]) {
          prop_set.erase(v_prop);
          touched_neigh_map[v_prop] = pi_eps_m[v_prop];
        }
      } else {
        residue_v[v_prop] = res_target_orig * (1 - alpha) / 2;
        if (residue_v[v_prop] <= epsilon * degree_vec[v_prop]) {
          prop_set.erase(v_prop);
          touched_neigh_map[v_prop] = pi_eps_v[v_prop];
        }
      }
    }
    // co-sorting indices.
    NodeType _k = std::min(static_cast<NodeType>(k), static_cast<NodeType>(touched_neigh_map.size()));
    std::vector<std::pair<PPRType, NodeType>> pi_idx;
    pi_idx.reserve(touched_neigh_map.size());
    for (auto ni : touched_neigh_map) {
      pi_idx.push_back(std::make_pair(-ni.second, ni.first));
    }
    std::nth_element(pi_idx.begin(), pi_idx.begin() + _k, pi_idx.end());
    std::sort(pi_idx.begin(), pi_idx.begin() + _k);    // We need this to allow reuse of the vecs from other runs with smaller k
    // extract just the indices
    std::vector<NodeType> top_idx;
    std::vector<PPRType> top_score;
    for (NodeType i = 0; i < _k; i++) {
      top_idx.push_back(pi_idx[i].second);
      top_score.push_back(-pi_idx[i].first);
      if (i > 1 && -pi_idx[1].first == 0) {
        assert(-pi_idx[i].first == 0);
      }
    }
    top_ppr_neighs[target] = top_idx;
    top_ppr_scores[target] = top_score;
  }
  double t2 = omp_get_wtime();
  std::cout << "TIME FOR PPR: " << t2 - t1 << std::endl;
  write_PPR_to_binary_file(fname_neighs, fname_scores, k, alpha, epsilon);
}