in para_graph_sampler/graph_engine/backend/ParallelSampler.cpp [237:344]
void ParallelSampler::preproc_ppr_approximate(std::vector<NodeType>& preproc_target,
int k, float alpha, float epsilon, std::string fname_neighs, std::string fname_scores) {
auto n = graph_full.num_nodes;
top_ppr_neighs.resize(n);
top_ppr_scores.resize(n);
alpha = 1 - alpha;
// check if ppr vec / neighs have been computed and stored
if (read_PPR_from_binary_file(fname_neighs, fname_scores, k, alpha, epsilon)) {
std::cout << "LOADING PPR INFO FROM EXTERNAL FILE: " << fname_neighs << " and " << fname_scores << std::endl;
return;
}
std::vector<NodeType> degree_vec = graph_full.get_degrees();
// setup top_ppr_neighs
std::cout << "START COMPUTING PPR SCORE FOR " << preproc_target.size() << " NODES" << std::endl << std::flush;
double t1 = omp_get_wtime();
bool use_map = n > 5000000 ? true : false;
if (use_map) {std::cout << "-- USING MAP for PPR comp" << std::endl;}
else {std::cout << "== USING VEC for PPR comp" << std::endl;}
#pragma omp parallel for
for (int64_t i_para = 0; i_para < preproc_target.size(); i_para++) { // some omp version will complain on unsigned counter
auto target = preproc_target[i_para];
std::unordered_map<NodeType, PPRType> touched_neigh_map;
std::map<NodeType, PPRType> pi_eps_m;
std::map<NodeType, PPRType> residue_m;
std::vector<PPRType> pi_eps_v;
std::vector<PPRType> residue_v;
if (use_map) {
pi_eps_m[target] = 0.;
residue_m[target] = 1.;
} else {
pi_eps_v.resize(n, 0.);
residue_v.resize(n, 0.);
residue_v[target] = 1.;
}
std::set<NodeType> prop_set {target};
while (prop_set.size() > 0) {
auto v_prop = *(prop_set.begin());
PPRType res_target_orig;
if (use_map) {
res_target_orig = residue_m[v_prop];
if (pi_eps_m.find(v_prop) != pi_eps_m.end()) {
pi_eps_m[v_prop] += alpha * res_target_orig;
} else {
pi_eps_m[v_prop] = alpha * res_target_orig;
}
} else {
res_target_orig = residue_v[v_prop];
pi_eps_v[v_prop] += alpha * res_target_orig;
}
auto m = (1 - alpha) * res_target_orig / (2 * degree_vec[v_prop]);
for (NodeType i = graph_full.indptr[v_prop]; i < graph_full.indptr[v_prop + 1]; i ++) {
auto u = graph_full.indices[i];
if (use_map) {
if (residue_m.find(u) != residue_m.end()) {
residue_m[u] += m;
} else {
residue_m[u] = m;
}
if (residue_m[u] > epsilon * degree_vec[u]) {
prop_set.insert(u);
}
} else {
residue_v[u] += m;
if (residue_v[u] > epsilon * degree_vec[u]) {
prop_set.insert(u);
}
}
}
if (use_map) {
residue_m[v_prop] = res_target_orig * (1 - alpha) / 2;
if (residue_m[v_prop] <= epsilon * degree_vec[v_prop]) {
prop_set.erase(v_prop);
touched_neigh_map[v_prop] = pi_eps_m[v_prop];
}
} else {
residue_v[v_prop] = res_target_orig * (1 - alpha) / 2;
if (residue_v[v_prop] <= epsilon * degree_vec[v_prop]) {
prop_set.erase(v_prop);
touched_neigh_map[v_prop] = pi_eps_v[v_prop];
}
}
}
// co-sorting indices.
NodeType _k = std::min(static_cast<NodeType>(k), static_cast<NodeType>(touched_neigh_map.size()));
std::vector<std::pair<PPRType, NodeType>> pi_idx;
pi_idx.reserve(touched_neigh_map.size());
for (auto ni : touched_neigh_map) {
pi_idx.push_back(std::make_pair(-ni.second, ni.first));
}
std::nth_element(pi_idx.begin(), pi_idx.begin() + _k, pi_idx.end());
std::sort(pi_idx.begin(), pi_idx.begin() + _k); // We need this to allow reuse of the vecs from other runs with smaller k
// extract just the indices
std::vector<NodeType> top_idx;
std::vector<PPRType> top_score;
for (NodeType i = 0; i < _k; i++) {
top_idx.push_back(pi_idx[i].second);
top_score.push_back(-pi_idx[i].first);
if (i > 1 && -pi_idx[1].first == 0) {
assert(-pi_idx[i].first == 0);
}
}
top_ppr_neighs[target] = top_idx;
top_ppr_scores[target] = top_score;
}
double t2 = omp_get_wtime();
std::cout << "TIME FOR PPR: " << t2 - t1 << std::endl;
write_PPR_to_binary_file(fname_neighs, fname_scores, k, alpha, epsilon);
}