in para_graph_sampler/graph_engine/frontend/samplers_ensemble.py [0:0]
def _sort_sampler_order(self, sampler_config_list, aug_feat_list):
    """Move the PPR sampler with the largest budget to the front.

    Call this func for each backend. While subgraph ensemble does not
    enforce an order of the samplers, we "reorder" the samplers here so that
    the C++ sampler knows which pre-computed PPR file to load from.
    E.g., with a PPR of k=300 and a PPR of k=200, the C++ sampler would need
    to load the precomputed file of k=300.

    Args:
        sampler_config_list: list of sampler config dicts. Every dict must
            have a 'method' key; dicts whose method is 'ppr' or 'ppr_st'
            must also have a 'k' key convertible with ``int()``.
        aug_feat_list: list indexed in parallel with ``sampler_config_list``
            (the entry at the promoted sampler's index is moved to the front
            as well). Its elements are not inspected here.

    Returns:
        A ``(sampler_config_list, aug_feat_list)`` tuple. If a PPR sampler is
        present, both are NEW lists with the largest-budget PPR entry first
        and all other entries in their original relative order; the input
        lists themselves are left untouched. Otherwise the input lists are
        returned as-is.

    Side effects:
        The config dicts are updated in place: 'is_preproc' is set on every
        dict (True only for the first occurrence of each method in the
        returned order), and 'k_required' is set on the promoted sampler
        when its method is 'ppr_st'.

    Raises:
        AssertionError: if both 'ppr' and 'ppr_st' samplers are configured.
    """
    ppr_methods = ('ppr', 'ppr_st')
    # If stochastic PPR, then we sample k nodes from a pool of 2k candidates.
    ppr_st_pool_factor = 2

    # assert ppr and ppr_stochastic do not coexist
    names_samplers = {cfg['method'] for cfg in sampler_config_list}
    assert not ('ppr' in names_samplers and 'ppr_st' in names_samplers), \
        'pls check if you want both the determinstic and stochastic version of PPR?'

    i_ppr_largest_k = None  # idx (in the original config list) of the PPR sampler with largest k
    ppr_largest_k = None    # its effective budget: int(k), doubled for 'ppr_st'
    for i, cfg in enumerate(sampler_config_list):
        cfg["is_preproc"] = False
        if cfg["method"] not in ppr_methods:
            continue
        factor = ppr_st_pool_factor if cfg["method"] == 'ppr_st' else 1
        k_effective = int(cfg["k"]) * factor
        # Strict '>' keeps the earliest sampler on ties.
        if i_ppr_largest_k is None or k_effective > ppr_largest_k:
            ppr_largest_k = k_effective
            i_ppr_largest_k = i

    # re-order PPR
    if i_ppr_largest_k is not None:
        idx = i_ppr_largest_k
        top1_ppr_sampler = sampler_config_list[idx]
        top1_aug_feat = aug_feat_list[idx]
        if top1_ppr_sampler['method'] == 'ppr_st':
            # ppr_largest_k was computed from this very sampler, so it already
            # equals int(k) * 2; reusing it avoids multiplying a raw
            # (possibly string-typed) 'k'.
            top1_ppr_sampler['k_required'] = ppr_largest_k
        # Build new lists by slicing instead of pop(), so the caller's lists
        # keep all of their entries and can be passed in again.
        sampler_config_list = (
            [top1_ppr_sampler]
            + sampler_config_list[:idx]
            + sampler_config_list[idx + 1:]
        )
        aug_feat_list = (
            [top1_aug_feat]
            + aug_feat_list[:idx]
            + aug_feat_list[idx + 1:]
        )

    # only preproc the first occurrence of one type of sampler
    seen_methods = set()
    for cfg in sampler_config_list:
        if cfg["method"] not in seen_methods:
            seen_methods.add(cfg["method"])
            cfg["is_preproc"] = True
    return sampler_config_list, aug_feat_list