in shaDow/utils.py [0:0]
def parse_n_prepare(task, args, name_graph, dir_log, os_='linux'):
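    """
    Parse the command line arguments and the training yaml into the config dicts
    consumed by the rest of the pipeline.

    Args:
        task        current phase; 'inference' and 'postproc' may recover the
                    training yaml from the directory of a previous run
        args        parsed command line arguments
        name_graph  name of the graph / dataset
        dir_log     root directory for logging
        os_         operating system hint (unused in this function)

    Returns:
        params_train            training hyperparameters
        config_sampler_preproc  sampler configs for the preprocessing phase
        config_sampler_train    sampler configs for the training phase
        config_data             data loading / preprocessing flags
        arch_gnn                GNN architecture settings
        dir_log_full            full path of the logging directory for this run
    """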
# [config]
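    # Locate the training yaml: either passed directly (args.configs / args.inference_configs),
    # or recovered from the directory of a previous inference / postprocessing run.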
if args.configs is not None:
config_train = args.configs
else:
assert task in ['inference', 'postproc']
if task == 'inference':
if args.inference_configs is None:
assert not args.compute_complexity_only
dir_candy = args.inference_dir
else:
assert args.inference_dir is None and args.compute_complexity_only
dir_candy = None
config_train = args.inference_configs
else:
if args.postproc_dir is not None:
dir_candy = args.postproc_dir
else:
with open(args.postproc_configs) as f:
config_temp = yaml.load(f, Loader=yaml.FullLoader)
if 'dir_pred_mat' in config_temp: # all such dirs MUST contain the same yaml
dir_candy = config_temp['dir_pred_mat'][0]
                    elif 'dir_emb_mat' in config_temp:    # all ensemble models should share the same architecture (they differ only in the sampler)
dir_candy = next(iter(config_temp['dir_emb_mat'].values()))[0]
else:
raise NotImplementedError
if dir_candy is not None:
assert os.path.isdir(dir_candy)
f_yml = [f for f in os.listdir(dir_candy) if f.split('.')[-1] in ['yml', 'yaml']]
assert len(f_yml) == 1
config_train = f"{dir_candy}/{f_yml[0]}"
with open(config_train) as f_config_train:
config_train = yaml.load(f_config_train, Loader=yaml.FullLoader)
config_train_copy = deepcopy(config_train)
# [data]
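    # Default data-handling flags, overridden by the 'data' section of the yaml.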
config_data = {
"to_undirected" : False,
"transductive" : False,
"norm_feat" : True,
"valedges_as_input": False
}
config_data.update(config_train['data'])
# [arch]
arch_gnn = { # default values
"dim" : -1,
"aggr" : "sage",
"residue" : "none",
"pooling" : "center",
"loss" : "softmax",
"num_layers" : -1,
"num_cls_layers" : 1, # 1 MLP layer for classifier on the node representation
"act" : "I",
"layer_norm" : 'norm_feat',
"heads" : -1,
"feature_augment" : "hops",
"feature_augment_ops": 'sum',
"feature_smoothen" : "none",
"label_smoothen" : "none", # label_smoothen is only considered if use_label != none
"ensemble_act" : "leakyrelu",
"branch_sharing" : False,
"use_label" : "none"
}
arch_gnn.update(config_train["architecture"])
for k, v in arch_gnn.items():
        if isinstance(v, str):
arch_gnn[k] = v.lower()
assert arch_gnn['aggr'] in ['sage', 'gat', 'gatscat', 'gcn', 'mlp', 'gin', 'sgc', 'sign']
assert arch_gnn['use_label'] in ['all', 'none', 'no_valid']
assert arch_gnn['pooling'].split('-')[0] in ['mean', 'max', 'sum', 'center', 'sort']
assert arch_gnn['residue'] in ['sum', 'concat', 'max', 'none']
assert arch_gnn['feature_augment'] in ['hops', 'pprs', 'none', 'hops-pprs', 'drnls']
assert arch_gnn['feature_augment_ops'] in ['concat', 'sum']
assert arch_gnn['layer_norm'] in ['norm_feat', 'pairnorm']
if arch_gnn["feature_augment"] and arch_gnn["feature_augment"].lower() != "none":
arch_gnn["feature_augment"] = set(k for k in arch_gnn["feature_augment"].split("-"))
else:
arch_gnn['feature_augment'] = set()
# [params]
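    # Default training settings, overridden by the 'hyperparameter' section of the yaml.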
params_train = {
"lr" : 0.01,
"dropedge" : 0.0,
"ensemble_dropout" : "none",
"term_window_size" : 1,
"term_window_aggr" : 'center',
"percent_per_epoch" : {'train': 1., 'valid': 1., 'test': 1.}
}
params_train.update(config_train["hyperparameter"])
params_train["lr"] = float(params_train["lr"])
for m in ['train', 'valid', 'test']:
if m not in params_train['percent_per_epoch']:
            params_train['percent_per_epoch'][m] = 1.
for m in ['train', 'valid', 'test']:
assert 0 <= params_train['percent_per_epoch'][m] <= 1.
# [sampler]
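    # Split the sampler configs by their 'phase' field: samplers used in preprocessing vs. samplers used during training.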
sampler_preproc, sampler_train = [], []
for s in config_train['sampler']:
phase = s.pop('phase')
if phase == 'preprocess':
sampler_preproc.append(s)
elif phase == 'train':
sampler_train.append(s)
else:
raise NotImplementedError
batch_size = config_train["hyperparameter"]["batch_size"]
config_sampler_preproc = {"batch_size": batch_size, "configs": sampler_preproc}
config_sampler_train = {"batch_size": batch_size, "configs": sampler_train}
    # add self-edges for certain architectures: e.g., for GAT, missing self-edges cause a divide-by-zero error in the gradient
if arch_gnn["aggr"] in ["gcn", "gat", "gatscat"]:
for sc in config_sampler_train["configs"]:
num_ens = [len(v) for k, v in sc.items() if k != 'method']
assert max(num_ens) == min(num_ens)
sc["add_self_edge"] = [True] * num_ens[0]
# [copy yml]
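    # Name the run by aggregator type and depth (plus classifier depth if > 1), then construct the full logging directory.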
name_key = f"{arch_gnn['aggr']}_{arch_gnn['num_layers']}"
if arch_gnn['num_cls_layers'] > 1:
name_key += f"_{arch_gnn['num_cls_layers']}"
dir_log_full = log_dir(task, config_train_copy, name_key, dir_log, name_graph, git_rev, timestamp)
return params_train, config_sampler_preproc, config_sampler_train, config_data, arch_gnn, dir_log_full
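

# Example invocation (hypothetical: the yaml path, graph name, and log dir below
# are placeholders; in a real run, `args` comes from the command line parser):
#
#   from argparse import Namespace
#   demo_args = Namespace(
#       configs='config/demo.yml',              # placeholder training yaml
#       inference_configs=None, inference_dir=None,
#       postproc_configs=None, postproc_dir=None,
#       compute_complexity_only=False,
#   )
#   params_train, cfg_sampler_preproc, cfg_sampler_train, cfg_data, arch_gnn, dir_log_full = \
#       parse_n_prepare('train', demo_args, name_graph='demo_graph', dir_log='./log_demo')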