def parse_n_prepare()

in shaDow/utils.py [0:0]


import os
from copy import deepcopy

import yaml

# NOTE: log_dir, git_rev and timestamp are assumed to be provided elsewhere in
# shaDow/utils.py (or its imports); they are not defined in this excerpt.


def parse_n_prepare(task, args, name_graph, dir_log, os_='linux'):
    # [config]
    if args.configs is not None:
        config_train = args.configs
    else:
        assert task in ['inference', 'postproc']
        if task == 'inference':
            if args.inference_configs is None:
                assert not args.compute_complexity_only
                dir_candy = args.inference_dir
            else:
                assert args.inference_dir is None and args.compute_complexity_only
                dir_candy = None
                config_train = args.inference_configs
        else:  # task == 'postproc'
            if args.postproc_dir is not None:
                dir_candy = args.postproc_dir
            else:
                with open(args.postproc_configs) as f:
                    config_temp = yaml.load(f, Loader=yaml.FullLoader)
                if 'dir_pred_mat' in config_temp:   # all such dirs MUST contain the same yaml
                    dir_candy = config_temp['dir_pred_mat'][0]  
                elif 'dir_emb_mat' in config_temp:  # all ens models should have the same arch (they only differ in the sampler)
                    dir_candy = next(iter(config_temp['dir_emb_mat'].values()))[0]
                else:
                    raise NotImplementedError
        if dir_candy is not None:
            assert os.path.isdir(dir_candy)
            f_yml = [f for f in os.listdir(dir_candy) if f.split('.')[-1] in ['yml', 'yaml']]
            assert len(f_yml) == 1
            config_train = f"{dir_candy}/{f_yml[0]}"
    with open(config_train) as f_config_train:
        config_train = yaml.load(f_config_train, Loader=yaml.FullLoader)
    config_train_copy = deepcopy(config_train)
    # [data]
    config_data = {
        "to_undirected"  : False,
        "transductive"   : False,
        "norm_feat"      : True,
        "valedges_as_input": False
    }
    config_data.update(config_train['data'])
    # [arch]
    arch_gnn = {        # default values
        "dim"               : -1,
        "aggr"              : "sage",
        "residue"           : "none",
        "pooling"           : "center",
        "loss"              : "softmax",
        "num_layers"        : -1,
        "num_cls_layers"    : 1,            # 1 MLP layer for classifier on the node representation
        "act"               : "I",
        "layer_norm"        : 'norm_feat',
        "heads"             : -1,
        "feature_augment"   : "hops",
        "feature_augment_ops": 'sum',
        "feature_smoothen"  : "none",
        "label_smoothen"    : "none",        # label_smoothen is only considered if use_label != none
        "ensemble_act"      : "leakyrelu",
        "branch_sharing"    : False,
        "use_label"         : "none"
    }
    arch_gnn.update(config_train["architecture"])
    for k, v in arch_gnn.items():
        if isinstance(v, str):
            arch_gnn[k] = v.lower()
    assert arch_gnn['aggr'] in ['sage', 'gat', 'gatscat', 'gcn', 'mlp', 'gin', 'sgc', 'sign']
    assert arch_gnn['use_label'] in ['all', 'none', 'no_valid']
    assert arch_gnn['pooling'].split('-')[0] in ['mean', 'max', 'sum', 'center', 'sort']
    assert arch_gnn['residue'] in ['sum', 'concat', 'max', 'none']
    assert arch_gnn['feature_augment'] in ['hops', 'pprs', 'none', 'hops-pprs', 'drnls']
    assert arch_gnn['feature_augment_ops'] in ['concat', 'sum']
    assert arch_gnn['layer_norm'] in ['norm_feat', 'pairnorm']
    if arch_gnn["feature_augment"] and arch_gnn["feature_augment"].lower() != "none":
        arch_gnn["feature_augment"] = set(k for k in arch_gnn["feature_augment"].split("-"))
    else:
        arch_gnn['feature_augment'] = set()
    # [params]
    params_train = {
        "lr"                : 0.01,
        "dropedge"          : 0.0,
        "ensemble_dropout"  : "none",
        "term_window_size"  : 1,
        "term_window_aggr"  : 'center',
        "percent_per_epoch" : {'train': 1., 'valid': 1., 'test': 1.}
    }
    params_train.update(config_train["hyperparameter"])
    params_train["lr"] = float(params_train["lr"])
    for m in ['train', 'valid', 'test']:
        if m not in params_train['percent_per_epoch']:
            params_train['percent_per_epoch'][m] = 1.
        assert 0 <= params_train['percent_per_epoch'][m] <= 1.
    # [sampler]
    sampler_preproc, sampler_train = [], []
    for s in config_train['sampler']:
        phase = s.pop('phase')
        if phase == 'preprocess':
            sampler_preproc.append(s)
        elif phase == 'train':
            sampler_train.append(s)
        else:
            raise NotImplementedError
    batch_size = config_train["hyperparameter"]["batch_size"]
    config_sampler_preproc = {"batch_size": batch_size, "configs": sampler_preproc}
    config_sampler_train = {"batch_size": batch_size, "configs": sampler_train}
    # add self-edges for certain architectures: e.g., GAT hits a divide-by-zero error in the gradient without self-edges
    if arch_gnn["aggr"] in ["gcn", "gat", "gatscat"]:
        for sc in config_sampler_train["configs"]:
            num_ens = [len(v) for k, v in sc.items() if k != 'method']
            assert max(num_ens) == min(num_ens)
            sc["add_self_edge"] = [True] * num_ens[0]
    # [copy yml]
    name_key = f"{arch_gnn['aggr']}_{arch_gnn['num_layers']}"
    if arch_gnn['num_cls_layers'] > 1:
        name_key += f"_{arch_gnn['num_cls_layers']}"
    dir_log_full = log_dir(task, config_train_copy, name_key, dir_log, name_graph, git_rev, timestamp)
    return params_train, config_sampler_preproc, config_sampler_train, config_data, arch_gnn, dir_log_full
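
For context, below is a minimal sketch (not taken from the repo) of the kind of training YAML this function consumes. Only the four top-level keys it reads (data, architecture, hyperparameter, sampler) are shown; any field not listed falls back to the defaults above. The sampler method and field names here are placeholders, not shaDow defaults.

# Hypothetical minimal config illustrating the structure parse_n_prepare() reads.
import yaml

_example_yml = """
data:
  transductive: true            # merged on top of the config_data defaults
architecture:
  dim: 256
  num_layers: 3
  aggr: sage
hyperparameter:
  batch_size: 64                # required: read directly for the sampler configs
  lr: 0.001
sampler:
  - phase: train                # 'phase' is popped; must be 'preprocess' or 'train'
    method: ppr                 # placeholder sampler name
    k: [50]                     # non-'method' fields are per-branch lists of equal length
"""

cfg = yaml.load(_example_yml, Loader=yaml.FullLoader)
assert {'data', 'architecture', 'hyperparameter', 'sampler'}.issubset(cfg)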