# prepare_dataset_ogb_wikikg2()
#
# in src/preprocess_datasets.py [0:0]

def prepare_dataset_ogb_wikikg2(name):
    """Preprocess the ogbl-wikikg2 OGB link property prediction dataset.

    Note that the evaluation protocol is different from conventional KBC
    datasets:

    training input: (h, r, t)
    valid/test input: (h, r, t, h_neg, t_neg), including 500 negatives
    respectively for h and t (the exact count is read from the data and
    recorded in meta_info rather than hard-coded).

    Args:
        name: OGB dataset name (e.g. 'ogbl-wikikg2'); also used as the
            output subdirectory under DATA_PATH.

    Side effects:
        Writes train.npy / valid.npy / test.npy (uint64) and
        meta_info.pickle under DATA_PATH / name.
    """
    dataset = LinkPropPredDataset(name)
    split_edge = dataset.get_edge_split()
    train_triples = split_edge["train"]
    valid_triples = split_edge["valid"]
    test_triples = split_edge["test"]

    # Counts are derived from the training split only — assumes every
    # entity/relation in valid/test also appears in train (TODO confirm
    # against the OGB dataset guarantees).
    # Using per-array .max() avoids materializing a concatenated copy of
    # head+tail just to take its maximum.
    nrelation = int(train_triples['relation'].max()) + 1
    nentity = int(max(train_triples['head'].max(),
                      train_triples['tail'].max())) + 1
    print(nentity, nrelation)

    def _to_array(triples, with_negatives):
        """Stack (h, r, t) as columns, optionally appending the provided
        head/tail negative columns (valid/test splits only)."""
        columns = [triples['head'].reshape(-1, 1),
                   triples['relation'].reshape(-1, 1),
                   triples['tail'].reshape(-1, 1)]
        if with_negatives:
            columns.extend([triples['head_neg'], triples['tail_neg']])
        return np.concatenate(columns, axis=1)

    train_array = _to_array(train_triples, with_negatives=False)
    valid_array = _to_array(valid_triples, with_negatives=True)
    test_array = _to_array(test_triples, with_negatives=True)

    print('Saving arrays ...')
    out_dir = Path(DATA_PATH) / name
    out_dir.mkdir(parents=True, exist_ok=True)
    # using npy since it is too big for pickling
    for split, array in (('train', train_array),
                         ('valid', valid_array),
                         ('test', test_array)):
        with open(out_dir / (split + '.npy'), 'wb') as out:
            np.save(out, array.astype('uint64'))

    print('Saving meta_info ...')
    meta_info = {'n_provided_neg_head': valid_triples['head_neg'].shape[1],
                 'n_provided_neg_tail': valid_triples['tail_neg'].shape[1],
                 }
    # Context manager guarantees the file is closed even if dump raises
    # (the original opened/closed by hand).
    with open(out_dir / ('meta_info' + '.pickle'), 'wb') as out:
        pickle.dump(meta_info, out)
    print('Done processing!')