in python/dglke/eval.py [0:0]
def main():
    """Evaluate a trained KGE model on the test split of a dataset.

    Parses CLI args, loads the dataset and a saved model checkpoint from
    ``args.model_path``, builds chunk-head/chunk-tail negative samplers for
    the test set, and runs evaluation either in a single process or across
    ``args.num_proc`` worker processes (one or more per GPU), averaging the
    per-edge metrics collected from each worker.
    """
    args = ArgParser().parse_args()
    args.eval_filter = not args.no_eval_filter
    if args.neg_deg_sample_eval:
        assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges."

    assert os.path.exists(args.model_path), 'No existing model_path: {}'.format(args.model_path)

    # load dataset and samplers
    dataset = get_dataset(args.data_path,
                          args.dataset,
                          args.format,
                          args.delimiter,
                          args.data_files)

    # Evaluation-only run: force the split flags and disable training-time
    # features (relation partitioning, async updates, edge importance).
    args.train = False
    args.valid = False
    args.test = True
    args.strict_rel_part = False
    args.soft_rel_part = False
    args.async_update = False
    args.has_edge_importance = False

    if len(args.gpu) > 1:
        args.mix_cpu_gpu = True
        if args.num_proc < len(args.gpu):
            args.num_proc = len(args.gpu)
    # We need to ensure that the number of processes should match the number of GPUs.
    if len(args.gpu) > 1 and args.num_proc > 1:
        assert args.num_proc % len(args.gpu) == 0, \
            'The number of processes needs to be divisible by the number of GPUs'

    # Here we want to use the regular negative sampler because we need to ensure that
    # all positive edges are excluded.
    g = ConstructGraph(dataset, args)
    eval_dataset = EvalDataset(g, dataset, args)

    if args.neg_sample_size_eval < 0:
        # Negative sample size < 0 means "use every entity as a negative".
        args.neg_sample_size_eval = args.neg_sample_size = eval_dataset.g.number_of_nodes()
    args.batch_size_eval = get_compatible_batch_size(args.batch_size_eval, args.neg_sample_size_eval)

    args.num_workers = 8 # fix num_workers to 8
    if args.num_proc > 1:
        # One head sampler and one tail sampler per worker process, each
        # responsible for its own rank's shard of the test edges.
        test_sampler_tails = []
        test_sampler_heads = []
        for i in range(args.num_proc):
            test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                            args.neg_sample_size_eval,
                                                            args.neg_sample_size_eval,
                                                            args.eval_filter,
                                                            mode='chunk-head',
                                                            num_workers=args.num_workers,
                                                            rank=i, ranks=args.num_proc)
            test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                            args.neg_sample_size_eval,
                                                            args.neg_sample_size_eval,
                                                            args.eval_filter,
                                                            mode='chunk-tail',
                                                            num_workers=args.num_workers,
                                                            rank=i, ranks=args.num_proc)
            test_sampler_heads.append(test_sampler_head)
            test_sampler_tails.append(test_sampler_tail)
    else:
        test_sampler_head = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                        args.neg_sample_size_eval,
                                                        args.neg_sample_size_eval,
                                                        args.eval_filter,
                                                        mode='chunk-head',
                                                        num_workers=args.num_workers,
                                                        rank=0, ranks=1)
        test_sampler_tail = eval_dataset.create_sampler('test', args.batch_size_eval,
                                                        args.neg_sample_size_eval,
                                                        args.neg_sample_size_eval,
                                                        args.eval_filter,
                                                        mode='chunk-tail',
                                                        num_workers=args.num_workers,
                                                        rank=0, ranks=1)

    # load model
    n_entities = dataset.n_entities
    n_relations = dataset.n_relations
    ckpt_path = args.model_path
    model = load_model_from_checkpoint(args, n_entities, n_relations, ckpt_path)

    if args.num_proc > 1:
        # Share embeddings across worker processes instead of copying them.
        model.share_memory()

    # test
    args.step = 0
    args.max_step = 0
    start = time.time()
    if args.num_proc > 1:
        queue = mp.Queue(args.num_proc)
        procs = []
        for i in range(args.num_proc):
            proc = mp.Process(target=test_mp, args=(args,
                                                    model,
                                                    [test_sampler_heads[i], test_sampler_tails[i]],
                                                    i,
                                                    'Test',
                                                    queue))
            procs.append(proc)
            proc.start()

        # Drain the result queue BEFORE joining: workers block on queue.put
        # until their results are consumed, so joining first would deadlock.
        metrics = {}
        logs = []
        for i in range(args.num_proc):
            log = queue.get()
            logs.extend(log)

        # Average each metric over all evaluated edges from all workers.
        for metric in logs[0].keys():
            metrics[metric] = sum([log[metric] for log in logs]) / len(logs)
        print("-------------- Test result --------------")
        for k, v in metrics.items():
            print('Test average {}: {}'.format(k, v))
        print("-----------------------------------------")

        for proc in procs:
            proc.join()
    else:
        test(args, model, [test_sampler_head, test_sampler_tail])
    print('Test takes {:.3f} seconds'.format(time.time() - start))