in python/dglke/models/ke_model.py
def link_predict(self, head=None, rel=None, tail=None, exec_mode='all', sfunc='none', topk=10, exclude_mode=None, batch_size=DEFAULT_INFER_BATCHSIZE):
""" Predicts missing entities or relations in a triplet.
Given head_id, relation_id and tail_id, return topk most relevent triplet.
Parameters
----------
head: th.Tensor
A tensor of head entity id.
rel: th.Tensor
A tensor of relation id.
tail: th.Tensor
A tensor of tail entity id.
exec_mode: str
How scores are calculated for triplets and how topK is computed (see Examples below):
* triplet_wise: head, relation and tail lists have the same length N,
and we calculate the similarity triplet by triplet:
``result = topK([score(h_i, r_i, t_i) for i in N])``,
the result shape will be (K,)
* all: three lists of head, relation and tail ids are provided as H, R and T,
and we calculate all possible combinations of all triplets (h_i, r_j, t_k):
``result = topK([[[score(h_i, r_j, t_k) for each h_i in H] for each r_j in R] for each t_k in T])``,
the result shape will be (K,)
* batch_head: three lists of head, relation and tail ids are provided as H, R and T
and we calculate topK for each element in head:
``result = topK([[score(h_i, r_j, t_k) for each r_j in R] for each t_k in T]) for each h_i in H``
the result shape will be (sizeof(H), K)
* batch_rel: three lists of head, relation and tail ids are provided as H, R and T,
and we calculate topK for each element in relation:
``result = topK([[score(h_i, r_j, t_k) for each h_i in H] for each t_k in T]) for each r_j in R``,
the result shape will be (sizeof(R), K)
* batch_tail: three lists of head, relation and tail ids are provided as H, R and T,
and we calculate topK for each element in tail:
``result = topK([[score(h_i, r_j, t_k) for each h_i in H] for each r_j in R]) for each t_k in T``,
the result shape will be (sizeof(T), K)
sfunc: str
The score function applied before ranking; the transformed scores are returned:
* none: $score = x$
* logsigmoid: $score = log(sigmoid(x))$
topk: int
The number of top-ranked results to return.
exclude_mode: str
Whether to exclude positive edges:
* None: Do not exclude positive edges.
* 'mask': Return topk edges and a mask indicating which ones are positive edges.
* 'exclude': Exclude positive edges; the returned k edges will be missing edges in the graph.

Returns
-------
A list of (head_idx, rel_idx, tail_idx, score)
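
Examples
--------
A minimal usage sketch; ``model`` is assumed here to be an instance of this
class with its embeddings loaded (and ``load_graph()`` called beforehand if
``exclude_mode`` is used), and the ids below are purely illustrative:

>>> # score each aligned (h_i, r_i, t_i) and keep the 3 best triplets
>>> result = model.link_predict(head=[0, 1, 2], rel=[0, 1, 2], tail=[3, 4, 5],
...                             exec_mode='triplet_wise', topk=3)
>>> # for every head entity, rank all (relation, tail) combinations
>>> per_head = model.link_predict(head=[0, 1, 2],
...                               exec_mode='batch_head', topk=10)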
"""
if head is None:
head = th.arange(0, self.num_entity)
else:
head = th.tensor(head)
if rel is None:
rel = th.arange(0, self.num_rel)
else:
rel = th.tensor(rel)
if tail is None:
tail = th.arange(0, self.num_entity)
else:
tail = th.tensor(tail)
num_head = head.shape[0]
num_rel = rel.shape[0]
num_tail = tail.shape[0]
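# resolve the score transform used for ranking: identity for 'none', log-sigmoid otherwise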
if sfunc == 'none':
sfunc = none
else:
sfunc = logsigmoid
# if exclude_mode is not None, we need a graph to do the edge filtering
assert (self._g is not None) or (exclude_mode is None), \
'If exclude_mode is not None, please use load_graph() to initialize ' \
'a graph for edge filtering.'
if exec_mode == 'triplet_wise':
assert num_head == num_rel, \
'For triplet_wise execution mode, head, relation and tail lists should have the same length'
assert num_head == num_tail, \
'For triplet_wise execution mode, head, relation and tail lists should have the same length'
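# pairwise scoring: raw_score[i] = score(head[i], rel[i], tail[i]), giving an (N,) tensor ranked as a whole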
with th.no_grad():
raw_score = self._infer_score_func(head, rel, tail, triplet_wise=True, batch_size=batch_size)
score = sfunc(raw_score)
idx = th.arange(0, num_head)
result = self._topk_exclude_pos(score=score,
idx=idx,
head=head,
rel=rel,
tail=tail,
topk=topk,
exec_mode=exec_mode,
exclude_mode=exclude_mode)
elif exec_mode == 'all':
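# score every (h_i, r_j, t_k) combination; the (|H|, |R|, |T|) score tensor is flattened and ranked jointly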
with th.no_grad():
raw_score = self._infer_score_func(head, rel, tail)
raw_score = th.reshape(raw_score, (head.shape[0]*rel.shape[0]*tail.shape[0],))
score = sfunc(raw_score)
idx = th.arange(0, num_head * num_rel * num_tail)
result = self._topk_exclude_pos(score=score,
idx=idx,
head=head,
rel=rel,
tail=tail,
topk=topk,
exec_mode=exec_mode,
exclude_mode=exclude_mode)
elif exec_mode == 'batch_head':
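# score all combinations once, then rank the (relation, tail) pairs separately for each head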
result = []
with th.no_grad():
raw_score = self._infer_score_func(head, rel, tail)
for i in range(num_head):
score = sfunc(th.reshape(raw_score[i,:,:], (rel.shape[0]*tail.shape[0],)))
idx = th.arange(0, num_rel * num_tail)
res = self._topk_exclude_pos(score=score,
idx=idx,
head=head[i],
rel=rel,
tail=tail,
topk=topk,
exec_mode=exec_mode,
exclude_mode=exclude_mode)
result.append(res[0])
elif exec_mode == 'batch_rel':
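# score all combinations once, then rank the (head, tail) pairs separately for each relation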
result = []
with th.no_grad():
raw_score = self._infer_score_func(head, rel, tail)
for i in range(num_rel):
score = sfunc(th.reshape(raw_score[:,i,:], (head.shape[0]*tail.shape[0],)))
idx = th.arange(0, num_head * num_tail)
res = self._topk_exclude_pos(score=score,
idx=idx,
head=head,
rel=rel[i],
tail=tail,
topk=topk,
exec_mode=exec_mode,
exclude_mode=exclude_mode)
result.append(res[0])
elif exec_mode == 'batch_tail':
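# score all combinations once, then rank the (head, relation) pairs separately for each tail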
result = []
with th.no_grad():
raw_score = self._infer_score_func(head, rel, tail)
for i in range(num_tail):
score = sfunc(th.reshape(raw_score[:,:,i], (head.shape[0]*rel.shape[0],)))
idx = th.arange(0, num_head * num_rel)
res = self._topk_exclude_pos(score=score,
idx=idx,
head=head,
rel=rel,
tail=tail[i],
topk=topk,
exec_mode=exec_mode,
exclude_mode=exclude_mode)
result.append(res[0])
else:
assert False, 'unknown execution mode type {}'.format(exec_mode)
return result