in core/src/main/python/synapse/ml/cyber/dataset.py [0:0]
def edges_between(
    self,
    users: List[str],
    resources: List[str],
    ratio: float,
    full_node_coverage: bool,
    not_set: Optional[Set[Tuple[str, str]]] = None,
) -> List[Tuple[str, str, float]]:
    """Randomly pick distinct (user, resource) pairs and give each a random score.

    Pairs are drawn with ``self.rand`` until at least
    ``len(users) * len(resources) * ratio`` edges have been collected and,
    when ``full_node_coverage`` is true, every user and every resource
    appears in at least one edge.

    Args:
        users: Candidate user names.
        resources: Candidate resource names.
        ratio: Fraction of the full users x resources product to generate.
            Values >= 0.5 switch to sampling without replacement from the
            pre-built list of all pairs.
        full_node_coverage: When true, keep adding edges until every user
            and every resource has been used at least once, even if the
            requested edge count has already been reached.
        not_set: Optional set of (user, resource) pairs that must never be
            emitted.

    Returns:
        A list of ``(user, resource, score)`` tuples with no repeated
        (user, resource) pair. ``score`` is drawn with
        ``self.rand.randint(500, 1000)``. Returns ``[]`` when either input
        list is empty.

    Note:
        If ``not_set`` excludes so many pairs that the stopping condition
        cannot be met, the sparse path (ratio < 0.5) never terminates and
        the dense path fails its assertion once all pairs are used up.
    """
    import itertools

    if len(users) == 0 or len(resources) == 0:
        return []

    required_edge_cnt = len(users) * len(resources) * ratio
    tups = []
    seen = set()
    seen_users = set()
    seen_resources = set()

    # Optimization for dense access patterns: enumerate every possible pair
    # up front and sample without replacement, instead of repeatedly
    # drawing random pairs that mostly collide with already-seen ones.
    cart = (
        list(itertools.product(range(len(users)), range(len(resources))))
        if ratio >= 0.5
        else None
    )

    # The coverage checks are grouped explicitly: `and` binds tighter than
    # `or`, so without the parentheses the resource-coverage check would
    # apply even when full_node_coverage is False.
    while len(tups) < required_edge_cnt or (
        full_node_coverage
        and (
            len(seen_users) < len(users)
            or len(seen_resources) < len(resources)
        )
    ):
        if cart is not None:
            assert len(cart) > 0, cart
            ii = self.rand.randint(0, len(cart) - 1)
            ui, ri = cart[ii]
            # O(1) removal: overwrite the chosen slot with the last pair,
            # then drop the last slot.
            cart[ii] = cart[-1]
            cart.pop()
        else:
            ui = self.rand.randint(0, len(users) - 1)
            ri = self.rand.randint(0, len(resources) - 1)

        user = users[ui]
        res = resources[ri]

        # Skip duplicates and explicitly forbidden pairs.
        if (ui, ri) in seen or (not_set is not None and (user, res) in not_set):
            continue

        seen.add((ui, ri))
        seen_users.add(ui)
        seen_resources.add(ri)

        assert user is not None
        assert res is not None

        score = self.rand.randint(500, 1000)
        tups.append((user, res, score))

    return tups