def get_num_edge_chunks()

in torchbiggraph/train_cpu.py [0:0]


def get_num_edge_chunks(config: ConfigSchema) -> int:
    """Return how many chunks each bucket's edges should be split into.

    If ``config.num_edge_chunks`` is set (anything other than ``None``), that
    value is returned as-is. Otherwise the count is derived from
    ``config.max_edges_per_chunk`` and an estimate of the largest bucket
    size, and is never less than 1.
    """
    explicit_chunks = config.num_edge_chunks
    if explicit_chunks is not None:
        return explicit_chunks

    # Inspecting every (edge path, lhs partition, rhs partition) combination
    # could mean thousands of lookups, so only one bucket per edge path is
    # sampled, namely (UNPARTITIONED, UNPARTITIONED), i.e. bucket (0, 0).
    # This treats edges as if they were spread uniformly over the buckets.
    # That is only an approximation: it is the entities that are uniformly
    # spread over partitions, and bucket membership follows from that, so
    # e.g. very high degree entities can skew the sizes. Each edge path is
    # still sampled separately because paths may differ semantically and
    # therefore in size.
    sampled_sizes = [0]  # the 0 seed keeps the estimate non-negative
    sampled_sizes.extend(
        EDGE_STORAGES.make_instance(path).get_number_of_edges(
            UNPARTITIONED, UNPARTITIONED
        )
        for path in config.edge_paths
    )
    largest_bucket = max(sampled_sizes)

    needed_chunks = math.ceil(largest_bucket / config.max_edges_per_chunk)
    # Always use at least one chunk, even when no edges were found.
    return needed_chunks if needed_chunks > 1 else 1