def hashes()

in cc_net/mine.py [0:0]


def hashes(conf: Config) -> List[Path]:
    """Computes hashes for each shard.

    One file is expected per shard, at
    ``<conf.output_dir>/hashes/<conf.dump>/<shard:04d>.bin``. Shards whose file
    already exists are skipped; only the missing ones are handed to the
    executor.

    Args:
        conf: mining configuration. This function reads ``output_dir``,
            ``dump`` and ``num_shards``, and calls ``get_executor``.

    Returns:
        The paths of the hash files of all shards, in shard order.

    Raises:
        AssertionError: if some hash files still don't exist roughly 20
            seconds after the executor call returned.
    """
    hashes_dir = conf.output_dir / "hashes" / conf.dump
    outputs = [hashes_dir / f"{shard:04d}.bin" for shard in range(conf.num_shards)]
    missing_outputs = [(shard, o) for shard, o in enumerate(outputs) if not o.exists()]

    # Everything is already on disk: nothing to schedule.
    if not missing_outputs:
        return outputs

    hashes_dir.mkdir(parents=True, exist_ok=True)
    # With FlatHashSet we need ~2Gb of RAM / shard, but we need to account for
    # overhead due to how the dynamic allocation works.
    ex = conf.get_executor(f"hashes_{conf.dump}", mem_gb=4, timeout_hour=6, cpus=2)
    # NOTE(review): `_transpose` presumably turns the (shard, path) pairs into
    # two parallel argument sequences for `_hashes_shard` -- confirm.
    ex(_hashes_shard, repeat(conf), *_transpose(missing_outputs))

    # Wait a bit so that the files appear on the disk. We poll instead of
    # sleeping for a fixed duration, so we return as soon as they are all
    # visible and wait at most ~20 seconds otherwise.
    deadline = time.monotonic() + 20
    while True:
        still_missing = [o for o in outputs if not o.exists()]
        if not still_missing or time.monotonic() >= deadline:
            break
        time.sleep(1)

    if still_missing:
        # Raised explicitly (rather than with an `assert` statement) so the
        # check survives `python -O`. The exception type is kept as
        # AssertionError for backward compatibility with existing callers.
        preview = ", ".join(str(o) for o in still_missing[:5])
        if len(still_missing) > 5:
            preview += ", ..."
        raise AssertionError(
            f"{len(still_missing)}/{len(outputs)} hash files are missing "
            f"in {hashes_dir}: {preview}"
        )
    return outputs