def estimate_xtool()

in de/estimate.py [0:0]


def estimate_xtool(paths):
    """Run ``xtool dedup`` over *paths* and return the byte count it reports.

    The command is invoked against the ``kszucs/pq`` dataset repo with
    ``DEFAULT_MIN_N_CHUNKS_PER_RANGE=1`` set in the child environment. The
    number is taken from the ``Transmitted <N> bytes ...`` line that xtool
    writes to stderr.

    Args:
        paths: Iterable of file paths; each one is converted with ``str`` and
            appended to the command line.

    Returns:
        A dict ``{"transmitted_xtool_bytes": <int>}``.

    Raises:
        KeyError: If the ``XTOOL_TOKEN`` environment variable is not set.
        subprocess.CalledProcessError: If xtool exits with a non-zero status.
        ValueError: If stderr contains no ``Transmitted <N> ...`` line.
    """
    env = os.environ.copy()
    env["DEFAULT_MIN_N_CHUNKS_PER_RANGE"] = "1"

    # The temp file only gives xtool a path for ``-o``; its contents are not
    # read here and the file is removed when the ``with`` block exits.
    with tempfile.NamedTemporaryFile(suffix=".json") as tmp:
        # NOTE(review): the token travels in argv, so it ends up in the
        # message of any CalledProcessError raised below -- be careful where
        # that exception gets logged.
        cmd = [
            "xtool",
            "--repo-type",
            "dataset",
            "--repo-id",
            "kszucs/pq",
            "--token",
            os.environ["XTOOL_TOKEN"],
            "dedup",
            "-s",
            "-o",
            tmp.name,
            *map(str, paths),
        ]
        result = subprocess.run(
            cmd, check=True, capture_output=True, text=True, env=env
        )

    # stderr looks like:
    # 'Dedupping 26 files...\nUsing lz4 compression\n\n\nClean results:\nTransmitted 3180990288 bytes in total.\n'
    #
    # Search from the end for the summary line rather than assuming it is
    # the very last one, so that a trailing warning or log line does not
    # break the parse. When the summary *is* last, the outcome is unchanged.
    for line in reversed(result.stderr.splitlines()):
        fields = line.split()
        if (
            len(fields) >= 2
            and fields[0] == "Transmitted"
            and fields[1].isdecimal()
        ):
            return {"transmitted_xtool_bytes": int(fields[1])}

    raise ValueError(
        "could not find a 'Transmitted <N> bytes' line in xtool stderr: "
        f"{result.stderr[-500:]!r}"
    )