in de/estimate.py [0:0]
def estimate_xtool(paths):
    """Return the number of bytes ``xtool dedup`` reports as transmitted.

    Runs ``xtool --repo-type dataset --repo-id kszucs/pq --token <token>
    dedup -s -o <tmpfile> <paths...>`` and extracts the integer from the
    ``Transmitted <N> bytes in total.`` summary that the tool writes to
    stderr.

    Args:
        paths: Iterable of file paths; each item is converted with ``str()``
            and passed to ``xtool`` as a positional argument.

    Returns:
        A dict of the form ``{"transmitted_xtool_bytes": <int>}``.

    Raises:
        KeyError: If the ``XTOOL_TOKEN`` environment variable is not set.
        subprocess.CalledProcessError: If ``xtool`` exits with a non-zero
            status (``check=True``).
        ValueError: If stderr contains no ``Transmitted <N> ...`` line.
    """
    # The temp file only serves as the target of ``-o``; its contents are
    # never read here, and it is removed when the ``with`` block exits.
    with tempfile.NamedTemporaryFile(suffix=".json") as tmp:
        env = os.environ.copy()
        # Override handed to xtool through its environment.
        # NOTE(review): presumably tunes xtool's chunk-range handling --
        # confirm against the xtool documentation.
        env["DEFAULT_MIN_N_CHUNKS_PER_RANGE"] = "1"
        cmd = [
            "xtool",
            "--repo-type",
            "dataset",
            "--repo-id",
            "kszucs/pq",
            # NOTE(review): the token is passed on argv and is therefore
            # visible in the process list -- confirm this is acceptable.
            "--token",
            os.environ["XTOOL_TOKEN"],
            "dedup",
            "-s",
            "-o",
            tmp.name,
            *map(str, paths),
        ]
        result = subprocess.run(
            cmd, check=True, capture_output=True, text=True, env=env
        )

    # stderr looks like:
    # 'Dedupping 26 files...\nUsing lz4 compression\n\n\nClean results:\nTransmitted 3180990288 bytes in total.\n'
    # Scan from the end for the summary line instead of assuming it is the
    # very last line, so trailing warnings or blank lines do not break parsing.
    for line in reversed(result.stderr.splitlines()):
        tokens = line.split()
        if len(tokens) >= 2 and tokens[0] == "Transmitted" and tokens[1].isdecimal():
            return {"transmitted_xtool_bytes": int(tokens[1])}

    raise ValueError(
        f"could not find 'Transmitted <N> bytes' in xtool stderr: {result.stderr!r}"
    )