in distilvit/_datasets/coco.py [0:0]
def download_file(url, directory, *, timeout=None):
    """Download *url* into *directory* unless the file is already there.

    The local file name is the last ``/``-separated component of *url*.
    The payload is streamed to a temporary ``<name>.part`` file and only
    renamed to its final name once the transfer has finished, so an
    interrupted download never leaves a truncated file that a later call
    would mistake for a complete one.

    Args:
        url: Address of the file to fetch.
        directory: Destination directory; created if it does not exist.
        timeout: Optional timeout forwarded to ``requests.get``. ``None``
            (the default) keeps the previous behaviour of waiting
            indefinitely.

    Returns:
        The path of the downloaded (or already present) file.

    Raises:
        ValueError: If no file name can be derived from *url* (for
            example when it ends with ``/``).
        requests.HTTPError: If the server answers with an error status.
    """
    local_filename = url.split("/")[-1]
    if not local_filename:
        # Without this guard the target path would be the directory itself,
        # which exists, and the function would report a bogus cache hit.
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    os.makedirs(directory, exist_ok=True)  # Ensure the directory exists
    path_to_file = os.path.join(directory, local_filename)

    # Only download if the file does not exist
    if os.path.exists(path_to_file):
        print(f"{local_filename} already exists. Skipping download.")
        return path_to_file

    partial_path = path_to_file + ".part"
    block_size = 64 * 1024  # 64 KiB keeps per-chunk overhead low
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            # A missing Content-Length means the size is unknown; tqdm
            # expects None (not 0) in that case.
            total_size_in_bytes = int(r.headers.get("content-length", 0)) or None
            with open(partial_path, "wb") as f, tqdm(
                total=total_size_in_bytes, unit="iB", unit_scale=True
            ) as progress_bar:
                for chunk in r.iter_content(chunk_size=block_size):
                    f.write(chunk)
                    progress_bar.update(len(chunk))
        # Publish the finished file under its final name in one step.
        os.replace(partial_path, path_to_file)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a partial file behind.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        raise
    return path_to_file