in benchmarking/download_benchmarks/download_benchmarks.py [0:0]
def downloadFile(self, location, md5):
    """Return the local path for ``location``, downloading it when needed.

    Two kinds of location are handled:

    * a location starting with ``"http"`` is stored under
      ``self.root_model_dir`` using the name produced by ``getFilename``;
    * a location starting with ``"//"`` that has more than two
      ``/``-separated components is stored at
      ``self.root_model_dir + location[1:]``.

    Any other location is ignored.

    If the target file already exists it is reused when ``md5`` is falsy or
    when ``md5`` equals the md5 hex digest of the file on disk; otherwise
    the file is fetched through ``DownloadFile``.

    Args:
        location: Source location string of the file.
        md5: Expected md5 hex digest, or a falsy value to trust any
            existing local copy without hashing it.

    Returns:
        The local path of the file, or ``None`` when ``location`` is not
        one of the supported forms.
    """
    if location.startswith("http"):
        dirs = location.split(":/")
        # Character substitutions handed to getFilename for the local name.
        replace_pattern = {
            " ": "-",
            "\\": "-",
            ":": "/",
        }
        path = os.path.join(
            self.root_model_dir,
            getFilename(location, replace_pattern=replace_pattern),
        )
    elif location.startswith("//"):
        dirs = location[2:].split("/")
        if len(dirs) <= 2:
            return None
        # location[1:] keeps a single leading "/" as the separator between
        # the root directory and the rest of the location.
        path = self.root_model_dir + location[1:]
    else:
        return None

    if os.path.isfile(path):
        if not md5:
            # No checksum to compare against: assume the file is the same.
            return path
        getLogger().info("Calculate md5 of {}".format(path))
        file_hash = hashlib.md5()
        with open(path, "rb") as f:
            # Hash in 8 KiB chunks so the whole file is never held in memory.
            for chunk in iter(lambda: f.read(8192), b""):
                file_hash.update(chunk)
        if md5 == file_hash.hexdigest():
            getLogger().info(
                "File {}".format(os.path.basename(path))
                + " is cached, skip downloading"
            )
            return path
        # Make the reason for the re-download visible instead of silently
        # falling through.
        getLogger().info(
            "md5 of {} does not match, downloading again".format(path)
        )

    downloader_controller = DownloadFile(
        dirs=dirs, logger=self.logger, args=self.args
    )
    downloader_controller.download_file(location, path)
    return path