in probe_scraper/scrapers/moz_central_scraper.py [0:0]
def download_files(channel, node, temp_dir, error_cache, version, tree=None):
    """Fetch the probe registry files for a given hg revision.

    Downloads each file listed in ``REGISTRY_FILES`` from the hg web server
    (or reuses a copy already on disk under ``temp_dir``) and returns a
    mapping of probe type -> list of local file paths.

    Args:
        channel: key into ``CHANNELS`` used to pick a base URI when *tree*
            is not given.
        node: hg changeset id; used both in the raw-file URI and in the
            on-disk cache path ``<temp_dir>/hg/<node>``.
        temp_dir: directory under which downloaded files are stored.
        error_cache: dict mapping URI -> HTTP status code of previous
            failures; consulted and updated in place (requests_cache does
            not cache error responses, so we keep our own record).
        version: version number (coerced with ``int``) used to skip files
            that do not exist in that version.
        tree: optional hg tree name (e.g. "mozilla-beta"); overrides the
            channel-derived base URI when provided.

    Returns:
        dict mapping probe type to a list of local file paths.

    Raises:
        Exception: if the server returns a non-OK status for a
            ``Histograms.json`` file (other failures are recorded in
            *error_cache* and skipped).
    """
    if tree is None:
        uri = CHANNELS[channel]["base_uri"]
    else:
        # mozilla-release and mozilla-beta need to be prefixed with
        # "releases/"; buildhub sometimes omits the prefix, so add it
        # when missing.
        if not tree.startswith("releases/") and tree != "mozilla-central":
            tree = f"releases/{tree}"
        uri = f"{BASE_URI}/{tree}"
    base_uri = f"{uri}/raw-file/{node}/"
    node_path = os.path.join(temp_dir, "hg", node)

    results = {}

    # Flatten {probe_type: [rel_path, ...]} into (probe_type, rel_path) pairs.
    all_files = [(k, x) for k, l in list(REGISTRY_FILES.items()) for x in l]
    for ptype, rel_path in all_files:
        disk_path = os.path.join(node_path, rel_path)
        if os.path.exists(disk_path):
            # Already downloaded for this node on a previous run.
            results.setdefault(ptype, []).append(disk_path)
            continue

        uri = base_uri + rel_path
        # requests_cache doesn't cache on error status codes.
        # We just use our own cache for these for now.
        if uri in error_cache:
            continue
        if not relative_path_is_in_version(rel_path, int(version)):
            continue

        req = requests.get(uri, headers=HTTP_HEADERS)
        if req.status_code != requests.codes.ok:
            if os.path.basename(rel_path) == "Histograms.json":
                # Histograms.json is mandatory; anything else is best-effort.
                raise Exception(
                    "Request returned status " + str(req.status_code) + " for " + uri
                )
            else:
                error_cache[uri] = req.status_code
                continue

        # exist_ok=True avoids the TOCTOU race of a separate exists() check.
        os.makedirs(os.path.dirname(disk_path), exist_ok=True)
        with open(disk_path, "wb") as f:
            for chunk in req.iter_content(chunk_size=128):
                f.write(chunk)
        results.setdefault(ptype, []).append(disk_path)

    return results