in analytics/download_count_wheels.py [0:0]
def parse_logs(log_directory: str) -> dict:
    """Aggregate bytes sent per downloaded artifact from gzipped log files.

    Recursively walks ``log_directory`` and reads every file found as a
    gzip-compressed, UTF-8 encoded, tab-separated log. The first two lines
    of each file are skipped (presumably format headers -- confirm against
    the log producer). For every remaining row:

    * column 3 is parsed as the integer number of bytes sent,
    * column 7 is the request URI, percent-decoded twice,
    * column 8 is the response status.

    Only rows whose status starts with ``"2"`` and whose decoded URI ends
    in ``.whl`` or ``.zip`` contribute to the totals.

    Args:
        log_directory: Root directory searched for gzipped log files.

    Returns:
        A dict mapping each decoded download URI to a ``CacheEntry`` whose
        ``bytes_sent`` has accumulated the byte counts of all matching rows.
    """
    totals = {}
    for root, _, names in os.walk(log_directory):
        for name in tqdm(names):
            log_path = os.path.join(root, name)
            with gzip.open(log_path, 'r') as handle:
                text = handle.read().decode("utf-8")

            # The first two lines of every log are not data rows.
            for row in text.splitlines()[2:]:
                fields = row.split('\t')
                sent = int(fields[3])
                uri = urllib.parse.unquote(urllib.parse.unquote(fields[7]))
                status = fields[8]

                if not status.startswith("2"):
                    continue
                if not uri.endswith((".whl", ".zip")):
                    continue

                record = totals.get(uri)
                if not record:
                    record = CacheEntry(uri)
                    totals[uri] = record
                record.bytes_sent += sent
    return totals