in logging/import-logs/main.py
from datetime import date
from typing import List

from google.cloud import storage


def list_log_files(first_day: date, last_day: date, client: storage.Client) -> List[str]:
    """List paths of all log files stored in Cloud Storage between the first and last days, inclusive.

    For the log organization hierarchy see
    https://cloud.google.com/logging/docs/export/storage#gcs-organization.
    """
    # Special case: the first and last days fall in the same month, so a
    # single month prefix covers the whole range.
    if first_day.year == last_day.year and first_day.month == last_day.month:
        blobs = client.list_blobs(BUCKET_NAME, prefix=_prefix(first_day))
        return [
            b.name
            for b in blobs
            if first_day.day <= _day(b.name) <= last_day.day
        ]
    paths = []
    # Collect paths in the first month, keeping only days on or after
    # first_day.
    blobs = client.list_blobs(BUCKET_NAME, prefix=_prefix(first_day))
    paths.extend(b.name for b in blobs if _day(b.name) >= first_day.day)
    # Collect paths in the last month, keeping only days on or before
    # last_day.
    blobs = client.list_blobs(BUCKET_NAME, prefix=_prefix(last_day))
    paths.extend(b.name for b in blobs if _day(b.name) <= last_day.day)
    # Collect every path for the full months strictly between the first and
    # last months.
    for year in range(first_day.year, last_day.year + 1):
        for month in range(
            first_day.month + 1 if year == first_day.year else 1,
            last_day.month if year == last_day.year else 13,
        ):
            blobs = client.list_blobs(
                BUCKET_NAME, prefix=_prefix(date(year=year, month=month, day=1))
            )
            paths.extend(b.name for b in blobs)
return paths
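

# The function above relies on a constant and two helpers defined elsewhere
# in main.py. What follows is a minimal sketch of them, assuming the bucket
# layout documented at the URL in the docstring (<log-id>/YYYY/MM/DD/<file>);
# the names and values here are hypothetical and the real definitions in the
# repository may differ.
LOG_ID = "syslog"  # hypothetical log ID; set to the log your sink exports
BUCKET_NAME = "my-logs-bucket"  # hypothetical bucket name


def _prefix(day: date) -> str:
    """Object-name prefix shared by all log files written in day's month."""
    return f"{LOG_ID}/{day.year:04}/{day.month:02}/"


def _day(path: str) -> int:
    """Day of month parsed from a log object path (<log-id>/YYYY/MM/DD/...)."""
    return int(path.split("/")[3])


# Example usage with hypothetical dates, assuming default application
# credentials; a range spanning three months exercises all three branches.
if __name__ == "__main__":
    client = storage.Client()
    print(list_log_files(date(2024, 1, 15), date(2024, 3, 2), client))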