in templates/aws-cloudfront-waf/source/log_parser/build_athena_queries.py [0:0]
def build_athena_query_part_two_for_partition(
log, start_timestamp, end_timestamp):
"""
This function dynamically builds the second part
of the athena query, where partition values are added.
The query will only scan the logs in the partitions
that are between start_timestamp and end_timestamp.
Args:
log: logging object
start_timestamp: datetime. The start time stamp of the logs being scanned
end_timestamp: datetime. The end time stamp of the logs being scanned
Returns:
Athena query string
"""
start_year = start_timestamp.year
start_month = start_timestamp.month
start_day = start_timestamp.day
start_hour = start_timestamp.hour
end_year = end_timestamp.year
end_month = end_timestamp.month
end_day = end_timestamp.day
end_hour = end_timestamp.hour
# same day query filter!
if (start_timestamp.date() == end_timestamp.date()):
log.debug(
"[build_athena_query_part_two_for_partition] \
Same day query filter")
query_string = "\n\t\tWHERE year = " + str(start_year) + "\n" \
"\t\tAND month = " + str(start_month).zfill(2) + "\n" \
"\t\tAND day = " + str(start_day).zfill(2) + "\n" \
"\t\tAND hour between " \
+ str(start_hour).zfill(2) + " and " + str(end_hour).zfill(2)
# different days - cross days query filter!
elif (start_year == end_year):
log.debug(
"[build_athena_query_part_two_for_partition] \
Different days - cross days query filter")
if (start_month == end_month): # year and month are the same, but days are different
query_string = "\n\t\tWHERE year = " + str(start_year) + "\n" \
"\t\tAND month = " + str(start_month).zfill(2) + "\n" \
"\t\tAND (\n" \
"\t\t\t(day = " + str(start_day).zfill(2) + " AND hour >= " + str(start_hour).zfill(2) + ")\n" \
"\t\t\tOR (day = " + str(end_day).zfill(2) + " AND hour <= " + str(end_hour).zfill(2) + ")\n" \
"\t\t)\n"
else: # years are the same, but months and days are different
query_string = "\n\t\tWHERE year = " + str(start_year) + "\n" \
"\t\tAND (\n" \
"\t\t\t(month = " + str(start_month).zfill(2) + " AND day = " + str(start_day).zfill(2) + " AND hour >= " + str(start_hour).zfill(2) + ")\n" \
"\t\t\tOR (month = " + str(end_month).zfill(2) + " AND day = " + str(end_day).zfill(2) + " AND hour <= " + str(end_hour).zfill(2) + ")\n" \
"\t\t)\n"
else: # years are different
log.debug(
"[build_athena_query_part_two_for_partition] \
Different years - cross years query filter")
query_string = "\n\t\tWHERE (year = " + str(start_year) + "\n" \
"\t\t\tAND month = " + str(start_month).zfill(2) + "\n" \
"\t\t\tAND day = " + str(start_day).zfill(2) + "\n" \
"\t\t\tAND hour >= " + str(start_hour).zfill(2) + ")\n" \
"\t\tOR (year = " + str(end_year) + "\n" \
"\t\t\tAND month = " + str(end_month).zfill(2) + "\n" \
"\t\t\tAND day = " + str(end_day).zfill(2) + "\n" \
"\t\t\tAND hour <= " + str(end_hour).zfill(2) + ")\n" \
log.debug(
"[build_athena_query_part_two_for_partition] \
Query string part Two:\n %s"%query_string)
return query_string