def build_athena_query_part_two_for_partition()

in templates/aws-cloudfront-waf/source/log_parser/build_athena_queries.py [0:0]


def build_athena_query_part_two_for_partition(
        log, start_timestamp, end_timestamp):
    """
    This function dynamically builds the second part
    of the athena query, where partition values are added.
    The query will only scan the logs in the partitions
    that are between start_timestamp and end_timestamp.

    Args:
        log: logging object
        start_timestamp: datetime. The start time stamp of the logs being scanned
        end_timestamp: datetime. The end time stamp of the logs being scanned

    Returns:
        Athena query string
    """
    start_year = start_timestamp.year
    start_month = start_timestamp.month
    start_day = start_timestamp.day
    start_hour = start_timestamp.hour
    end_year = end_timestamp.year
    end_month = end_timestamp.month
    end_day = end_timestamp.day
    end_hour = end_timestamp.hour

    # same day query filter!
    if (start_timestamp.date() == end_timestamp.date()):
        log.debug(
            "[build_athena_query_part_two_for_partition] \
            Same day query filter")
        query_string = "\n\t\tWHERE year = " + str(start_year) + "\n"  \
                       "\t\tAND month = " + str(start_month).zfill(2) + "\n"  \
                       "\t\tAND day = " + str(start_day).zfill(2) + "\n"  \
                       "\t\tAND hour between "  \
                       + str(start_hour).zfill(2) + " and " + str(end_hour).zfill(2)
    # different days - cross days query filter!
    elif (start_year == end_year):
        log.debug(
            "[build_athena_query_part_two_for_partition] \
             Different days - cross days query filter")
        if (start_month == end_month):  # year and month are the same, but days are different
            query_string = "\n\t\tWHERE year = " + str(start_year) + "\n"  \
                        "\t\tAND month = " + str(start_month).zfill(2) + "\n"  \
                        "\t\tAND (\n"  \
                        "\t\t\t(day = " + str(start_day).zfill(2) + " AND hour >= " + str(start_hour).zfill(2) + ")\n"  \
                        "\t\t\tOR (day = " + str(end_day).zfill(2) + " AND hour <= " + str(end_hour).zfill(2) + ")\n"  \
                        "\t\t)\n"
        else:  # years are the same, but months and days are different
            query_string = "\n\t\tWHERE year = " + str(start_year) + "\n"  \
                        "\t\tAND (\n"  \
                        "\t\t\t(month = " + str(start_month).zfill(2) + " AND day = " + str(start_day).zfill(2) + " AND hour >= " + str(start_hour).zfill(2) + ")\n"  \
                        "\t\t\tOR (month = " + str(end_month).zfill(2) + " AND day = " + str(end_day).zfill(2) + " AND hour <= " + str(end_hour).zfill(2) + ")\n"  \
                        "\t\t)\n"
    else:  # years are different
        log.debug(
            "[build_athena_query_part_two_for_partition] \
             Different years - cross years query filter")
        query_string = "\n\t\tWHERE (year = " + str(start_year) + "\n"  \
                    "\t\t\tAND month = " + str(start_month).zfill(2) + "\n"  \
                    "\t\t\tAND day = " + str(start_day).zfill(2) + "\n"  \
                    "\t\t\tAND hour >= " + str(start_hour).zfill(2) + ")\n"  \
                    "\t\tOR (year = " + str(end_year) + "\n"  \
                    "\t\t\tAND month = " + str(end_month).zfill(2) + "\n"  \
                    "\t\t\tAND day = " + str(end_day).zfill(2) + "\n"  \
                    "\t\t\tAND hour <= " + str(end_hour).zfill(2) + ")\n"  \

    log.debug(
        "[build_athena_query_part_two_for_partition]  \
         Query string part Two:\n %s"%query_string)
    return query_string