def lambda_handler()

in lambda-functions/cloudfront-logs-processor-function/prep-data.py [0:0]


def lambda_handler(event, context):
    #print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.parse.unquote_plus(str(event['Records'][0]['s3']['object']['key']))
    dist_name = key.split("/")[-1].split(".")[0]
    print("Enter into try")

    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        print("Getting the response on key "+key)
        bytestream = BytesIO(response['Body'].read())
        data = GzipFile(None, 'rb', fileobj=bytestream).read().decode('utf-8')

        for line in data.strip().split("\n"):

            if not line.startswith("#"):
                try:
                    line = line.strip()
                    print(line)
                    columns = line.split("\t")
                    ua = parse(columns[10])
                    json_data = {}
                    json_data['logdate'] = columns[0]
                    json_data['logtime'] = columns[1]
                    json_data["location"] = columns[2]
                    json_data["browserfamily"] = ua.browser.family
                    json_data["osfamily"] = ua.os.family
                    json_data["isbot"] = ua.is_bot
                    #normalise the cache result type HIT or MISS to upper case to be in sync
                    json_data["resulttype"] = columns[13].upper()
                    json_data["requestid"] = columns[14]
                    json_data["cdn_source"] = cdn_source
                    json_data["bytes"] = columns[3]
                    json_data["requestip"] = columns[4]
                    json_data["uri"] = columns[7]
                    json_data = json.dumps(json_data)

# cloudfront columns
#           - Name: logdate - 0
#             Type: date
#           - Name: logtime - 1
#             Type: string
#           - Name: location - 2
#             Type: string
#           - Name: bytes - 3
#             Type: bigint
#           - Name: requestip - 4
#             Type: string
#           - Name: method - 5
#             Type: string
#           - Name: host - 6
#             Type: string
#           - Name: uri - 7
#             Type: string
#           - Name: status - 8
#             Type: bigint
#           - Name: referrer - 9
#             Type: string
#           - Name: useragent - 10
#             Type: string
#           - Name: uriquery - 11
#             Type: string
#           - Name: cookie - 12
#             Type: string
#           - Name: resulttype - 13
#             Type: string
#           - Name: requestid - 14
#             Type: string
#           - Name: header - 15
#             Type: string
#           - Name: csprotocol - 16
#             Type: string
#           - Name: csbytes - 17
#             Type: string
#           - Name: timetaken - 18
#             Type: bigint
#           - Name: forwardedfor - 19
#             Type: string
#           - Name: sslprotocol - 20
#             Type: string
#           - Name: sslcipher - 21
#             Type: string
#           - Name: responseresulttype - 22
#             Type: string
#           - Name: protocolversion - 23
#             Type: string
#           - Name: fle-status - 24
#             Type: string
#           - Name: fle-encrypted-fields - 25
#             Type: string
#           - Name: browserfamily - 26
#             Type: string
#           - Name: osfamily - 27
#             Type: string
#           - Name: isbot - 28
#             Type: string
#           - Name: filename - 29
#             Type: string
#           - Name: distribution - 30
#             Type: string
#           - Name: cdn_source - 31
#             Type: string

                    # line = line + '\t' + str(ua.browser.family) + '\t' + str(ua.os.family) + '\t' + str(ua.is_bot) + '\t' + key + '\t' + dist_name + '\t' + cdn_source + '\n'
                    processed = True

                    print("pushing item to firehose stream -> ")
                    print(json_data)

                    firehose.put_record(
                        DeliveryStreamName=delivery_stream,
                            Record={
                                "Data":json_data+"\n"
                            }
                    )
                except Exception as e:
                    print(e)
                    print("Exception during utf8 conversion")

    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e