def lambda_handler()

in lambda-functions/fastly-logs-processor-function/prep-data.py [0:0]


def lambda_handler(event, context):
    #print("Received event: " + json.dumps(event, indent=2))

    # Get the object from the event and show its content type
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'].encode('utf8'))
    dist_name = key.split("/")[-1].split(".")[0]
    print("Enter into try")

    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        print("Getting the response on key "+key)
        bytestream = BytesIO(response['Body'].read())
        data = GzipFile(None, 'rb', fileobj=bytestream).read().decode('utf-8')

        for line in data.strip().split("\n"):

            if not line.startswith("#"):
                try:
                    line = line.strip()
                    line = line.replace("<134>","",1)
                    columns = line.split("|")
                    print(len(columns))
                    ua = parse(columns[5])
                    json_data = {}
                    firstColumn = columns[0].split(" ")
                    print(firstColumn)
                    datetime = firstColumn[0].split("T")
                    json_data['logdate'] = datetime[0]
                    json_data['logtime'] = datetime[1].replace("Z","",1)
                    json_data["location"] = firstColumn[1]
                    json_data["browserfamily"] = ua.browser.family
                    json_data["osfamily"] = ua.os.family
                    json_data["isbot"] = ua.is_bot
                    #normalise the cache result type HIT or MISS to upper case to be in sync
                    json_data["resulttype"] = columns[7].upper()
                    json_data["requestid"] = columns[8]
                    json_data["cdn_source"] = cdn_source
                    # following to be included for cross CDN compatibility
                    json_data["bytes"] = columns[3]
                    json_data["requestip"] = firstColumn[3]
                    json_data["uri"] = columns[4]
                    json_data = json.dumps(json_data)

# fastly columns
#<134>2019-01-06T04:17:46Z cache-bom18223 S3LogBucket[22354]: 49.36.1.187 "-" "-" [06/Jan/2019:04:17:45 +0000] "GET /sample-videos/hls/TearsOfSteel.m3u8 HTTP/1.1" 403 243 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" 0 "MISS" "33c478dd-0491-4bfb-b303-64f546b819e7"

                    # line = line + '\t' + str(ua.browser.family) + '\t' + str(ua.os.family) + '\t' + str(ua.is_bot) + '\t' + key + '\t' + dist_name + '\t' + cdn_source + '\n'
                    processed = True

                    print("pushing item to firehose stream -> ")
                    print(json_data)

                    firehose.put_record(
                        DeliveryStreamName=delivery_stream,
                            Record={
                                "Data":json_data+"\n"
                            }
                    )
                except Exception as e:
                    print(e)
                    print("Exception during utf8 conversion")

    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e