def lambda_handler()

in micro-etl-app/app/app.py [0:0]


def lambda_handler(event, context):
    logger.info('## EVENT')
    logger.info(event)
    
    columns = ['Date', 'Region_Name', 'Area_Code', 'Detached_Average_Price',
       'Detached_Index', 'Detached_Monthly_Change', 'Detached_Annual_Change',
       'Semi_Detached_Average_Price', 'Semi_Detached_Index',
       'Semi_Detached_Monthly_Change', 'Semi_Detached_Annual_Change',
       'Terraced_Average_Price', 'Terraced_Index', 'Terraced_Monthly_Change',
       'Terraced_Annual_Change', 'Flat_Average_Price', 'Flat_Index',
       'Flat_Monthly_Change', 'Flat_Annual_Change']
    
    # Request only the last 2,000,000 bytes of the file, so we fetch just the most recent data in the CSV
    # This assumes the rows are in ascending date order, with the most recent values at the end of the file
    res = requests.get(URL, headers=range_header(-2000000), allow_redirects=True)
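    # The byte-range slice almost certainly starts mid-row, so the first (partial) line is skipped and any
    # malformed lines are dropped. Note that error_bad_lines is deprecated in newer pandas releases,
    # where on_bad_lines='skip' is the equivalent option.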
    df = pd.read_csv(io.StringIO(res.content.decode('utf-8')), engine='python', error_bad_lines=False, names=columns, skiprows=1)
    logger.info('## NUMBER OF ELEMENTS')
    logger.info(df.size)
    
    # Extract only values in a specified time range
    start_date = '2018-01-01'
    end_date = '2018-12-31'
    date_range = (df['Date'] >= start_date) & (df['Date'] <= end_date)
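    # NOTE: this relies on Date being an ISO-formatted (YYYY-MM-DD) string, so lexicographic comparison
    # matches chronological order; the reporting window itself is hard-coded to calendar year 2018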
    df = df[date_range]
    logger.info('## NUMBER OF ELEMENTS IN THE RANGE')
    logger.info(df.size)

    # Save files into S3
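    # pandas can write straight to an s3:// URL, provided s3fs/fsspec is available in the Lambda
    # deployment package and the function's execution role is allowed to write to the bucket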
    url = 's3://{}/{}'.format(S3_BUCKET, FILENAME)
    df.to_csv(url)
    logger.info('## FILE PATH')
    logger.info(url)
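
The handler relies on several module-level names (logger, URL, S3_BUCKET, FILENAME and the range_header helper) that are defined elsewhere in app.py and are not shown above. A minimal sketch of that setup, assuming range_header builds an HTTP suffix-range header and that the constants are read from environment variables, might look like this:

import io
import logging
import os

import pandas as pd
import requests

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Assumed configuration: source CSV URL and S3 destination, taken from environment variables
URL = os.environ['URL']
S3_BUCKET = os.environ['S3_BUCKET']
FILENAME = os.environ['FILENAME']


def range_header(last_bytes):
    # Build an HTTP Range header; a negative value (e.g. -2000000) produces a suffix range
    # ("bytes=-2000000"), i.e. the last 2,000,000 bytes of the file
    return {'Range': 'bytes={}'.format(last_bytes)}

For a quick local check, the handler can be invoked directly (e.g. lambda_handler({}, None)), provided the environment variables above are set and s3fs is installed.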