def handler()

in detection-ml-wksp/aws_lambda/cloudtrail_ingest.py [0:0]


def handler(event, context):
    """Print the workshop's CloudTrail records and upload collected tuples.

    Reads the ``INPUT_BUCKET``, ``INPUT_PREFIX``, ``OUTPUT_BUCKET`` and
    ``OUTPUT_KEY`` environment variables, lists the log files reported by
    ``get_workshop_log_files`` and prints a short form of every record whose
    ``sourceIPAddress`` does not end in ``.amazonaws.com``.  If at least one
    tuple has been collected, the tuples are joined with newlines and stored
    as a single ``text/csv`` object at the output bucket/key.

    Args:
        event: Not read by this function.
        context: Not read by this function.

    Raises:
        KeyError: If one of the four environment variables is not set.
    """
    # Resolve the source and destination locations from the environment
    src_bucket = os.environ['INPUT_BUCKET']
    src_prefix = os.environ['INPUT_PREFIX']
    dest_bucket = os.environ['OUTPUT_BUCKET']
    dest_key = os.environ['OUTPUT_KEY']

    # One S3 client is shared by the listing, the downloads and the upload
    s3_client = boto3.session.Session().client('s3')

    # The full listing is materialised before any log file is loaded
    pending_logs = [
        (log_bucket, log_key)
        for log_bucket, log_key in get_workshop_log_files(
            s3_client, src_bucket, src_prefix
        )
    ]

    tuples = []

    for log_bucket, log_key in pending_logs:
        for record in load_cloudtrail_log(s3_client, log_bucket, log_key):
            # Calls coming from AWS service principals are ignored
            if not record['sourceIPAddress'].endswith('.amazonaws.com'):
                print_short_record(record)

                # TODO - Uncomment next lines to get tuples for each finding
                # principal, ip = get_tuple(record)
                # tuples.append('{},{}'.format(principal, ip))

    # Nothing collected means nothing to upload
    if not tuples:
        return

    # Write the tuples to S3 where they can be read by the Sagemaker algorithm
    logger.info('Writing tuples to s3://%s/%s', dest_bucket, dest_key)
    s3_client.put_object(
        Bucket=dest_bucket,
        Key=dest_key,
        ContentType='text/csv',
        Body='\n'.join(tuples),
    )