detection-ml-wksp/aws_lambda/cloudtrail_ingest.py
import logging
import os

import boto3

logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Workshop helpers (get_workshop_log_files, load_cloudtrail_log,
# print_short_record, get_tuple) are expected to be provided elsewhere
# in the workshop code.


def handler(event, context):
    # Load environment variables for the input and output locations
    logs_input_bucket = os.environ['INPUT_BUCKET']
    logs_input_prefix = os.environ['INPUT_PREFIX']
    tuples_output_bucket = os.environ['OUTPUT_BUCKET']
    tuples_output_key = os.environ['OUTPUT_KEY']

    # Create a Boto3 session and S3 client
    session = boto3.session.Session()
    s3_client = session.client('s3')

    # List the CloudTrail log files for the workshop as (bucket, key) pairs
    log_files = list(get_workshop_log_files(
        s3_client, logs_input_bucket, logs_input_prefix
    ))

    tuples = []
    for bucket, key in log_files:
        # Load the records from each CloudTrail log file
        records = load_cloudtrail_log(s3_client, bucket, key)

        # Process the CloudTrail records
        for record in records:
            if record['sourceIPAddress'].endswith('.amazonaws.com'):
                continue  # Ignore calls coming from AWS service principals
            print_short_record(record)
            # TODO - Uncomment the next lines to get a tuple for each finding
            # principal, ip = get_tuple(record)
            # tuples.append('{},{}'.format(principal, ip))

    # Write the tuples to S3, where they can be read by the SageMaker algorithm
    if tuples:
        logger.info('Writing tuples to s3://%s/%s',
                    tuples_output_bucket, tuples_output_key)
        s3_client.put_object(
            Bucket=tuples_output_bucket,
            Key=tuples_output_key,
            ContentType='text/csv',
            Body='\n'.join(tuples),
        )
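

# A minimal local smoke-test sketch (not part of the workshop code), assuming the
# helpers referenced above are available in this module. The bucket names, prefix,
# and key below are placeholders, not real workshop resources; swap in your own
# before running. It only shows how the handler reads its configuration from
# environment variables and ignores its event and context arguments.
if __name__ == '__main__':
    os.environ.setdefault('INPUT_BUCKET', 'example-cloudtrail-logs-bucket')   # placeholder
    os.environ.setdefault('INPUT_PREFIX', 'AWSLogs/')                         # placeholder
    os.environ.setdefault('OUTPUT_BUCKET', 'example-training-data-bucket')    # placeholder
    os.environ.setdefault('OUTPUT_KEY', 'tuples/cloudtrail_tuples.csv')       # placeholder
    handler(event={}, context=None)  # the handler does not use event or context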