in squid_app/squid_config_files/lambda/lambda-handler.py [0:0]
def _describe_asg(asg_name):
    """Return the description of Auto Scaling group *asg_name*, or None if absent."""
    groups = as_client.describe_auto_scaling_groups(
        AutoScalingGroupNames=[asg_name]
    )['AutoScalingGroups']
    return groups[0] if groups else None


def _fail_over(asg, asg_name):
    """Mark the instances of the failed group Unhealthy and reroute its tables.

    The route tables tagged with ``AutoScalingGroupName=asg_name`` are handed
    to ``update_route`` together with the first instance of the first group
    whose ``squid-alarm_`` alarm is in the OK state and that has an instance.
    """
    # Best effort, per instance: one failing call must not prevent the other
    # instances from being flagged, nor block the rerouting below.
    for instance in asg['Instances']:
        instance_id = instance['InstanceId']
        try:
            as_client.set_instance_health(
                InstanceId=instance_id,
                HealthStatus='Unhealthy'
            )
        except Exception as exc:
            print('Could not set instance %s to Unhealthy: %s' % (instance_id, exc))
        else:
            print('Set instance %s to Unhealthy' % instance_id)

    # NOTE(review): TOPIC_ARN is read with .get(); if the variable is unset
    # None is passed as ActionPrefix - confirm it is always configured.
    healthy_alarms = cw_client.describe_alarms(
        AlarmNamePrefix='squid-alarm_',
        ActionPrefix=os.environ.get('TOPIC_ARN'),
        StateValue='OK'
    )['MetricAlarms']

    for healthy_alarm in healthy_alarms:
        healthy_asg_name = healthy_alarm['AlarmName'].split('_')[1]
        healthy_asg = _describe_asg(healthy_asg_name)
        if healthy_asg is None or not healthy_asg['Instances']:
            # An OK alarm whose group has no instance cannot take traffic;
            # try the next candidate instead of failing on an empty list.
            print('ASG %s has no instance, trying next candidate' % healthy_asg_name)
            continue

        healthy_instance_id = healthy_asg['Instances'][0]['InstanceId']
        print('Healthy NAT instance: %s' % healthy_instance_id)

        # For each route table that currently routes traffic to the unhealthy
        # NAT instance, update the default route
        route_tables = ec2_client.describe_route_tables(
            Filters=[{'Name': 'tag:AutoScalingGroupName', 'Values': [asg_name]}]
        )['RouteTables']
        for route_table in route_tables:
            update_route(route_table['RouteTableId'], healthy_instance_id, healthy_asg_name)
        # Only the first usable healthy instance is needed.
        return

    print('No healthy NAT instance found, routes of %s left unchanged' % asg_name)


def _restore(asg, asg_name):
    """Route the group's own route tables back to its healthy instance.

    Completes the group's first lifecycle hook for that instance (best
    effort), then calls ``update_route`` for every id listed in the group's
    comma-separated ``RouteTableIds`` tag.
    """
    # ID of the NAT instance launched by the Auto Scaling group
    asg_instance_id = next(
        (instance['InstanceId'] for instance in asg['Instances']
         if instance['HealthStatus'] == 'Healthy'),
        None
    )
    if asg_instance_id is None:
        # Previously this fell through to an unbound local variable.
        print('No healthy instance in ASG %s, nothing to route to' % asg_name)
        return
    print('Instance launched by the ASG: %s' % asg_instance_id)

    # Complete the lifecycle action if the NAT instance was just launched
    hooks = as_client.describe_lifecycle_hooks(
        AutoScalingGroupName=asg_name
    )['LifecycleHooks']
    if hooks:
        try:
            as_client.complete_lifecycle_action(
                LifecycleHookName=hooks[0]['LifecycleHookName'],
                AutoScalingGroupName=asg_name,
                LifecycleActionResult='CONTINUE',
                InstanceId=asg_instance_id
            )
        except Exception as exc:
            # Expected when no lifecycle action is pending for the instance;
            # logged rather than silently dropped so real errors are visible.
            print('Lifecycle action not completed: %s' % exc)
        else:
            print('Lifecycle action completed')

    # Create or update the default route for each route table that should
    # route traffic to this NAT instance in a nominal situation
    tags = as_client.describe_tags(
        Filters=[{'Name': 'auto-scaling-group', 'Values': [asg_name]},
                 {'Name': 'key', 'Values': ['RouteTableIds']}]
    )['Tags']
    if not tags:
        print('ASG %s has no RouteTableIds tag, no route updated' % asg_name)
        return
    for route_table_id in tags[0]['Value'].split(','):
        update_route(route_table_id, asg_instance_id, asg_name)


def handler(event, context):
    """Lambda entry point reacting to alarm notifications delivered through SNS.

    For every record, the JSON document in ``record['Sns']['Message']`` is
    decoded; its ``AlarmName`` gives the Auto Scaling group name (the second
    ``_``-separated field) and its ``NewStateValue`` selects the action:

    * ``'ALARM'``: the group's instances are set Unhealthy and its route
      tables are pointed at a healthy instance from another group.
    * any other value: treated as a recovery; the lifecycle action is
      completed and the group's own route tables are pointed back at it.

    Args:
        event: Invocation payload; must contain a ``'Records'`` list.
        context: Lambda context object (unused).

    Returns:
        None.
    """
    print(json.dumps(event))
    for record in event['Records']:
        message = json.loads(record['Sns']['Message'])
        state = message['NewStateValue']
        print('Alarm state: %s' % state)

        # Auto Scaling group associated to the alarm
        # NOTE(review): a group name containing '_' would be truncated here -
        # confirm the alarm naming scheme.
        asg_name = message['AlarmName'].split('_')[1]
        print('ASG Name: %s' % asg_name)

        asg = _describe_asg(asg_name)
        if asg is None:
            print('Auto Scaling group %s not found, skipping record' % asg_name)
            continue

        if state == 'ALARM':
            # The NAT instance has failed
            _fail_over(asg, asg_name)
        else:
            # The NAT instance has recovered
            _restore(asg, asg_name)