registration/conf/registration.yaml (345 lines of code) (raw):
# The infrastructure in this file is now ingested and built via CDK before deployment (i.e. this file is no longer
# directly uploaded to Riff-Raff). You can edit this file as normal, but you must update the CDK snapshot tests in order
# to get CI to pass.
# Template format version and the human-readable stack description.
AWSTemplateFormatVersion: "2010-09-09"
Description: 'Registration for mobile notifications'
Mappings:
  # Fixed identifiers read with !FindInMap (instance tags, SSM path, artifact path).
  Constants:
    Stack:
      Value: mobile-notifications
    App:
      Value: registration
  # Per-stage tuning, keyed by the Stage parameter (CODE or PROD).
  # Thresholds are CPU percentages and periods are seconds, as used by the CPU alarms.
  # NOTE(review): NotificationAlarmPeriod is not referenced by any resource in this
  # template as shown — confirm whether it is still needed.
  StageVariables:
    CODE:
      InstanceType: 't4g.small'
      CPUAlarmThresholdUpper: 50
      CPUAlarmPeriodUpper: 1200
      CPUAlarmThresholdLower: 20
      CPUAlarmPeriodLower: 300
      NotificationAlarmPeriod: 1200
    PROD:
      InstanceType: 't4g.small'
      CPUAlarmThresholdUpper: 50
      CPUAlarmPeriodUpper: 60
      CPUAlarmThresholdLower: 20
      CPUAlarmPeriodLower: 300
      NotificationAlarmPeriod: 1200
Outputs:
  # DNS name of the load balancer that fronts the autoscaling group.
  LoadBalancerUrl:
    Value: !GetAtt LoadBalancerToPrivateASG.DNSName
# Stack inputs supplied at deploy time.
Parameters:
  AMI:
    Type: AWS::EC2::Image::Id
    Description: AMI used by the instances
  Stage:
    Type: String
    AllowedValues:
      - CODE
      - PROD
    Description: Environment name
  VPCSecurityGroup:
    Type: AWS::EC2::SecurityGroup::Id
    Description: The default security group of the VPC
  AlarmTopic:
    Type: String
    Description: The ARN of the SNS topic to send all the cloudwatch alarms to
  VpcId:
    Type: AWS::EC2::VPC::Id
    Description: The VPC
  PublicSubnets:
    Type: List<AWS::EC2::Subnet::Id>
    Description: The public subnets of the VPC for the loadbalancer
  PrivateSubnets:
    Type: List<AWS::EC2::Subnet::Id>
    Description: The private subnets of the VPC for the autoscaling group
  CertArn:
    Type: String
    Description: ACM Certificate for app use
  ASGMinSize:
    Type: Number
    Description: Minimum size of the autoscaling group
  ASGMaxSize:
    Type: Number
    Description: Maximum size of the autoscaling group
  DistBucket:
    Type: String
    Description: The name of the s3 bucket containing the server artifact
  HostedZone:
    Type: String
    Description: The HostedZone, should contain the trailing dot zone.example.com.
  DomainName:
    Type: String
    Description: The domain name of the ELB, should contain the trailing dot stuff.zone.example.com.
  # Compared against the 30-minute (Period: 1800) sum of HTTPCode_Backend_2XX.
  NotEnough200sThreshold:
    Type: Number
    Description: Alarm if less than this many 200s in half an hour
  # Compared against the daily (Period: 86400) sum of HTTPCode_Backend_2XX.
  NotEnough200sPerDayThreshold:
    Type: Number
    Description: >-
      Alarm if fewer than this many 200s in a whole day. This value was based on just below
      2 standard deviations from the mean over 6 weeks of data.
  # Substituted into the AlarmDescription of both "not enough 200s" alarms.
  RunbookCopy:
    Type: String
    Description: Runbook link appended to the description of the 200s alarms
    Default: <<<Runbook|https://docs.google.com/document/d/1aJMytnPGeWH8YLpD2_66doxqyr8dPvAVonYIOG-zmOA>>>
Resources:
# Instance profile wrapping DistributionRole; the launch configuration references it.
DistributionInstanceProfile:
  Type: AWS::IAM::InstanceProfile
  Properties:
    Roles: [!Ref DistributionRole]
    Path: /
# IAM role that EC2 (ec2.amazonaws.com) may assume on behalf of the instances.
DistributionRole:
  Type: AWS::IAM::Role
  Properties:
    Path: /
    AssumeRolePolicyDocument:
      Statement:
        - Effect: Allow
          Principal:
            Service: ec2.amazonaws.com
          Action: sts:AssumeRole
    ManagedPolicyArns:
      - !Sub 'arn:aws:iam::${AWS::AccountId}:policy/ssm-scala-v1'
    Policies:
      - PolicyName: root
        PolicyDocument:
          Statement:
            # Read objects from the artifact bucket.
            - Effect: Allow
              Action: s3:GetObject
              Resource: !Sub 'arn:aws:s3:::${DistBucket}/*'
            - Effect: Allow
              Action: ec2:DescribeTags
              Resource: '*'
            # NOTE(review): every CloudWatch and CloudWatch Logs action on every
            # resource — confirm this breadth is actually required.
            - Effect: Allow
              Action:
                - cloudwatch:*
                - logs:*
              Resource: '*'
            - Effect: Allow
              Action:
                - autoscaling:DescribeAutoScalingInstances
                - autoscaling:DescribeAutoScalingGroups
              Resource: '*'
            # Write access to the per-stage log aggregation Kinesis stream.
            - Effect: Allow
              Action:
                - kinesis:PutRecord
                - kinesis:PutRecords
                - kinesis:DescribeStream
              Resource: !Sub 'arn:aws:kinesis:${AWS::Region}:${AWS::AccountId}:stream/mobile-log-aggregation-${Stage}'
      - PolicyName: conf
        PolicyDocument:
          Statement:
            # Read SSM parameters under /notifications/<Stage>/<Stack>.
            - Effect: Allow
              Action: ssm:GetParametersByPath
              Resource: !Sub
                - 'arn:aws:ssm:${AWS::Region}:${AWS::AccountId}:parameter/notifications/${Stage}/${Stack}'
                - Stack: !FindInMap [Constants, Stack, Value]
# CNAME record pointing DomainName at the load balancer's DNS name.
DnsRecord:
  Type: AWS::Route53::RecordSet
  Properties:
    Type: CNAME
    Name: !Ref DomainName
    HostedZoneName: !Ref HostedZone
    TTL: 60
    ResourceRecords: [!GetAtt LoadBalancerToPrivateASG.DNSName]
# Allows inbound SSH (tcp/22) from members of the VPCSecurityGroup parameter's group.
GuardianAccessSecurityGroup:
  Type: AWS::EC2::SecurityGroup
  Properties:
    VpcId: !Ref VpcId
    GroupDescription: SSH and management server access from Guardian network
    SecurityGroupIngress:
      - IpProtocol: tcp
        FromPort: 22
        ToPort: 22
        SourceSecurityGroupId: !Ref VPCSecurityGroup
# Fires ScaleUpPolicy when the autoscaling group's average CPUUtilization is at or
# above the per-stage upper threshold for one evaluation period.
HighCPUAlarm:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmActions:
      - !Ref ScaleUpPolicy
    AlarmDescription:
      !Sub
        - Scale-Up if CPU is greater than ${CPUAlarmThresholdUpper} % over last ${CPUAlarmPeriodUpper} seconds
        - CPUAlarmThresholdUpper: !FindInMap [ StageVariables, !Ref Stage, CPUAlarmThresholdUpper ]
          # Reads the period (not the threshold) so the text agrees with the Period property below.
          CPUAlarmPeriodUpper: !FindInMap [ StageVariables, !Ref Stage, CPUAlarmPeriodUpper ]
    ComparisonOperator: GreaterThanOrEqualToThreshold
    Dimensions:
      - Name: AutoScalingGroupName
        Value: !Ref PrivateRegistrationAutoscalingGroup
    EvaluationPeriods: 1
    MetricName: CPUUtilization
    Namespace: AWS/EC2
    Period:
      !FindInMap [ StageVariables, !Ref Stage, CPUAlarmPeriodUpper ]
    Statistic: Average
    Threshold:
      !FindInMap [ StageVariables, !Ref Stage, CPUAlarmThresholdUpper ]
# Security group for the application instances: inbound tcp/9000 only from the
# load balancer's security group; outbound tcp/80 and tcp/443 to anywhere.
# NOTE(review): GroupDescription matches the load balancer group's text and looks
# copy-pasted; left untouched here because editing it would replace the group.
InstanceSecurityGroup:
  Type: AWS::EC2::SecurityGroup
  Properties:
    VpcId: !Ref VpcId
    GroupDescription: Open up HTTP access to load balancer
    SecurityGroupIngress:
      - IpProtocol: tcp
        FromPort: 9000
        ToPort: 9000
        SourceSecurityGroupId: !Ref LoadBalancerSecurityGroup
    SecurityGroupEgress:
      - IpProtocol: tcp
        FromPort: 80
        ToPort: 80
        CidrIp: 0.0.0.0/0
      - IpProtocol: tcp
        FromPort: 443
        ToPort: 443
        CidrIp: 0.0.0.0/0
# Classic load balancer placed in the public subnets. Listens for HTTPS on 443 with
# the CertArn certificate, routes to instance port 9000, and health-checks instances
# over HTTP at :9000/healthcheck.
LoadBalancerToPrivateASG:
  Type: AWS::ElasticLoadBalancing::LoadBalancer
  Properties:
    Subnets: !Ref PublicSubnets
    SecurityGroups: [!Ref LoadBalancerSecurityGroup]
    CrossZone: true
    Listeners:
      - Protocol: HTTPS
        LoadBalancerPort: 443
        InstancePort: 9000
        SSLCertificateId: !Sub '${CertArn}'
    HealthCheck:
      Target: HTTP:9000/healthcheck
      Interval: 30
      Timeout: 10
      HealthyThreshold: 2
      UnhealthyThreshold: 10
# Security group for the load balancer: inbound tcp/443 from anywhere, outbound
# tcp/9000 to anywhere.
# NOTE(review): the description says HTTP although the ingress rule opens 443; left
# untouched here because editing GroupDescription would replace the group.
LoadBalancerSecurityGroup:
  Type: AWS::EC2::SecurityGroup
  Properties:
    VpcId: !Ref VpcId
    GroupDescription: Open up HTTP access to load balancer
    SecurityGroupIngress:
      - IpProtocol: tcp
        FromPort: 443
        ToPort: 443
        CidrIp: 0.0.0.0/0
    SecurityGroupEgress:
      - IpProtocol: tcp
        FromPort: 9000
        ToPort: 9000
        CidrIp: 0.0.0.0/0
# Fires ScaleDownPolicy when the autoscaling group's average CPUUtilization is at or
# below the per-stage lower threshold for one evaluation period.
LowCPUAlarm:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: !Sub
      - 'Scale-Down if CPU is less than ${CPUAlarmThresholdLower} % over last ${CPUAlarmPeriodLower} seconds'
      - CPUAlarmPeriodLower: !FindInMap [StageVariables, !Ref Stage, CPUAlarmPeriodLower]
        CPUAlarmThresholdLower: !FindInMap [StageVariables, !Ref Stage, CPUAlarmThresholdLower]
    Namespace: AWS/EC2
    MetricName: CPUUtilization
    Dimensions:
      - Name: AutoScalingGroupName
        Value: !Ref PrivateRegistrationAutoscalingGroup
    Statistic: Average
    Period: !FindInMap [StageVariables, !Ref Stage, CPUAlarmPeriodLower]
    EvaluationPeriods: 1
    ComparisonOperator: LessThanOrEqualToThreshold
    Threshold: !FindInMap [StageVariables, !Ref Stage, CPUAlarmThresholdLower]
    AlarmActions: [!Ref ScaleDownPolicy]
# Autoscaling group running the registration instances in the private subnets,
# registered with the classic load balancer and using its health checks.
PrivateRegistrationAutoscalingGroup:
  Type: AWS::AutoScaling::AutoScalingGroup
  Properties:
    AvailabilityZones: !GetAZs
    HealthCheckGracePeriod: 400
    HealthCheckType: ELB
    LaunchConfigurationName: !Ref RegistrationLaunchConfig
    LoadBalancerNames:
      - !Ref LoadBalancerToPrivateASG
    MaxSize: !Ref ASGMaxSize
    MinSize: !Ref ASGMinSize
    # Publish launch/terminate failures to the per-stage AutoscalingNotifications topic.
    NotificationConfiguration:
      NotificationTypes:
        - autoscaling:EC2_INSTANCE_LAUNCH_ERROR
        - autoscaling:EC2_INSTANCE_TERMINATE_ERROR
      # Region comes from the stack (as for the other ARNs in this template) rather
      # than a hard-coded eu-west-1; the rendered ARN is unchanged in eu-west-1.
      TopicARN: !Sub arn:aws:sns:${AWS::Region}:${AWS::AccountId}:AutoscalingNotifications${Stage}
    # Stage/Stack/App tags are propagated to every launched instance.
    Tags:
      - Key: Stage
        PropagateAtLaunch: true
        Value: !Ref Stage
      - Key: Stack
        PropagateAtLaunch: true
        Value:
          !FindInMap [Constants, Stack, Value]
      - Key: App
        PropagateAtLaunch: true
        Value:
          !FindInMap [Constants, App, Value]
    VPCZoneIdentifier: !Ref PrivateSubnets
# Launch configuration for the registration instances: no public IP, token-required
# instance metadata (HttpTokens: required), and a boot script that downloads and
# installs the application .deb from the dist bucket and then runs the Kinesis agent
# configure script against the application log.
RegistrationLaunchConfig:
  Type: AWS::AutoScaling::LaunchConfiguration
  Properties:
    ImageId: !Ref AMI
    InstanceType: !FindInMap [StageVariables, !Ref Stage, InstanceType]
    IamInstanceProfile: !Ref DistributionInstanceProfile
    AssociatePublicIpAddress: false
    MetadataOptions:
      HttpTokens: required
    SecurityGroups:
      - !Ref InstanceSecurityGroup
      - !Ref GuardianAccessSecurityGroup
      - !Ref VPCSecurityGroup
    UserData:
      Fn::Base64: !Sub
        - |
          #!/bin/bash -ev
          aws --region ${AWS::Region} s3 cp s3://${DistBucket}/${Stack}/${Stage}/${App}/${App}_1.0-latest_all.deb /tmp
          dpkg -i /tmp/${App}_1.0-latest_all.deb
          /opt/aws-kinesis-agent/configure-aws-kinesis-agent ${AWS::Region} mobile-log-aggregation-${Stage} /var/log/${App}/application.log
        - App: !FindInMap [Constants, App, Value]
          Stack: !FindInMap [Constants, Stack, Value]
# Removes one instance from the group (ChangeInCapacity -1) with a 3600 cooldown;
# invoked by LowCPUAlarm.
ScaleDownPolicy:
  Type: AWS::AutoScaling::ScalingPolicy
  Properties:
    AutoScalingGroupName: !Ref PrivateRegistrationAutoscalingGroup
    AdjustmentType: ChangeInCapacity
    ScalingAdjustment: -1
    Cooldown: 3600
# Grows the group by 100 percent of current capacity (PercentChangeInCapacity) with
# a 300 cooldown; invoked by HighCPUAlarm.
ScaleUpPolicy:
  Type: AWS::AutoScaling::ScalingPolicy
  Properties:
    AutoScalingGroupName: !Ref PrivateRegistrationAutoscalingGroup
    AdjustmentType: PercentChangeInCapacity
    ScalingAdjustment: 100
    Cooldown: 300
# Alarms (and notifies on recovery) when the load balancer's backend 2XX count,
# summed over one 1800-second period, is below NotEnough200sThreshold.
# Missing data counts as breaching.
NotEnoughHttpCode200sAlarm:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: !Sub 'Triggers if load balancer in ${Stage} does not have enough 200s in half an hour. ${RunbookCopy}'
    Namespace: AWS/ELB
    MetricName: HTTPCode_Backend_2XX
    Dimensions:
      - Name: LoadBalancerName
        Value: !Ref LoadBalancerToPrivateASG
    Statistic: Sum
    Period: 1800
    EvaluationPeriods: 1
    ComparisonOperator: LessThanThreshold
    Threshold: !Ref NotEnough200sThreshold
    TreatMissingData: breaching
    AlarmActions:
      - !Ref AlarmTopic
    OKActions:
      - !Ref AlarmTopic
# Alarms (and notifies on recovery) when the load balancer's backend 2XX count,
# summed over one 86400-second period, is below NotEnough200sPerDayThreshold.
# Missing data counts as breaching.
NotEnoughHttpCode200sPerDayAlarm:
  Type: AWS::CloudWatch::Alarm
  Properties:
    AlarmDescription: !Sub 'Triggers if load balancer in ${Stage} does not have enough 200s in a whole day. ${RunbookCopy}'
    Namespace: AWS/ELB
    MetricName: HTTPCode_Backend_2XX
    Dimensions:
      - Name: LoadBalancerName
        Value: !Ref LoadBalancerToPrivateASG
    Statistic: Sum
    Period: 86400
    EvaluationPeriods: 1
    ComparisonOperator: LessThanThreshold
    Threshold: !Ref NotEnough200sPerDayThreshold
    TreatMissingData: breaching
    AlarmActions:
      - !Ref AlarmTopic
    OKActions:
      - !Ref AlarmTopic