cdk/lib/frontend.ts (395 lines of code) (raw):

import { GuEc2App } from "@guardian/cdk";
import { AccessScope } from "@guardian/cdk/lib/constants";
import { GuAlarm } from "@guardian/cdk/lib/constructs/cloudwatch";
import type { GuStackProps } from "@guardian/cdk/lib/constructs/core";
import { GuStack } from "@guardian/cdk/lib/constructs/core";
import {
  GuAllowPolicy,
  GuGetS3ObjectsPolicy,
  GuPutCloudwatchMetricsPolicy,
} from "@guardian/cdk/lib/constructs/iam";
import type { GuAsgCapacity } from "@guardian/cdk/lib/types";
import type { App } from "aws-cdk-lib";
import { Duration } from "aws-cdk-lib";
import {
  ComparisonOperator,
  Metric,
  TreatMissingData,
} from "aws-cdk-lib/aws-cloudwatch";
import {
  InstanceClass,
  InstanceSize,
  InstanceType,
  UserData,
} from "aws-cdk-lib/aws-ec2";
import { SslPolicy } from "aws-cdk-lib/aws-elasticloadbalancingv2";
import type { CfnListener } from "aws-cdk-lib/aws-elasticloadbalancingv2";
import { FilterPattern, LogGroup, MetricFilter } from "aws-cdk-lib/aws-logs";

/**
 * Configuration for the {@link Frontend} stack.
 */
interface FrontendProps extends GuStackProps {
  /** Resources the instance role may read via the "DynamoPromotions" policy. */
  membershipSubPromotionsTables: string[];
  /** Domain name passed to the certificate configuration of the EC2 app. */
  domainName: string;
  /**
   * Auto-scaling capacity of the app. `minimumInstances` also drives the
   * threshold of the "ReducedHealthyInstancesAlarm".
   */
  scaling: GuAsgCapacity;
  /**
   * When `true`, the 5XX / unhealthy-instance alarms of the EC2 app and all
   * alarms declared in this stack are created; when `false`, none are.
   */
  shouldCreateAlarms: boolean;
}

/**
 * CDK stack for the support-frontend application: a public `GuEc2App`
 * listening on port 9000, the IAM policies its instances need, and
 * (optionally) the CloudWatch alarms that monitor it.
 */
export class Frontend extends GuStack {
  /**
   * @param scope - CDK app this stack belongs to.
   * @param id - Construct id of the stack.
   * @param props - Stack configuration, see {@link FrontendProps}.
   */
  constructor(scope: App, id: string, props: FrontendProps) {
    const {
      membershipSubPromotionsTables,
      domainName,
      scaling,
      shouldCreateAlarms,
    } = props;

    super(scope, id, props);

    const app = "frontend";

    // Every alarm in this stack notifies the same per-stage SNS topic.
    const snsTopicName = `alarms-handler-topic-${this.stage}`;

    // NOTE(review): UserData.forLinux() emits its own shebang line by default,
    // so the `#!/bin/bash -ev` below is presumably just a comment on the second
    // line and `-ev` may not take effect. Confirm before relying on fail-fast
    // behaviour; passing `{ shebang: "#!/bin/bash -ev" }` would be the way to
    // apply the flags.
    const userData = UserData.forLinux();
    userData.addCommands(`#!/bin/bash -ev
mkdir /etc/gu
aws --region ${this.region} s3 cp s3://membership-dist/${this.stack}/${this.stage}/${app}/support-frontend_1.0-SNAPSHOT_all.deb /tmp
dpkg -i /tmp/support-frontend_1.0-SNAPSHOT_all.deb
/opt/cloudwatch-logs/configure-logs application ${this.stack} ${this.stage} ${app} /var/log/support-frontend/application.log '%Y-%m-%dT%H:%M:%S,%f%z'`);

    const policies = [
      // TODO: can we 'standardise' the way we load config to use the default permissions from GuEc2App?
      new GuAllowPolicy(this, "SSMGet", {
        actions: ["ssm:GetParametersByPath"],
        resources: [
          `arn:aws:ssm:${this.region}:${this.account}:parameter/${this.stack}/${app}/${this.stage}`,
        ],
      }),
      new GuGetS3ObjectsPolicy(this, "PrivateBucket", {
        bucketName: "gu-zuora-catalog",
        paths: ["PROD/Zuora-PROD/catalog.json", "PROD/Zuora-CODE/catalog.json"],
      }),
      new GuGetS3ObjectsPolicy(this, "SettingsBucket", {
        bucketName: "support-admin-console",
        paths: [`${this.stage}/*`],
      }),
      new GuGetS3ObjectsPolicy(this, "PromoToolBucket", {
        bucketName: "gu-promotions-tool-private",
        paths: ["*/defaultPromos.json"],
      }),
      new GuPutCloudwatchMetricsPolicy(this),
      // TODO: should we move logs to kinesis?
      // The construct id says "Metrics" but the actions are CloudWatch Logs
      // ones; the id is kept as-is so the synthesized logical id is unchanged.
      new GuAllowPolicy(this, "CloudwatchMetrics", {
        actions: [
          "logs:CreateLogGroup",
          "logs:CreateLogStream",
          "logs:PutLogEvents",
          "logs:DescribeLogStreams",
        ],
        resources: ["arn:aws:logs:*:*:*"],
      }),
      new GuAllowPolicy(this, "StateMachines", {
        actions: [
          "states:ListStateMachines",
          "states:StartExecution",
          "states:GetExecutionHistory",
          "states:DescribeStateMachine",
        ],
        resources: ["arn:aws:states:*:*:*"],
      }),
      new GuAllowPolicy(this, "DynamoPromotions", {
        actions: [
          "dynamodb:GetItem",
          "dynamodb:Scan",
          "dynamodb:Query",
          "dynamodb:DescribeTable",
        ],
        resources: membershipSubPromotionsTables,
      }),
      new GuAllowPolicy(this, "StripeSetupIntentLambda", {
        actions: ["lambda:InvokeFunction"],
        resources: [
          `arn:aws:lambda:eu-west-1:${this.account}:function:stripe-intent-${this.stage}`,
        ],
      }),
      new GuAllowPolicy(this, "DynamoLandingPageTests", {
        actions: ["dynamodb:Query"],
        resources: [
          `arn:aws:dynamodb:*:*:table/support-admin-console-channel-tests-${this.stage}`,
        ],
      }),
    ];

    /** Capitalises the first character of an alarm's short description. */
    const alarmName = (shortDescription: string) =>
      shortDescription.charAt(0).toUpperCase() + shortDescription.slice(1);

    /** Wraps an impact statement with a link to the incident process doc. */
    const alarmDescription = (description: string) =>
      `Impact - ${description}. Follow the process in https://docs.google.com/document/d/1_3El3cly9d7u_jPgTcRjLxmdG2e919zCLvmcFCLOYAk/edit`;

    // `false` disables the built-in 5XX alarm of GuEc2App entirely.
    const http5xxAlarm = shouldCreateAlarms
      ? {
          alarmName: alarmName("support-frontend is returning 5XX errors"),
          alarmDescription: alarmDescription(
            "Some or all actions on support website are failing"
          ),
          actionsEnabled: shouldCreateAlarms,
          tolerated5xxPercentage: 5,
        }
      : false;

    const ec2App = new GuEc2App(this, {
      applicationPort: 9000,
      app,
      access: { scope: AccessScope.PUBLIC },
      certificateProps: {
        domainName,
        hostedZoneId: "Z1E4V12LQGXFEC",
      },
      monitoringConfiguration: {
        snsTopicName,
        http5xxAlarm,
        unhealthyInstancesAlarm: shouldCreateAlarms,
      },
      userData,
      roleConfiguration: {
        additionalPolicies: policies,
      },
      scaling,
      instanceType: InstanceType.of(InstanceClass.T4G, InstanceSize.SMALL),
    });

    // Override the SSL policy on the underlying CloudFormation listener.
    (ec2App.listener.node.defaultChild as CfnListener).sslPolicy =
      SslPolicy.TLS13_RES;

    // ---- Alarms ---- //

    if (shouldCreateAlarms) {
      /**
       * "Average" metric in the AWS/ApplicationELB namespace, scoped to this
       * app's load balancer and target group.
       */
      const loadBalancerMetric = (metricName: string, period: Duration) =>
        new Metric({
          metricName,
          namespace: "AWS/ApplicationELB",
          dimensionsMap: {
            LoadBalancer: ec2App.loadBalancer.loadBalancerFullName,
            TargetGroup: ec2App.targetGroup.targetGroupFullName,
          },
          statistic: "Average",
          period,
        });

      /**
       * One-minute metric in the "support-frontend" namespace with the given
       * dimensions; the statistic defaults to "Sum".
       */
      const frontendMetric = (
        metricName: string,
        dimensionsMap: Record<string, string>,
        statistic = "Sum"
      ) =>
        new Metric({
          metricName,
          namespace: "support-frontend",
          dimensionsMap,
          statistic,
          period: Duration.seconds(60),
        });

      new GuAlarm(this, "NoHealthyInstancesAlarm", {
        app,
        alarmName: alarmName("no healthy instances for support-frontend"),
        alarmDescription: alarmDescription(
          "Cannot sell any subscriptions or contributions products"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 0.5,
        evaluationPeriods: 2,
        comparisonOperator: ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: loadBalancerMetric("HealthyHostCount", Duration.seconds(60)),
        snsTopicName,
      });

      new GuAlarm(this, "ReducedHealthyInstancesAlarm", {
        app,
        alarmName: alarmName(
          "reduced number healthy instances for support-frontend"
        ),
        alarmDescription: alarmDescription(
          "Imminent issue cannot sell any subscriptions or contributions products"
        ),
        actionsEnabled: shouldCreateAlarms,
        // Fires once the average healthy host count drops below the ASG minimum.
        threshold: scaling.minimumInstances - 1,
        evaluationPeriods: 2,
        comparisonOperator: ComparisonOperator.LESS_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: loadBalancerMetric("HealthyHostCount", Duration.seconds(300)),
        snsTopicName,
      });

      new GuAlarm(this, "LatencyNotificationAlarm", {
        app,
        alarmName: alarmName("support-frontend has high latency"),
        alarmDescription: alarmDescription(
          "support-frontend users are seeing slow responses"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 2,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: loadBalancerMetric("TargetResponseTime", Duration.seconds(60)),
        snsTopicName,
      });

      // TODO: Do we still need this?
      new GuAlarm(this, "CatalogLoadingFailureAlarm", {
        app,
        alarmName: alarmName(
          "support-frontend could not load the Zuora catalog from S3"
        ),
        alarmDescription: alarmDescription(
          "Cannot sell any subscriptions products"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 1,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: frontendMetric(
          "CatalogLoadingFailure",
          { Environment: "PROD" },
          "Average"
        ),
        snsTopicName,
      });

      new GuAlarm(this, "DefaultPromotionsLoadingFailureAlarm", {
        app,
        alarmName: alarmName(
          "support-frontend could not load default promo codes from S3"
        ),
        // Deliberately not built with alarmDescription(): the original text
        // carries no process link, and it is reproduced verbatim here.
        alarmDescription:
          "Impact - cannot display default product promotions on the support site",
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 1,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: frontendMetric("DefaultPromotionsLoadingFailure", {
          Environment: "PROD",
        }),
        snsTopicName,
      });

      // Emits a count of 1 for every log line containing the marker string.
      const stateMachineUnavailableMetricFilter = new MetricFilter(
        this,
        "StateMachineUnavailableMetricFilter",
        {
          logGroup: LogGroup.fromLogGroupName(
            this,
            "SupportFrontendLogGroup",
            `support-frontend-${this.stage}`
          ),
          metricNamespace: `support-frontend-${this.stage}`,
          metricName: "state-machine-unavailable",
          filterPattern: FilterPattern.literal(
            '"regular-contributions-state-machine-unavailable"'
          ),
          metricValue: "1",
        }
      );

      new GuAlarm(this, "StateMachineUnavailableAlarm", {
        app,
        alarmName: alarmName("support-workers state machine unavailable"),
        alarmDescription: alarmDescription(
          "Cannot sell any subscriptions products"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 2,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: stateMachineUnavailableMetricFilter.metric({
          period: Duration.seconds(60),
          statistic: "Sum",
        }),
        treatMissingData: TreatMissingData.NOT_BREACHING,
        snsTopicName,
      });

      new GuAlarm(this, "ServerSideCreateFailureAlarm", {
        app,
        alarmName: alarmName(
          "support-frontend create recurring product call failed"
        ),
        alarmDescription: alarmDescription(
          "Someone pressed buy on a recurring product but received an error"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 1,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: frontendMetric("ServerSideCreateFailure", { Stage: "PROD" }),
        treatMissingData: TreatMissingData.NOT_BREACHING,
        snsTopicName,
      });

      new GuAlarm(this, "GetDeliveryAgentsFailure", {
        app,
        alarmName: alarmName("support-frontend GetDeliveryAgentsFailure"),
        alarmDescription: alarmDescription(
          "support-frontend failed to get delivery agents from PaperRound"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 1,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: frontendMetric("GetDeliveryAgentsFailure", {
          Stage: this.stage,
        }),
        treatMissingData: TreatMissingData.NOT_BREACHING,
        snsTopicName,
      });

      new GuAlarm(this, "ServerSideHighThresholdCreateFailureAlarm", {
        app,
        alarmName: alarmName(
          "support-frontend create recurring product call failed multiple times for a known reason"
        ),
        alarmDescription: alarmDescription(
          "Someone pressed buy on a recurring product but received an error. This has happened multiple times for a known reason."
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        // Alarm when 10 of the last 60 one-minute datapoints breach.
        evaluationPeriods: 60,
        datapointsToAlarm: 10,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: frontendMetric("ServerSideHighThresholdCreateFailure", {
          Stage: this.stage,
        }),
        treatMissingData: TreatMissingData.NOT_BREACHING,
        snsTopicName,
      });

      new GuAlarm(this, "GetLandingPageTestsError", {
        app,
        alarmName: alarmName("support-frontend GetLandingPageTestsError"),
        alarmDescription: alarmDescription(
          "support-frontend failed to fetch one or more landing page tests from DynamoDb"
        ),
        actionsEnabled: shouldCreateAlarms,
        threshold: 1,
        evaluationPeriods: 1,
        comparisonOperator:
          ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
        metric: frontendMetric("GetLandingPageTestsError", {
          Stage: this.stage,
        }),
        treatMissingData: TreatMissingData.NOT_BREACHING,
        snsTopicName,
      });
    }
  }
}