packages/cdk/lib/cloudquery/cluster.ts (102 lines of code) (raw):
import type { AppIdentity, GuStack } from '@guardian/cdk/lib/constructs/core';
import type { GuSecurityGroup } from '@guardian/cdk/lib/constructs/ec2';
import type { ISecurityGroup, IVpc } from 'aws-cdk-lib/aws-ec2';
import { Cluster, type RepositoryImage, Secret } from 'aws-cdk-lib/aws-ecs';
import type { Schedule } from 'aws-cdk-lib/aws-events';
import type { IManagedPolicy, PolicyStatement } from 'aws-cdk-lib/aws-iam';
import type { DatabaseInstance } from 'aws-cdk-lib/aws-rds';
import type { Secret as SecretsManager } from 'aws-cdk-lib/aws-secretsmanager';
import type { CloudqueryConfig } from './config';
import { CloudqueryWriteMode } from './config';
import { ScheduledCloudqueryTask } from './task';
/**
* Describes a single CloudQuery source.
*
* {@link CloudqueryCluster} creates one {@link ScheduledCloudqueryTask} per source,
* forwarding every field below except `description`.
*/
export interface CloudquerySource {
/**
* The name of the source.
* This will get added to the `Name` tag of the task definition.
*
* It is also used to build the construct id of the task (`CloudquerySource-<name>`),
* so names should be unique within a cluster.
*/
name: string;
/**
* Purely descriptive, not used for anything runtime related.
* It is not forwarded to the scheduled task.
*/
description: string;
/**
* The rate at which to collect data.
*
* If this schedule is daily or weekly you should add an equivalent entry to the `cloudquery_table_frequency` table.
*/
schedule: Schedule;
/**
* Cloudquery config (aka 'spec') for this source.
*
* This should be the JS version of whatever YAML config you want to use for this source.
* It is handed to the scheduled task as its `sourceConfig`.
*/
config: CloudqueryConfig;
/**
* Policy statements required by this source.
*
* The cluster's log shipping policy is always placed in front of these.
* Treated as an empty list when omitted.
*/
policies?: PolicyStatement[];
/**
* Managed policies required by this source.
*
* Treated as an empty list when omitted.
*/
managedPolicies?: IManagedPolicy[];
/**
* Any secrets to pass to the ServiceCatalogue container.
*
* @see https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_ecs.ContainerDefinitionOptions.html#secrets
* @see https://repost.aws/knowledge-center/ecs-data-security-container-task
*/
secrets?: Record<string, Secret>;
/**
* Additional commands to run within the ServiceCatalogue container, executed first.
*/
additionalCommands?: string[];
/**
* The amount (in MiB) of memory used by the task.
*
* No default is applied by the cluster; an omitted value is forwarded as `undefined`.
*/
memoryLimitMiB?: 512 | 1024 | 2048 | 3072 | 4096 | 8192 | 16384 | 32768;
/**
* The number of cpu units used by the task.
*
* No default is applied by the cluster; an omitted value is forwarded as `undefined`.
*/
cpu?: 256 | 512 | 1024 | 2048 | 4096 | 8192 | 16384;
/**
* Any additional security groups applied to the task.
* For example, a group allowing access to Riff-Raff.
*/
additionalSecurityGroups?: ISecurityGroup[];
/**
* Run this task as a singleton?
* Useful to help avoid overlapping runs.
*
* @default false
*/
runAsSingleton?: boolean;
/**
* The image of a CloudQuery plugin that is distributed via Docker,
* i.e. plugins not written in Go.
*
* This image will be run on its own, exposing the GRPC server on localhost:7777.
* The CloudQuery source config should be configured with a registry of grpc, and path of localhost:7777.
*
* @see https://docs.cloudquery.io/docs/reference/source-spec
*/
dockerDistributedPluginImage?: RepositoryImage;
/**
* Specifies the update method to use when inserting rows to Postgres.
*
* @default {@link CloudqueryWriteMode.OverwriteDeleteStale}
*/
writeMode?: CloudqueryWriteMode;
}
/**
* Properties for {@link CloudqueryCluster}.
*
* Apart from `vpc`, which configures the cluster itself, these values are
* forwarded to the scheduled task created for each source.
*/
interface CloudqueryClusterProps extends AppIdentity {
/**
* The VPC to create the cluster in.
*/
vpc: IVpc;
/**
* The database for ServiceCatalogue to write to.
*/
db: DatabaseInstance;
/**
* The security group that provides access to the database.
*/
dbAccess: GuSecurityGroup;
/**
* The CloudQuery sources to collect data from.
* One scheduled task is created per entry, each with its own schedule.
*/
sources: CloudquerySource[];
/**
* Forwarded unchanged to every scheduled task.
* NOTE(review): presumably the name of the stream that task logs are shipped to — confirm against ScheduledCloudqueryTask.
*/
loggingStreamName: string;
/**
* A policy statement given to every scheduled task, placed ahead of the source's own `policies`.
* NOTE(review): presumably grants the permissions needed to ship logs — confirm with the caller.
*/
logShippingPolicy: PolicyStatement;
/**
* The Secrets Manager secret holding the CloudQuery API key.
* Each task receives the `api-key` JSON field of this secret.
*/
cloudqueryApiKey: SecretsManager;
/**
* Each CloudQuery data collection task has a schedule.
* When true, the schedule will be enabled, and data collection will occur as defined.
* When false, the schedule will be disabled. Tasks will need to be run manually using the CLI.
*/
enableCloudquerySchedules: boolean;
}
/**
 * The ECS cluster that hosts the ServiceCatalogue data collection tasks.
 *
 * The cluster is created in the supplied VPC with Fargate capacity providers
 * and container insights switched on. A {@link ScheduledCloudqueryTask} is then
 * created for every entry of `props.sources`.
 *
 * NOTE(review): the cluster and its tasks are intended to live in the private
 * subnets of the VPC; subnet selection is not performed in this class, so
 * confirm against ScheduledCloudqueryTask.
 */
export class CloudqueryCluster extends Cluster {
	/**
	 * @param scope The stack that owns the cluster. The scheduled tasks are
	 * created in this scope too (as siblings of the cluster, not children).
	 * @param id The construct id of the cluster.
	 * @param props Cluster settings and the list of sources to schedule.
	 */
	constructor(scope: GuStack, id: string, props: CloudqueryClusterProps) {
		super(scope, id, {
			vpc: props.vpc,
			enableFargateCapacityProviders: true,
			containerInsights: true,
		});

		const { sources, logShippingPolicy, cloudqueryApiKey } = props;

		// Settings that are the same for every task, whatever the source.
		const sharedTaskProps = {
			app: props.app,
			cluster: this,
			db: props.db,
			dbAccess: props.dbAccess,
			loggingStreamName: props.loggingStreamName,
			enabled: props.enableCloudquerySchedules,
		};

		for (const source of sources) {
			// Optional fields that need a fallback when the source omits them.
			const {
				name,
				policies: sourcePolicies = [],
				managedPolicies = [],
				runAsSingleton = false,
				writeMode = CloudqueryWriteMode.OverwriteDeleteStale,
			} = source;

			// Only the `api-key` field of the secret is exposed to the task.
			const cloudQueryApiKey = Secret.fromSecretsManager(
				cloudqueryApiKey,
				'api-key',
			);

			new ScheduledCloudqueryTask(scope, `CloudquerySource-${name}`, {
				...sharedTaskProps,
				name,
				managedPolicies,
				// Every task may ship logs, in addition to its own permissions.
				policies: [logShippingPolicy, ...sourcePolicies],
				schedule: source.schedule,
				sourceConfig: source.config,
				secrets: source.secrets,
				additionalCommands: source.additionalCommands,
				memoryLimitMiB: source.memoryLimitMiB,
				cpu: source.cpu,
				additionalSecurityGroups: source.additionalSecurityGroups,
				runAsSingleton,
				cloudQueryApiKey,
				dockerDistributedPluginImage: source.dockerDistributedPluginImage,
				writeMode,
			});
		}
	}
}