packages/cdk/lib/cloudquery/config.ts (302 lines of code) (raw):

import { GuardianOrganisationalUnits } from '@guardian/private-infrastructure-config';
import { Versions } from './versions';

/**
 * Shape of a CloudQuery configuration document as produced by the builders in
 * this module: a `spec` object that may list `tables`, plus arbitrary other
 * keys at both levels.
 */
export type CloudqueryConfig = {
  spec: {
    tables?: string[];
    [k: string]: unknown;
  };
  [k: string]: unknown;
};

/**
 * Table selection options shared by the source builders.
 */
interface CloudqueryTableConfig {
  /** Emitted as `tables` in the generated source spec. */
  tables?: string[];
  /** Emitted as `skip_tables` in the generated source spec. */
  skipTables?: string[];
  /**
   * Forwarded as `spec.concurrency` by the AWS builders.
   * NOTE: {@link githubSourceConfig} and {@link fastlySourceConfig} do not read this field;
   * they emit a fixed concurrency value instead.
   */
  concurrency?: number;
}

/**
 * Table selection options for the GitHub source, plus the organisation to query.
 */
interface GitHubCloudqueryTableConfig extends CloudqueryTableConfig {
  /** Used both as the single entry of `orgs` and as the `app_auth` organisation. */
  org: string;
}

/**
 * Specifies the update method to use when inserting rows to Postgres.
 *
 * @see https://cli-docs.cloudquery.io/docs/reference/destination-spec#write_mode
 */
export enum CloudqueryWriteMode {
  /**
   * Overwrite existing rows with the same primary key, and delete rows that are no longer present in the cloud.
   */
  OverwriteDeleteStale = 'overwrite-delete-stale',

  /**
   * Same as {@link CloudqueryWriteMode.OverwriteDeleteStale}, but doesn't delete stale rows from previous syncs.
   */
  Overwrite = 'overwrite',

  /**
   * Rows are never overwritten or deleted, only appended.
   */
  Append = 'append',
}

/**
 * Guard shared by the source builders that accept a table selection.
 *
 * The check is a truthiness check, so an empty array counts as "specified".
 *
 * @param tableConfig The table selection to validate.
 * @throws Error when both `tables` and `skipTables` are falsy.
 */
function assertTablesOrSkipTables(tableConfig: CloudqueryTableConfig): void {
  if (!tableConfig.tables && !tableConfig.skipTables) {
    throw new Error('Must specify either tables or skipTables');
  }
}

/**
 * Create a ServiceCatalogue destination configuration for Postgres.
 *
 * The `${DB_*}` placeholders are plain string literals here (no JavaScript
 * interpolation happens); they are presumably substituted by CloudQuery from
 * the environment at run time — confirm against the deployment.
 *
 * @param writeMode Emitted as the destination's `write_mode`.
 */
export function postgresDestinationConfig(
  writeMode: CloudqueryWriteMode,
): CloudqueryConfig {
  return {
    kind: 'destination',
    spec: {
      name: 'postgresql',
      registry: 'github',
      path: 'cloudquery/postgresql',
      version: `v${Versions.CloudqueryPostgresDestination}`,
      write_mode: writeMode,
      migrate_mode: 'forced',
      spec: {
        connection_string: [
          'user=${DB_USERNAME}',
          'password=${DB_PASSWORD}',
          'host=${DB_HOST}',
          'port=5432',
          'dbname=postgres',
          'sslmode=verify-full',
        ].join(' '),
      },
    },
  };
}

/**
 * Create a ServiceCatalogue source configuration for the AWS plugin.
 *
 * @param tableConfig Which tables to include or exclude, and the concurrency to request.
 * @param extraConfig Extra fields for the plugin's inner `spec`. They are spread after
 * `concurrency`, so a `concurrency` key in here takes precedence.
 * @throws Error when neither `tables` nor `skipTables` is specified.
 */
export function awsSourceConfig(
  tableConfig: CloudqueryTableConfig,
  extraConfig: Record<string, unknown> = {},
): CloudqueryConfig {
  assertTablesOrSkipTables(tableConfig);
  const { tables, skipTables, concurrency } = tableConfig;

  return {
    kind: 'source',
    spec: {
      name: 'aws',
      path: 'cloudquery/aws',
      version: `v${Versions.CloudqueryAws}`,
      tables,
      skip_dependent_tables: false,
      skip_tables: skipTables,
      destinations: ['postgresql'],
      otel_endpoint: '0.0.0.0:4318',
      otel_endpoint_insecure: true,
      spec: {
        concurrency,
        ...extraConfig,
      },
    },
  };
}

/**
 * Create a ServiceCatalogue configuration for all AWS accounts in the organisation.
 *
 * @param tableConfig Which tables to include or exclude.
 * @param extraConfig Extra spec fields. Spread after `org`, so an `org` key in here
 * replaces the default one.
 * @throws Error when neither `tables` nor `skipTables` is specified.
 * @see https://www.cloudquery.io/docs/plugins/sources/aws/configuration#org
 */
export function awsSourceConfigForOrganisation(
  tableConfig: CloudqueryTableConfig,
  extraConfig: Record<string, unknown> = {},
): CloudqueryConfig {
  return awsSourceConfig(tableConfig, {
    org: {
      // See: https://github.com/guardian/aws-account-setup/pull/58
      member_role_name: 'cloudquery-access',
      organization_units: [GuardianOrganisationalUnits.Root],
    },
    ...extraConfig,
  });
}

/**
 * Create a ServiceCatalogue configuration for a single AWS account.
 * Use this for those services running across the organisation which are aggregated in a single account.
 * For example, Access Analyzer.
 *
 * @param accountNumber The AWS account to query. ServiceCatalogue will assume the role `cloudquery-access` in this account.
 * @param tableConfig Which tables to include or exclude.
 * @param extraConfig Extra spec fields. Spread after `accounts`, so an `accounts` key in
 * here replaces the default one.
 * @throws Error when neither `tables` nor `skipTables` is specified.
 * @see https://www.cloudquery.io/docs/plugins/sources/aws/configuration#account
 */
export function awsSourceConfigForAccount(
  accountNumber: string,
  tableConfig: CloudqueryTableConfig,
  extraConfig: Record<string, unknown> = {},
): CloudqueryConfig {
  return awsSourceConfig(tableConfig, {
    accounts: [
      {
        id: `cq-for-${accountNumber}`,
        role_arn: `arn:aws:iam::${accountNumber}:role/cloudquery-access`,
      },
    ],
    ...extraConfig,
  });
}

/**
 * Create a ServiceCatalogue source configuration for the GitHub plugin.
 *
 * NOTE: `tableConfig.concurrency` is not read; a fixed value is emitted.
 *
 * @param tableConfig Which tables to include or exclude, and the organisation to query.
 * @throws Error when neither `tables` nor `skipTables` is specified.
 */
export function githubSourceConfig(
  tableConfig: GitHubCloudqueryTableConfig,
): CloudqueryConfig {
  assertTablesOrSkipTables(tableConfig);
  const { tables, skipTables, org } = tableConfig;

  return {
    kind: 'source',
    spec: {
      name: 'github',
      path: 'cloudquery/github',
      version: `v${Versions.CloudqueryGithub}`,
      tables,
      skip_dependent_tables: false,
      skip_tables: skipTables,
      destinations: ['postgresql'],
      spec: {
        concurrency: 1000, // TODO what's the ideal value here?!
        orgs: [org],
        app_auth: [
          {
            org,

            // For simplicity, read all configuration from disk.
            // The two values below evaluate to the literal text `${file:<path>}`.
            private_key_path: `${serviceCatalogueConfigDirectory}/github-private-key`,
            app_id:
              '${' +
              `file:${serviceCatalogueConfigDirectory}/github-app-id` +
              '}',
            installation_id:
              '${' +
              `file:${serviceCatalogueConfigDirectory}/github-installation-id` +
              '}',
          },
        ],
        include_archived_repos: true,
      },
    },
  };
}

/**
 * Configuration for the Fastly source plugin.
 *
 * NOTE: `tableConfig.concurrency` is not read; a fixed value is emitted.
 *
 * @param tableConfig Which tables to include or exclude.
 * @throws Error when neither `tables` nor `skipTables` is specified.
 * @see https://www.cloudquery.io/docs/plugins/sources/fastly/overview#configuration
 */
export function fastlySourceConfig(
  tableConfig: CloudqueryTableConfig,
): CloudqueryConfig {
  assertTablesOrSkipTables(tableConfig);
  const { tables, skipTables } = tableConfig;

  return {
    kind: 'source',
    spec: {
      name: 'fastly',
      path: 'cloudquery/fastly',
      version: `v${Versions.CloudqueryFastly}`,
      tables,
      skip_dependent_tables: false,
      skip_tables: skipTables,
      destinations: ['postgresql'],
      spec: {
        // The Fastly API is rate limited to 1000 requests per hour.
        // See https://docs.fastly.com/en/guides/resource-limits#rate-and-time-limits.
        // TODO what's the ideal value here?!
        concurrency: 1000,
        fastly_api_key: '${FASTLY_API_KEY}',
      },
    },
  };
}

/**
 * Configuration for the `guardian/galaxies` source plugin.
 *
 * @param bucketName Emitted as the plugin's `bucket` setting.
 */
export function galaxiesSourceConfig(bucketName: string): CloudqueryConfig {
  return {
    kind: 'source',
    spec: {
      name: 'galaxies',
      path: 'guardian/galaxies',
      registry: 'github',
      version: `v${Versions.CloudqueryGalaxies}`,
      destinations: ['postgresql'],
      tables: [
        'galaxies_people_table',
        'galaxies_teams_table',
        'galaxies_streams_table',
        'galaxies_people_profile_info_table',
      ],
      spec: {
        bucket: bucketName,
      },
    },
  };
}

/**
 * Configuration for the NS1 source plugin, addressed through the `grpc`
 * registry at `localhost:7777` rather than by a published version.
 */
export function ns1SourceConfig(): CloudqueryConfig {
  return {
    kind: 'source',
    spec: {
      name: 'ns1',
      registry: 'grpc',
      path: 'localhost:7777',

      // This property is required, but only relevant for GitHub hosted plugins.
      // Use a fake value to satisfy the config parser.
      // See https://docs.cloudquery.io/docs/reference/source-spec#version
      version: 'v0.0.0',
      tables: ['ns1_*'],
      destinations: ['postgresql'],
      spec: {
        apiKey: '${NS1_API_KEY}',
      },
    },
  };
}

/**
 * Configuration for a PostgreSQL source that reads the `riffraff_*` tables
 * (except `riffraff_deploy_logs`) from the `riffraff` database.
 *
 * The `${RIFFRAFF_DB_*}` placeholders are plain string literals here;
 * presumably substituted by CloudQuery at run time — confirm against the deployment.
 */
export function riffraffSourcesConfig(): CloudqueryConfig {
  return {
    kind: 'source',
    spec: {
      name: 'postgresql',
      path: 'cloudquery/postgresql',
      version: `v${Versions.CloudqueryPostgresSource}`,
      destinations: ['postgresql'],
      tables: ['riffraff_*'],
      skip_tables: ['riffraff_deploy_logs'],
      spec: {
        connection_string: [
          'user=${RIFFRAFF_DB_USERNAME}',
          'password=${RIFFRAFF_DB_PASSWORD}',
          'host=${RIFFRAFF_DB_HOST}',
          'port=5432',
          'dbname=riffraff',
          'sslmode=verify-full',
        ].join(' '),
      },
    },
  };
}

/**
 * Configuration for the `guardian/github-languages` source plugin.
 * No plugin-specific inner `spec` is emitted.
 */
export function githubLanguagesConfig(): CloudqueryConfig {
  return {
    kind: 'source',
    spec: {
      name: 'github-languages',
      path: 'guardian/github-languages',
      version: `v${Versions.CloudqueryGithubLanguages}`,
      destinations: ['postgresql'],
      tables: ['github_languages'],
      registry: 'github',
    },
  };
}

/**
 * Configuration for the `guardian/image-packages` source plugin, which
 * populates the `amigo_bake_packages` table.
 *
 * @param baseImagesTableName Emitted as `base_images_table`.
 * @param recipesTableName Emitted as `recipes_table`.
 * @param bakesTableName Emitted as `bakes_table`.
 * @param packagesBucketName Emitted as `bucket`.
 */
export function amigoBakePackagesConfig(
  baseImagesTableName: string,
  recipesTableName: string,
  bakesTableName: string,
  packagesBucketName: string,
): CloudqueryConfig {
  return {
    kind: 'source',
    spec: {
      name: 'image-packages',
      registry: 'github',
      path: 'guardian/image-packages',
      version: `v${Versions.CloudqueryImagePackages}`,
      destinations: ['postgresql'],
      tables: ['amigo_bake_packages'],
      spec: {
        base_images_table: baseImagesTableName,
        recipes_table: recipesTableName,
        bakes_table: bakesTableName,
        bucket: packagesBucketName,
      },
    },
  };
}

// Tables we are skipping because they are slow and/or uninteresting to us.
/**
 * AWS table names (plus one `*` wildcard pattern) that are excluded from collection.
 * Presumably passed as `skipTables` to the AWS source config builders — confirm at call sites.
 */
export const skipTables = [
  // This resource includes services that are available from AWS as well as other AWS accounts.
  'aws_ec2_vpc_endpoint_services',

  'aws_cloudtrail_events',
  'aws_docdb_cluster_parameter_groups',
  'aws_docdb_engine_versions',
  'aws_ec2_instance_types',
  'aws_elasticache_engine_versions',
  'aws_elasticache_parameter_groups',
  'aws_elasticache_reserved_cache_nodes_offerings',
  'aws_elasticache_service_updates',
  'aws_emr_supported_instance_types',
  'aws_neptune_cluster_parameter_groups',
  'aws_neptune_db_parameter_groups',
  'aws_rds_cluster_parameter_groups',
  'aws_rds_db_parameter_groups',
  'aws_rds_engine_versions',
  'aws_servicequotas_services',
  'aws_identitystore_users',
  'aws_identitystore_groups',
  'aws_quicksight_data_sets',
  'aws_quicksight_dashboards',
  'aws_quicksight_analyses',
  'aws_quicksight_users',
  'aws_quicksight_templates',
  'aws_quicksight_groups',
  'aws_quicksight_folders',
  'aws_quicksight_data_sources',
  'aws_amp_workspaces',
  'aws_ssoadmin_instances',
  'aws_glue_connections',
  'aws_computeoptimizer_ecs_service_recommendations',
  'aws_xray_sampling_rules',
  'aws_xray_resource_policies',
  'aws_xray_groups',

  // We don't really use Well-Architected and it confuses our tagging obligation
  // due to it having many resources managed by AWS without tags.
  'aws_wellarchitected_*',

  // These appear to be heavily rate limited, and not too interesting (yet).
  // Don't collect them to reduce execution time.
  'aws_stepfunctions_map_runs',
  'aws_stepfunctions_map_run_executions',
  'aws_stepfunctions_executions',
  'aws_scheduler_schedules',
];

/**
 * Directory used as the base path for configuration files referenced by the
 * generated CloudQuery config (e.g. the GitHub app credentials).
 */
export const serviceCatalogueConfigDirectory = '/usr/share/cloudquery';