packages/constructs/L3/governance/glue-catalog-l3-construct/lib/glue-catalog-l3-construct.ts (392 lines of code) (raw):
/*!
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
import { MdaaCatalogSettings } from '@aws-mdaa/glue-constructs';
import { MdaaKmsKey, DECRYPT_ACTIONS, ENCRYPT_ACTIONS } from '@aws-mdaa/kms-constructs';
import { MdaaLambdaFunction, MdaaLambdaRole } from '@aws-mdaa/lambda-constructs';
import { MdaaL3Construct, MdaaL3ConstructProps } from '@aws-mdaa/l3-construct';
import { CustomResource, Duration } from 'aws-cdk-lib';
import { CfnDataCatalog } from 'aws-cdk-lib/aws-athena';
import {
ArnPrincipal,
Effect,
IPrincipal,
PolicyDocument,
PolicyStatement,
ServicePrincipal,
} from 'aws-cdk-lib/aws-iam';
import { Key } from 'aws-cdk-lib/aws-kms';
import { Code, Runtime } from 'aws-cdk-lib/aws-lambda';
import { StringParameter } from 'aws-cdk-lib/aws-ssm';
import { Provider } from 'aws-cdk-lib/custom-resources';
import { MdaaNagSuppressions } from '@aws-mdaa/construct'; //NOSONAR
import { Construct } from 'constructs';
/** Glue API actions granted to read principals by the catalog resource policy. */
const GLUE_READ_ACTIONS: string[] = ['glue:Get*', 'glue:List*'];
/**
 * Glue API actions granted to write principals by the catalog resource policy.
 * NOTE(review): this is an independent copy of the read actions, so no mutating
 * Glue actions are currently granted to write principals - confirm this is intended.
 */
const GLUE_WRITE_ACTIONS: string[] = Array.from(GLUE_READ_ACTIONS);
/** Glue API action granted to the RAM service principal for cross-account sharing. */
const GLUE_SHARE_RESOURCE_ACTIONS: string[] = ['glue:ShareResource'];
/**
* Definition of a single named access policy: which principals receive read or
* read/write access to which Glue catalog resources via the catalog resource policy.
*/
export interface CatalogAccessPolicyProps {
/**
* Arns for principals which will be provided read access to the catalog resources via resource policy statement.
* These principals are also granted decrypt and describe access on the catalog KMS key.
*/
readonly readPrincipalArns?: string[];
/**
* Arns for principals which will be provided read/write access to the catalog resources via resource policy statement.
* These principals are also granted encrypt, decrypt, describe and create-grant access on the catalog KMS key.
* NOTE(review): the Glue actions granted to write principals are currently a copy of the read actions.
*/
readonly writePrincipalArns?: string[];
/**
* List of resources to which access is being granted.
* Each entry is appended to `arn:<partition>:glue:<region>:<account>:` to form the resource arn
* (for example `database/mydb` or `table/mydb/*`). The `catalog` and `database/default` arns are
* always added to the statement in addition to the listed resources.
*/
readonly resources: string[];
}
/**
* Properties for the GlueCatalogL3Construct.
* In every account map below, entries whose account id equals the account the
* construct is deployed into are ignored.
*/
export interface GlueCatalogL3ConstructProps extends MdaaL3ConstructProps {
/**
* Map of access policy names to access policy definitions
*/
readonly accessPolicies?: { [key: string]: CatalogAccessPolicyProps };
/**
* Map of account names to account ids which will be provided read access to the catalog.
* The root principal of each account is granted read access in the catalog resource policy,
* and each account is allowed to use the catalog KMS key via Glue.
*/
readonly consumerAccounts?: { [key: string]: string };
/**
* Map of account names to account ids for which additional Athena catalogs will be created
* pointing to the producer account Glue catalog. The map key is used as the Athena catalog
* name and the value as its `catalog-id`.
*/
readonly producerAccounts?: { [key: string]: string };
/**
* Map of account names to account ids which will be provided access to the catalog KMS key only
* (no statement is added to the catalog resource policy for these accounts).
*/
readonly kmsKeyConsumerAccounts?: { [key: string]: string };
}
/**
 * L3 construct which configures governance for the Glue Data Catalog of the deploying account.
 *
 * On construction it:
 *  - builds a Glue catalog resource policy from the configured access policies and consumer
 *    accounts and, when that policy has at least one statement, applies it through a
 *    Lambda-backed custom resource and stores the returned policy hash in an SSM parameter;
 *  - creates an Athena data catalog of type GLUE for each producer account;
 *  - creates the catalog KMS key and hands it to the Glue catalog settings.
 */
export class GlueCatalogL3Construct extends MdaaL3Construct {
  protected readonly props: GlueCatalogL3ConstructProps;
  /** Memoized custom resource provider; created on first use. */
  private catalogResourcePolicyProvider?: Provider;
  /** Consumer accounts (name -> account id), excluding the deploying account. */
  private readonly consumerAccounts: { [key: string]: string };
  /** KMS-only consumer accounts (name -> account id), excluding the deploying account. */
  private readonly kmsKeyConsumerAccounts: { [key: string]: string };
  /** Producer accounts (name -> account id), excluding the deploying account. */
  private readonly producerAccounts: { [key: string]: string };

  /**
   * @param scope Parent construct
   * @param id Construct id
   * @param props Access policies and consumer/producer account maps
   */
  constructor(scope: Construct, id: string, props: GlueCatalogL3ConstructProps) {
    super(scope, id, props);
    this.props = props;

    this.consumerAccounts = this.excludeLocalAccount(props.consumerAccounts);
    this.kmsKeyConsumerAccounts = this.excludeLocalAccount(props.kmsKeyConsumerAccounts);
    this.producerAccounts = this.excludeLocalAccount(props.producerAccounts);

    // Principals are collected across all access policies so they can also be granted
    // access to the catalog KMS key further below.
    const allReadPrincipalArns: string[] = [];
    const allWritePrincipalArns: string[] = [];
    const resourcePolicyDocument = new PolicyDocument();

    for (const [accessPolicyName, accessPolicy] of Object.entries(this.props.accessPolicies || {})) {
      allReadPrincipalArns.push(...(accessPolicy.readPrincipalArns || []));
      allWritePrincipalArns.push(...(accessPolicy.writePrincipalArns || []));
      const statements = this.createResourcePolicyStatements(
        accessPolicyName,
        accessPolicy.resources,
        accessPolicy.readPrincipalArns,
        accessPolicy.writePrincipalArns,
      );
      resourcePolicyDocument.addStatements(...statements);
    }

    // Consumer accounts receive read access to every catalog resource through their root principal.
    const consumerAccountIds = Object.values(this.consumerAccounts);
    if (consumerAccountIds.length > 0) {
      const readPrincipalArns = consumerAccountIds.map(acctId => `arn:${this.partition}:iam::${acctId}:root`);
      const statements = this.createResourcePolicyStatements('accounts', ['*'], readPrincipalArns);
      resourcePolicyDocument.addStatements(...statements);
    }

    if (resourcePolicyDocument.statementCount > 0) {
      //Required as per https://docs.aws.amazon.com/lake-formation/latest/dg/hybrid-cross-account.html
      resourcePolicyDocument.addStatements(this.getShareResourcePolicyStatement());
      const catalogCrProvider = this.getGlueCatalogResourcePolicyCrProvider();
      const policyHashParamName = this.props.naming.ssmPath('policyHash');
      const catalogResourcePolicy = new CustomResource(this.scope, `catalog-resource-policy`, {
        serviceToken: catalogCrProvider.serviceToken,
        properties: {
          resourcePolicyJson: resourcePolicyDocument.toJSON(),
          account: this.account,
          policyHashParam: policyHashParamName,
        },
      });
      // Persist the hash returned by the custom resource under the same SSM path handed to it.
      new StringParameter(this.scope, 'catalog-resource-policy-hash-ssm', {
        parameterName: policyHashParamName,
        stringValue: catalogResourcePolicy.getAttString('PolicyHash'),
      });
    }

    // One Athena data catalog per producer account, pointing at that account's Glue catalog.
    for (const [acctName, acctId] of Object.entries(this.producerAccounts)) {
      new CfnDataCatalog(this.scope, `athena-catalog-${acctName}`, {
        name: acctName,
        type: 'GLUE',
        parameters: {
          'catalog-id': acctId,
        },
      });
    }

    // Merge by account id rather than by account name: the same name may legitimately appear
    // in both maps with different ids, and merging the maps by key would silently drop one.
    const catalogKmsKeyConsumerAccounts = Array.from(
      new Set([...consumerAccountIds, ...Object.values(this.kmsKeyConsumerAccounts)]),
    );

    const catalogKmsKey = this.createCatalogKmsKey(
      allReadPrincipalArns,
      allWritePrincipalArns,
      catalogKmsKeyConsumerAccounts,
    );
    new MdaaCatalogSettings(this.scope, 'glue-catalog-settings', {
      naming: this.props.naming,
      catalogId: this.account,
      catalogKmsKey: catalogKmsKey,
    });
  }

  /**
   * Returns a copy of a name -> account id map without the entries that refer to the
   * account this construct is deployed into.
   *
   * @param accounts Map of account names to account ids (may be undefined)
   * @returns New map containing only entries for other accounts
   */
  private excludeLocalAccount(accounts?: { [key: string]: string }): { [key: string]: string } {
    return Object.fromEntries(Object.entries(accounts || {}).filter(([, accountId]) => accountId !== this.account));
  }

  /**
   * Nag suppressions shared by the custom resource handler function and the provider
   * framework function. A new array is returned on every call.
   * NOTE(review): the "will interact only with S3" reasons look copied from another construct
   * (the handler role is granted Glue and RAM actions); the text is kept unchanged because it
   * is emitted as suppression metadata.
   *
   * @returns List of suppression ids with their reasons
   */
  private static customResourceLambdaSuppressions(): { id: string; reason: string }[] {
    return [
      {
        id: 'NIST.800.53.R5-LambdaDLQ',
        reason: 'Function is for custom resource and error handling will be handled by CloudFormation.',
      },
      {
        id: 'NIST.800.53.R5-LambdaInsideVPC',
        reason: 'Function is for custom resource and will interact only with S3.',
      },
      {
        id: 'NIST.800.53.R5-LambdaConcurrency',
        reason:
          'Function is for custom resource and will only execute during stack deployement. Reserved concurrency not appropriate.',
      },
      {
        id: 'HIPAA.Security-LambdaDLQ',
        reason: 'Function is for custom resource and error handling will be handled by CloudFormation.',
      },
      {
        id: 'PCI.DSS.321-LambdaDLQ',
        reason: 'Function is for custom resource and error handling will be handled by CloudFormation.',
      },
      {
        id: 'HIPAA.Security-LambdaInsideVPC',
        reason: 'Function is for custom resource and will interact only with S3.',
      },
      {
        id: 'PCI.DSS.321-LambdaInsideVPC',
        reason: 'Function is for custom resource and will interact only with S3.',
      },
      {
        id: 'HIPAA.Security-LambdaConcurrency',
        reason:
          'Function is for custom resource and will only execute during stack deployement. Reserved concurrency not appropriate.',
      },
      {
        id: 'PCI.DSS.321-LambdaConcurrency',
        reason:
          'Function is for custom resource and will only execute during stack deployement. Reserved concurrency not appropriate.',
      },
    ];
  }

  /**
   * Creates a new PolicyStatement. No effect is passed, so the PolicyStatement default applies.
   *
   * @param policyName Used as the statement sid
   * @param resources List of resource arns
   * @param principalArns Principals the statement applies to
   * @param actions List of actions
   * @returns PolicyStatement
   */
  private createPolicyStatement(
    policyName: string,
    resources: string[],
    principalArns: IPrincipal[],
    actions: string[],
  ): PolicyStatement {
    return new PolicyStatement({
      sid: policyName,
      actions: actions,
      principals: principalArns,
      resources: resources,
    });
  }

  /**
   * Returns a new policy statement to allow `ram.amazonaws.com` to share the catalog and all
   * of its databases and tables.
   * The purpose is to allow cross-account data sharing.
   * Based on https://docs.aws.amazon.com/lake-formation/latest/dg/cross-account-prereqs.html
   *
   * @returns PolicyStatement
   */
  private getShareResourcePolicyStatement(): PolicyStatement {
    const arnPrefix = `arn:${this.partition}:glue:${this.region}:${this.account}`;
    return this.createPolicyStatement(
      'allow-ram-sharing',
      [`${arnPrefix}:catalog`, `${arnPrefix}:database/*`, `${arnPrefix}:table/*/*`],
      [new ServicePrincipal('ram.amazonaws.com')],
      GLUE_SHARE_RESOURCE_ACTIONS,
    );
  }

  /**
   * Builds the read and/or write resource policy statements for one access policy.
   * A statement is only produced for a principal list that is non-empty. Every statement
   * covers the requested resources plus the catalog itself and the `default` database.
   *
   * @param accessPolicyName Name used as the sid prefix (`<name>-read` / `<name>-write`)
   * @param resources Glue resource paths relative to `arn:<partition>:glue:<region>:<account>:`
   * @param readPrincipalArns Arns of principals receiving the read actions
   * @param writePrincipalArns Arns of principals receiving the write actions
   * @returns Zero, one or two policy statements
   */
  private createResourcePolicyStatements(
    accessPolicyName: string,
    resources: string[],
    readPrincipalArns?: string[],
    writePrincipalArns?: string[],
  ): PolicyStatement[] {
    const arnPrefix = `arn:${this.partition}:glue:${this.region}:${this.account}`;
    const glueResourceArns = resources.map(resource => {
      if (resource.includes('*')) {
        console.warn(
          `Glue resource access '${resource}' contains wildcard (*). Consider revising to specific resources.`,
        );
      }
      return `${arnPrefix}:${resource}`;
    });
    glueResourceArns.push(`${arnPrefix}:catalog`, `${arnPrefix}:database/default`);

    const policyStatements: PolicyStatement[] = [];
    if (readPrincipalArns && readPrincipalArns.length > 0) {
      policyStatements.push(
        this.createPolicyStatement(
          `${accessPolicyName}-read`,
          glueResourceArns,
          readPrincipalArns.map(arn => new ArnPrincipal(arn)),
          GLUE_READ_ACTIONS,
        ),
      );
    }
    if (writePrincipalArns && writePrincipalArns.length > 0) {
      policyStatements.push(
        this.createPolicyStatement(
          `${accessPolicyName}-write`,
          glueResourceArns,
          writePrincipalArns.map(arn => new ArnPrincipal(arn)),
          GLUE_WRITE_ACTIONS,
        ),
      );
    }
    return policyStatements;
  }

  /**
   * Creates the catalog KMS key and its resource policy:
   *  - any principal of the deploying account and of each read account may use the key,
   *    provided the call is made via the Glue service in this region;
   *  - read principals may decrypt and describe the key;
   *  - write principals may additionally encrypt and create grants.
   *
   * @param readPrincipalArns Arns granted decrypt access
   * @param writePrincipalArns Arns granted encrypt/decrypt access
   * @param readAccounts Additional account ids allowed to use the key via Glue
   * @returns The created key
   */
  private createCatalogKmsKey(readPrincipalArns: string[], writePrincipalArns: string[], readAccounts?: string[]): Key {
    const catalogKmsKey = new MdaaKmsKey(this.scope, 'kms-cmk', {
      description: `KMS Key for ${this.props.naming.resourceName()}`,
      naming: this.props.naming,
    });

    const usageAccounts = [this.account, ...(readAccounts || [])];
    for (const account of usageAccounts) {
      //Add a statement that allows anyone in the account to use the key as long as it is via Glue
      const accountKeyUsagePolicyStatement = new PolicyStatement({
        effect: Effect.ALLOW,
        // Use of * mirrors what is done in the CDK methods for adding policy helpers.
        resources: ['*'],
        actions: [...DECRYPT_ACTIONS, ...ENCRYPT_ACTIONS, 'kms:DescribeKey', 'kms:CreateGrant'],
      });
      accountKeyUsagePolicyStatement.addAnyPrincipal();
      accountKeyUsagePolicyStatement.addCondition('StringEquals', {
        'kms:CallerAccount': account,
        'kms:ViaService': `glue.${this.region}.amazonaws.com`,
      });
      catalogKmsKey.addToResourcePolicy(accountKeyUsagePolicyStatement);
    }

    if (readPrincipalArns.length > 0) {
      catalogKmsKey.addToResourcePolicy(
        new PolicyStatement({
          effect: Effect.ALLOW,
          // Use of * mirrors what is done in the CDK methods for adding policy helpers.
          resources: ['*'],
          actions: [...DECRYPT_ACTIONS, 'kms:DescribeKey'],
          principals: readPrincipalArns.map(arn => new ArnPrincipal(arn)),
        }),
      );
    }

    if (writePrincipalArns.length > 0) {
      catalogKmsKey.addToResourcePolicy(
        new PolicyStatement({
          effect: Effect.ALLOW,
          // Use of * mirrors what is done in the CDK methods for adding policy helpers.
          resources: ['*'],
          actions: [...DECRYPT_ACTIONS, ...ENCRYPT_ACTIONS, 'kms:DescribeKey', 'kms:CreateGrant'],
          principals: writePrincipalArns.map(arn => new ArnPrincipal(arn)),
        }),
      );
    }
    return catalogKmsKey;
  }

  /**
   * Returns the custom resource provider used to manage the Glue catalog resource policy,
   * creating it (handler role, handler function, provider role and provider) on first call
   * and returning the same instance on later calls.
   *
   * @returns The custom resource Provider
   */
  private getGlueCatalogResourcePolicyCrProvider(): Provider {
    if (this.catalogResourcePolicyProvider) {
      return this.catalogResourcePolicyProvider;
    }

    const catalogCrFunctionRole = new MdaaLambdaRole(this.scope, 'catalog-function-role', {
      description: 'CR Role',
      roleName: 'catalog-cr',
      naming: this.props.naming,
      logGroupNames: [this.props.naming.resourceName('catalog-cr')],
      createParams: false,
      createOutputs: false,
    });

    //Permissions for managing Glue Resource Policies
    catalogCrFunctionRole.addToPolicy(
      new PolicyStatement({
        effect: Effect.ALLOW,
        resources: [`arn:${this.partition}:glue:${this.region}:${this.account}:catalog`],
        actions: ['glue:PutResourcePolicy', 'glue:DeleteResourcePolicy'],
      }),
    );

    //Permission for listing the resources of RAM resource shares in this account
    catalogCrFunctionRole.addToPolicy(
      new PolicyStatement({
        effect: Effect.ALLOW,
        resources: [`arn:${this.partition}:ram:${this.region}:${this.account}:resource-share/*`],
        actions: ['ram:ListResources'],
      }),
    );

    MdaaNagSuppressions.addCodeResourceSuppressions(
      catalogCrFunctionRole,
      [
        {
          id: 'NIST.800.53.R5-IAMNoInlinePolicy',
          reason: 'Role is for Custom Resource. Inline policy specific to custom resource.',
        },
      ],
      true,
    );

    const sourceDir = `${__dirname}/../src/python/glue_catalog_resource_policy`;
    // This Lambda is the custom resource handler which manages the Glue catalog resource policy
    const catalogResourcePolicyLambda = new MdaaLambdaFunction(this.scope, 'catalog-cr-function', {
      functionName: 'catalog-cr',
      code: Code.fromAsset(sourceDir),
      handler: 'glue_catalog_resource_policy.lambda_handler',
      runtime: Runtime.PYTHON_3_13,
      timeout: Duration.seconds(120),
      role: catalogCrFunctionRole,
      naming: this.props.naming,
      createParams: false,
      createOutputs: false,
      environment: {
        LOG_LEVEL: 'INFO',
      },
    });
    MdaaNagSuppressions.addCodeResourceSuppressions(
      catalogResourcePolicyLambda,
      GlueCatalogL3Construct.customResourceLambdaSuppressions(),
      true,
    );

    const catalogCrProviderFunctionName = this.props.naming.resourceName('catalog-cr-prov', 64);
    const catalogCrProviderRole = new MdaaLambdaRole(this.scope, 'catalog-provider-role', {
      description: 'CR Role',
      roleName: 'catalog-provider-role',
      naming: this.props.naming,
      logGroupNames: [catalogCrProviderFunctionName],
      createParams: false,
      createOutputs: false,
    });
    const catalogResourcePolicyProvider = new Provider(this.scope, 'datalake-catalog-cr-provider', {
      providerFunctionName: catalogCrProviderFunctionName,
      onEventHandler: catalogResourcePolicyLambda,
      role: catalogCrProviderRole,
    });
    MdaaNagSuppressions.addCodeResourceSuppressions(
      catalogCrProviderRole,
      [
        {
          id: 'NIST.800.53.R5-IAMNoInlinePolicy',
          reason: 'Role is for Custom Resource Provider. Inline policy automatically added.',
        },
      ],
      true,
    );
    MdaaNagSuppressions.addCodeResourceSuppressions(
      catalogResourcePolicyProvider,
      [
        { id: 'AwsSolutions-L1', reason: 'Lambda function Runtime set by CDK Provider Framework' },
        ...GlueCatalogL3Construct.customResourceLambdaSuppressions(),
      ],
      true,
    );

    this.catalogResourcePolicyProvider = catalogResourcePolicyProvider;
    return catalogResourcePolicyProvider;
  }
}