miscellaneous/distributed_tensorflow_mask_rcnn/cfn-sm.yaml (538 lines of code) (raw):
AWSTemplateFormatVersion: 2010-09-09
Description: >-
Creates VPC and subnets for SageMaker training.
It also creates an EFS file-system for SageMaker training input.
Finally, it creates a SageMaker notebook instance that mounts
the EFS file-system for staging Training data on EFS file-system.
Parameters:
VpcCIDR:
Default: 172.30.0.0/25
Description: Vpc CIDR
AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
Type: String
PublicSubnetCIDR:
Default: 172.30.0.0/27
Description: Public Subnet CIDR in Vpc CIDR
AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
Type: String
PrivateSubnet1CIDR:
Default: 172.30.0.32/27
Description: Private Subnet1 CIDR in Vpc CIDR
AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
Type: String
PrivateSubnet2CIDR:
Default: 172.30.0.64/27
Description: Private Subnet2 CIDR in Vpc CIDR
AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
Type: String
PrivateSubnet3CIDR:
Default: 172.30.0.96/27
Description: Private Subnet2 CIDR in Vpc CIDR
AllowedPattern: '(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/(\d{1,2})'
Type: String
EbsVolumeSize:
Default: '100'
Description: Notebook instance EBS volume size
Type: String
AllowedValues:
- '100'
- '150'
- '200'
- '250'
- '300'
- '350'
- '400'
- '450'
- '500'
NotebookInstanceType:
Description: SageMaker Notebook instance type.
Type: String
Default: 'ml.m5.2xlarge'
AllowedValues:
- 'ml.m5.2xlarge'
- 'ml.m4.2xlarge'
ConstraintDescription: Subset of supported SageMaker Notebook instance types
EFSFileSystemId:
Description: >-
Existing Amazon EFS File System Id, or leave it blank to create a new EFS
File System.
Type: String
AllowedPattern: '(^fs-[0-9a-f]{8,8})$|()$'
Default: ''
ConstraintDescription: Should be a Valid EFS File System Id
EFSSharedDir:
Description: >-
EFS shared file-system directory
Type: String
Default: '/'
ConstraintDescription: Should be a valid EFS File System absolute directory path
EFSMountPoint:
Description: >-
EFS mount point
Type: String
Default: '/home/ec2-user/efs'
ConstraintDescription: Should be a valid local file-system absolute directory path
GitHubUsername:
Description: GitHub username
Type: String
GitHubToken:
Description: GitHub Token
Type: String
CodeRepoName:
Description: Code Repository name
Type: String
Default: sagemaker-github-repo
GitHubRepoUrl:
Description: GitHub Repository Url
Type: String
GitHubRepoBranch:
Description: GitHub Repository branch
Type: String
Default: 'master'
NotebookInstanceName:
Description: SageMaker Notebook instance name
Type: String
Default: 'sagemaker-notebook'
S3BucketName:
Description: SageMaker S3 Bucket Name
Type: String
Conditions:
CreateNewFileSystem: !Equals [ !Ref EFSFileSystemId, '']
CreateCodeRepo: !Not [!Equals [ !Ref GitHubRepoUrl, ''] ]
CreateGitHubRepoSecret: !And
- !Not [ !Equals [!Ref GitHubToken, ''] ]
- !Not [ !Equals [!Ref GitHubUsername, ''] ]
Resources:
Vpc:
Type: 'AWS::EC2::VPC'
Properties:
CidrBlock: !Ref VpcCIDR
EnableDnsSupport: 'true'
EnableDnsHostnames: 'true'
Tags:
- Key: Name
Value: !Ref 'AWS::StackName'
InternetGateway:
Type: 'AWS::EC2::InternetGateway'
Properties:
Tags:
- Key: Network
Value: Public
- Key: Name
Value: !Ref 'AWS::StackName'
GatewayToInternet:
Type: 'AWS::EC2::VPCGatewayAttachment'
Properties:
VpcId: !Ref Vpc
InternetGatewayId: !Ref InternetGateway
PublicSubnet:
Type: 'AWS::EC2::Subnet'
Properties:
VpcId: !Ref Vpc
AvailabilityZone:
Fn::Select:
- 0
- Fn::GetAZs: ""
CidrBlock: !Ref PublicSubnetCIDR
Tags:
- Key: Network
Value: Public
- Key: Name
Value: !Ref 'AWS::StackName'
PrivateSubnet1:
Type: 'AWS::EC2::Subnet'
Properties:
VpcId: !Ref Vpc
AvailabilityZone:
Fn::Select:
- 0
- Fn::GetAZs: ""
CidrBlock: !Ref PrivateSubnet1CIDR
Tags:
- Key: Network
Value: Private
- Key: Name
Value: !Ref 'AWS::StackName'
PrivateSubnet2:
Type: 'AWS::EC2::Subnet'
Properties:
VpcId: !Ref Vpc
AvailabilityZone:
Fn::Select:
- 1
- Fn::GetAZs: ""
CidrBlock: !Ref PrivateSubnet2CIDR
Tags:
- Key: Network
Value: Private
- Key: Name
Value: !Ref 'AWS::StackName'
PrivateSubnet3:
Type: 'AWS::EC2::Subnet'
Properties:
VpcId: !Ref Vpc
AvailabilityZone:
Fn::Select:
- 2
- Fn::GetAZs: ""
CidrBlock: !Ref PrivateSubnet3CIDR
Tags:
- Key: Network
Value: Private
- Key: Name
Value: !Ref 'AWS::StackName'
NATGatewayEIP:
Type: 'AWS::EC2::EIP'
Properties:
Domain: vpc
NATGateway:
Type: 'AWS::EC2::NatGateway'
DependsOn: GatewayToInternet
Properties:
AllocationId: !GetAtt
- NATGatewayEIP
- AllocationId
SubnetId: !Ref PublicSubnet
PublicRouteTable:
Type: 'AWS::EC2::RouteTable'
DependsOn: GatewayToInternet
Properties:
VpcId: !Ref Vpc
Tags:
- Key: Network
Value: Public
- Key: Name
Value: !Ref 'AWS::StackName'
PublicRoute:
Type: 'AWS::EC2::Route'
Properties:
RouteTableId: !Ref PublicRouteTable
DestinationCidrBlock: 0.0.0.0/0
GatewayId: !Ref InternetGateway
PublicSubnetRouteAssociation:
Type: 'AWS::EC2::SubnetRouteTableAssociation'
DependsOn: [ PublicRoute ]
Properties:
SubnetId: !Ref PublicSubnet
RouteTableId: !Ref PublicRouteTable
PrivateRouteTable:
Type: 'AWS::EC2::RouteTable'
Properties:
VpcId: !Ref Vpc
Tags:
- Key: Network
Value: Private
- Key: Name
Value: !Ref 'AWS::StackName'
PrivateRoute:
Type: 'AWS::EC2::Route'
DependsOn: [ NATGateway ]
Properties:
RouteTableId: !Ref PrivateRouteTable
DestinationCidrBlock: 0.0.0.0/0
NatGatewayId: !Ref NATGateway
PrivateSubnet1RouteAssociation:
Type: 'AWS::EC2::SubnetRouteTableAssociation'
DependsOn: [ PrivateRoute ]
Properties:
SubnetId: !Ref PrivateSubnet1
RouteTableId: !Ref PrivateRouteTable
PrivateSubnet2RouteAssociation:
Type: 'AWS::EC2::SubnetRouteTableAssociation'
DependsOn: [ PrivateRoute ]
Properties:
SubnetId: !Ref PrivateSubnet2
RouteTableId: !Ref PrivateRouteTable
PrivateSubnet3RouteAssociation:
Type: 'AWS::EC2::SubnetRouteTableAssociation'
DependsOn: [ PrivateRoute ]
Properties:
SubnetId: !Ref PrivateSubnet3
RouteTableId: !Ref PrivateRouteTable
VpcS3Endpoint:
Type: AWS::EC2::VPCEndpoint
Properties:
PolicyDocument:
Version: 2012-10-17
Statement:
-
Effect: "Allow"
Principal: "*"
Action:
- "s3:*"
Resource: "*"
RouteTableIds:
- !Ref PrivateRouteTable
ServiceName: !Sub com.amazonaws.${AWS::Region}.s3
VpcId: !Ref Vpc
FileSystem:
Type: 'AWS::EFS::FileSystem'
Condition: CreateNewFileSystem
DeletionPolicy: Retain
Properties:
PerformanceMode: generalPurpose
FileSystemTags:
- Key: Name
Value: !Ref 'AWS::StackName'
VpcSecurityGroup:
Type: 'AWS::EC2::SecurityGroup'
Properties:
GroupDescription: Security group
VpcId: !Ref Vpc
Tags:
- Key: Name
Value: !Ref 'AWS::StackName'
VpcSecurityGroupIngress:
Type: 'AWS::EC2::SecurityGroupIngress'
Properties:
GroupId: !GetAtt VpcSecurityGroup.GroupId
IpProtocol: tcp
FromPort: 0
ToPort: 65535
SourceSecurityGroupId: !GetAtt VpcSecurityGroup.GroupId
MountTarget1:
Type: 'AWS::EFS::MountTarget'
DependsOn: [ PrivateSubnet1RouteAssociation, VpcSecurityGroupIngress ]
Properties:
FileSystemId: !If
- CreateNewFileSystem
- !Ref FileSystem
- !Ref EFSFileSystemId
SubnetId: !Ref PrivateSubnet1
SecurityGroups:
- !Ref VpcSecurityGroup
MountTarget2:
Type: 'AWS::EFS::MountTarget'
DependsOn: [ PrivateSubnet2RouteAssociation, VpcSecurityGroupIngress ]
Properties:
FileSystemId: !If
- CreateNewFileSystem
- !Ref FileSystem
- !Ref EFSFileSystemId
SubnetId: !Ref PrivateSubnet2
SecurityGroups:
- !Ref VpcSecurityGroup
MountTarget3:
Type: 'AWS::EFS::MountTarget'
DependsOn: [ PrivateSubnet3RouteAssociation, VpcSecurityGroupIngress ]
Properties:
FileSystemId: !If
- CreateNewFileSystem
- !Ref FileSystem
- !Ref EFSFileSystemId
SubnetId: !Ref PrivateSubnet3
SecurityGroups:
- !Ref VpcSecurityGroup
GitHubRepoSecret:
Type: 'AWS::SecretsManager::Secret'
Condition: CreateGitHubRepoSecret
Properties:
Description: GitHub Sign In
SecretString:
Fn::Join:
- ''
- - '{'
- '"username":'
- !Ref GitHubUsername
- ','
- '"password":'
- !Ref GitHubToken
- '}'
Tags:
- Key: Name
Value: !Ref 'AWS::StackName'
CodeRepo:
Type: AWS::SageMaker::CodeRepository
Condition: CreateCodeRepo
Properties:
CodeRepositoryName: !Ref CodeRepoName
GitConfig:
Branch: !Ref GitHubRepoBranch
RepositoryUrl: !Ref GitHubRepoUrl
SecretArn: !If
- CreateGitHubRepoSecret
- !Ref GitHubRepoSecret
- !Ref 'AWS::NoValue'
NotebookStartConfig:
Type: AWS::SageMaker::NotebookInstanceLifecycleConfig
Properties:
OnStart:
- Content: !Base64
'Fn::Join':
- ''
- - '#!/bin/bash'
- |+
- set -e
- |+
- mkdir -p
- ' '
- !Ref EFSMountPoint
- |+
- sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2
- ' '
- !If
- CreateNewFileSystem
- !Ref FileSystem
- !Ref EFSFileSystemId
- .efs.
- !Ref 'AWS::Region'
- '.amazonaws.com:'
- !Ref EFSSharedDir
- ' '
- !Ref EFSMountPoint
NotebookInstance:
Type: "AWS::SageMaker::NotebookInstance"
DependsOn: [ MountTarget1, MountTarget2, MountTarget3, VpcS3Endpoint ]
Properties:
NotebookInstanceName: !Ref NotebookInstanceName
InstanceType: !Ref NotebookInstanceType
RoleArn: !GetAtt ExecutionRole.Arn
RootAccess: Enabled
SecurityGroupIds:
- !GetAtt VpcSecurityGroup.GroupId
SubnetId: !Ref PrivateSubnet1
DirectInternetAccess: Disabled
AdditionalCodeRepositories: !If
- CreateCodeRepo
- [!GetAtt CodeRepo.CodeRepositoryName]
- !Ref 'AWS::NoValue'
LifecycleConfigName: !GetAtt NotebookStartConfig.NotebookInstanceLifecycleConfigName
VolumeSizeInGB: !Ref EbsVolumeSize
Tags:
- Key: Name
Value: !Ref 'AWS::StackName'
ExecutionRole:
Type: "AWS::IAM::Role"
Properties:
AssumeRolePolicyDocument:
Version: "2012-10-17"
Statement:
-
Effect: "Allow"
Principal:
Service:
- "sagemaker.amazonaws.com"
Action:
- "sts:AssumeRole"
Path: "/"
Policies:
-
PolicyName: "sagemaker-policy"
PolicyDocument:
Version: "2012-10-17"
Statement:
-
Effect: "Allow"
Action:
- "ecr:GetAuthorizationToken"
- "ecr:BatchCheckLayerAvailability"
- "ecr:GetDownloadUrlForLayer"
- "ecr:GetRepositoryPolicy"
- "ecr:DescribeRepositories"
- "ecr:ListImages"
- "ecr:DescribeImages"
- "ecr:BatchGetImage"
- "ecr:InitiateLayerUpload"
- "ecr:UploadLayerPart"
- "ecr:CompleteLayerUpload"
- "ecr:PutImage"
- "ecr:CreateRepository"
- "fsx:*"
- "sagemaker:*"
- "cloudformation:*"
- "ec2:CreateNetworkInterface"
- "ec2:CreateNetworkInterfacePermission"
- "ec2:CreateVpcEndpoint"
- "ec2:DeleteNetworkInterface"
- "ec2:DeleteNetworkInterfacePermission"
- "ec2:DescribeDhcpOptions"
- "ec2:DescribeNetworkInterfaces"
- "ec2:DescribeRouteTables"
- "ec2:DescribeSecurityGroups"
- "ec2:DescribeSubnets"
- "ec2:DescribeVpcEndpoints"
- "ec2:DescribeVpcs"
- "logs:CreateLogGroup"
- "logs:CreateLogStream"
- "logs:DescribeLogStreams"
- "logs:GetLogEvents"
- "logs:PutLogEvents"
- "cloudwatch:DeleteAlarms"
- "cloudwatch:DescribeAlarms"
- "cloudwatch:GetMetricData"
- "cloudwatch:GetMetricStatistics"
- "cloudwatch:ListMetrics"
- "cloudwatch:PutMetricAlarm"
- "cloudwatch:PutMetricData"
- "iam:PassRole"
- "iam:ListRoles"
- "kms:DescribeKey"
- "kms:ListAliases"
- "elasticfilesystem:DescribeMountTargets"
Resource: "*"
-
Effect: "Allow"
Action:
- "iam:CreateServiceLinkedRole"
- "iam:AttachRolePolicy"
- "iam:PutRolePolicy"
Resource: "arn:aws:iam::*:role/aws-service-role/*"
-
Effect: "Allow"
Action:
- "s3:ListBucket*"
- "s3:GetBucket*"
Resource:
Fn::Join:
- ''
- - "arn:aws:s3:::"
- !Ref S3BucketName
-
Effect: "Allow"
Action:
- "s3:AbortMultipartUpload"
- "s3:ListMultipartUploadParts"
- "s3:PutObject*"
- "s3:DeleteObject*"
- "s3:GetObject*"
Resource:
-
Fn::Join:
- ''
- - "arn:aws:s3:::"
- !Ref S3BucketName
-
Fn::Join:
- ''
- - "arn:aws:s3:::"
- !Ref S3BucketName
- "/*"
Outputs:
SageMakerExecutionRoleArn:
Value: !GetAtt ExecutionRole.Arn
SecurityGroup:
Value:
Fn::Join:
- ''
- - '['
- ''''
- !GetAtt VpcSecurityGroup.GroupId
- ''''
- ']'
Subnets:
Value:
Fn::Join:
- ''
- - '['
- ''''
- !Ref PrivateSubnet1
- ''''
- ','
- ''''
- !Ref PrivateSubnet2
- ''''
- ','
- ''''
- !Ref PrivateSubnet3
- ''''
- ']'