infrastructure/slurm-accounting/accounting-cluster-template.yaml (288 lines of code) (raw):
---
# CloudFormation template header.
AWSTemplateFormatVersion: '2010-09-09'
# Folded scalar (>-): the wrapped lines below are joined with single spaces
# and no trailing newline is kept, so the description is one line of text.
Description: >-
  Template for creating the basic infrastructure required for
  AWS ParallelCluster with Slurm job accounting enabled
Metadata:
  # Presentation hints for the parameter form: one group holding every
  # parameter, plus a friendly label for each parameter name.
  AWS::CloudFormation::Interface:
    ParameterGroups:
      - Label:
          default: 'Slurm Database Settings'
        # Sequence order is the order in which the parameters are listed.
        Parameters:
          - EnvironmentName
          - VpcId
          - SlurmDbSubnets
          - SlurmDbInstanceClass
          - SlurmDbInstanceIdentifier
          - SlurmDbAllocatedStorage
          - SlurmDbBackupRetentionPeriod
          - SlurmDbDeleteAutoBackup
          - SlurmDbDeleteProtect
          - SlurmDbPort
          - SlurmDbEngine
          - SlurmDbEngineVersion
          - SlurmDbName
          - SlurmDbSecret
          - SlurmDbUsername
    # Labels are keyed by parameter name; every key must match an entry in
    # the Parameters section of this template.
    ParameterLabels:
      EnvironmentName:
        default: 'Prefix added to resource names'
      VpcId:
        default: 'VPC ID'
      SlurmDbSubnets:
        default: 'Database subnet IDs'
      SlurmDbInstanceClass:
        default: 'Database instance class'
      SlurmDbInstanceIdentifier:
        default: 'Database instance name'
      SlurmDbAllocatedStorage:
        default: 'Database instance storage'
      SlurmDbBackupRetentionPeriod:
        default: 'Database backup retention period'
      SlurmDbDeleteAutoBackup:
        default: 'Automatic backup deletion'
      SlurmDbDeleteProtect:
        default: 'Database deletion protection'
      SlurmDbPort:
        default: 'Database port'
      SlurmDbEngine:
        default: 'Database engine'
      SlurmDbEngineVersion:
        default: 'Database engine version'
      SlurmDbName:
        default: 'Database name'
      SlurmDbSecret:
        default: 'Database password secret'
      SlurmDbUsername:
        default: 'Database username'
# Stack inputs. Numeric inputs carry explicit ranges so that values the
# database service would reject are caught when the parameters are
# validated rather than part-way through resource creation.
Parameters:
  EnvironmentName:
    Description: '20 characters max.'
    Type: String
    # Quoted so the regex is always read as a plain string.
    AllowedPattern: '^[a-zA-Z0-9-_]{1,20}$'
    ConstraintDescription: >-
      Must be 1 to 20 characters long and contain only letters, digits,
      hyphens, or underscores.
    Default: slurm-accounting

  VpcId:
    Description: 'Existing VPC where Slurm accounting database will be created.'
    Type: AWS::EC2::VPC::Id

  SlurmDbSubnets:
    Description: 'List of existing subnets that will be used to create database subnet group.'
    Type: List<AWS::EC2::Subnet::Id>
    ConstraintDescription: >-
      Must be a list of at least two existing subnets in two different
      availability zones. They reside in the VPC specified above.

  SlurmDbAllocatedStorage:
    Description: 'Specify size in GiB.'
    Type: Number
    # Range for general purpose SSD storage on MySQL/MariaDB instances;
    # the DB instance in this template does not set a storage type.
    MinValue: 20
    MaxValue: 65536
    ConstraintDescription: 'Must be a value between 20 and 65536.'
    Default: 50

  SlurmDbBackupRetentionPeriod:
    Description: 'Number of days for which automated backups are kept.'
    Type: Number
    MinValue: 0
    MaxValue: 35
    ConstraintDescription: 'Must be a value between 0 and 35.'
    Default: 1

  # The two flags below are String parameters restricted to 'true'/'false'
  # (quoted so they stay strings); they are passed by reference to the
  # DB instance properties.
  SlurmDbDeleteAutoBackup:
    Description: 'Indicates whether automated backups are deleted after the DB instance is deleted (Manual backups are retained).'
    Type: String
    AllowedValues:
      - 'true'
      - 'false'
    Default: 'true'

  SlurmDbDeleteProtect:
    Description: "If enabled, database instance can't be deleted unless protection is disabled."
    Type: String
    AllowedValues:
      - 'true'
      - 'false'
    Default: 'false'

  SlurmDbEngine:
    Type: String
    AllowedValues:
      - mariadb
      - mysql
    Default: mariadb

  # NOTE(review): nothing in this section ties the chosen version to the
  # chosen engine, so a mismatched pair (for example mariadb with an 8.0.x
  # version) passes parameter validation. Versions are quoted so they are
  # always treated as strings.
  SlurmDbEngineVersion:
    Description: 'MySQL version 8 or MariaDB version 10.'
    Type: String
    AllowedValues:
      - '8.0.11'
      - '8.0.13'
      - '8.0.15'
      - '8.0.16'
      - '8.0.17'
      - '8.0.19'
      - '8.0.20'
      - '8.0.21'
      - '8.0.23'
      - '8.0.25'
      - '8.0.26'
      - '8.0.27'
      - '8.0.28'
      - '10.2.32'
      - '10.2.37'
      - '10.2.39'
      - '10.2.40'
      - '10.2.41'
      - '10.3.23'
      - '10.3.28'
      - '10.3.31'
      - '10.3.32'
      - '10.4.13'
      - '10.4.18'
      - '10.4.21'
      - '10.4.22'
      - '10.5.12'
      - '10.5.13'
      - '10.6.5'
    Default: '10.4.22'

  SlurmDbInstanceClass:
    Description: 'Standard, memory optimized, and burstable instance classes.'
    Type: String
    AllowedValues:
      - db.t3.micro
      - db.t3.small
      - db.t3.medium
      - db.t3.large
      - db.t3.xlarge
      - db.t3.2xlarge
      - db.t4g.micro
      - db.t4g.small
      - db.t4g.medium
      - db.t4g.large
      - db.t4g.xlarge
      - db.t4g.2xlarge
      - db.m5.large
      - db.m5.xlarge
      - db.m5.2xlarge
      - db.m5.4xlarge
      - db.m5.8xlarge
      - db.m5.12xlarge
      - db.m5.16xlarge
      - db.m5.24xlarge
      - db.m6g.large
      - db.m6g.xlarge
      - db.m6g.2xlarge
      - db.m6g.4xlarge
      - db.m6g.8xlarge
      - db.m6g.12xlarge
      - db.m6g.16xlarge
      - db.r5.large
      - db.r5.xlarge
      - db.r5.2xlarge
      - db.r5.4xlarge
      - db.r5.8xlarge
      - db.r5.12xlarge
      - db.r5.16xlarge
      - db.r5.24xlarge
      - db.r6g.large
      - db.r6g.xlarge
      - db.r6g.2xlarge
      - db.r6g.4xlarge
      - db.r6g.8xlarge
      - db.r6g.12xlarge
      - db.r6g.16xlarge
      - db.x2g.large
      - db.x2g.xlarge
      - db.x2g.2xlarge
      - db.x2g.4xlarge
      - db.x2g.8xlarge
      - db.x2g.12xlarge
      - db.x2g.16xlarge
    Default: db.t4g.medium

  SlurmDbInstanceIdentifier:
    Description: 'Name will be converted to lowercase.'
    Type: String
    Default: slurmdb-rds-instance

  # The literal value AUTO is compared in the Conditions section to decide
  # whether the template generates its own secret.
  SlurmDbSecret:
    Description: >-
      Name of a secret in AWS Secrets Manager storing the Slurm database
      username and password. Must be in the key-value form
      'password:<value>'. Leave as AUTO to automatically generate a new
      secret.
    Type: String
    Default: AUTO

  SlurmDbPort:
    Description: 'Port on which the database accepts connections.'
    Type: Number
    # Port range accepted for MySQL/MariaDB database instances.
    MinValue: 1150
    MaxValue: 65535
    ConstraintDescription: 'Must be a value between 1150 and 65535.'
    Default: 3306

  SlurmDbName:
    Description: 'Name of database to create when DB instance is created.'
    Type: String
    Default: slurmdb

  SlurmDbUsername:
    Description: 'Administrator user name for DB instance.'
    Type: String
    Default: admin
Conditions:
  # True when SlurmDbSecret is left at the literal value AUTO; gates the
  # generated secret resource and the output that exposes it.
  CreateSlurmDbPasswordSecret: !Equals
    - !Ref SlurmDbSecret
    - AUTO
Resources:
  # Generated credentials secret. Created only when the
  # CreateSlurmDbPasswordSecret condition holds.
  SlurmDbPasswordSecret:
    Type: AWS::SecretsManager::Secret
    Condition: CreateSlurmDbPasswordSecret
    Properties:
      Name: !Sub 'DbPasswdSecret-${AWS::StackName}'
      Description: 'Slurm database password secret'
      GenerateSecretString:
        # JSON skeleton holding the username; the generated string is
        # added under the key named by GenerateStringKey.
        SecretStringTemplate: !Sub '{"username": "${SlurmDbUsername}"}'
        GenerateStringKey: password
        PasswordLength: 15
        # Characters never used in the generated password: " @ / \ # |
        ExcludeCharacters: "\"@/\\#|"
      Tags:
        - Key: Name
          Value: !Sub '${EnvironmentName}-db-secret'
        - Key: StackName
          Value: !Ref AWS::StackName

  # Subnet group built from the caller-supplied subnet list.
  SlurmDbSubnetGroup:
    Type: AWS::RDS::DBSubnetGroup
    Properties:
      DBSubnetGroupName: !Sub '${EnvironmentName}-db-subnetgroup'
      DBSubnetGroupDescription: 'Database Subnet Group'
      SubnetIds: !Ref SlurmDbSubnets

  # Security group attached to the database instance. It declares no
  # inline rules; its single ingress rule is the separate resource below.
  SlurmDbSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: 'Database access security group'
      VpcId: !Ref VpcId
      Tags:
        - Key: Name
          Value: !Sub '${EnvironmentName}-sg'
        - Key: StackName
          Value: !Ref AWS::StackName

  # Self-referencing rule: source and target are the same group, so TCP
  # traffic on the database port is allowed between members of the group.
  SlurmDbSecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !GetAtt SlurmDbSecurityGroup.GroupId
      SourceSecurityGroupId: !GetAtt SlurmDbSecurityGroup.GroupId
      IpProtocol: tcp
      FromPort: !Ref SlurmDbPort
      ToPort: !Ref SlurmDbPort

  # The accounting database instance: single-AZ and not publicly
  # accessible, placed in the subnet group and security group above.
  # NOTE(review): storage encryption is not configured here; confirm
  # whether that is intentional before changing it on a live stack.
  SlurmDbInstance:
    Type: AWS::RDS::DBInstance
    Properties:
      DBInstanceIdentifier: !Ref SlurmDbInstanceIdentifier
      DBInstanceClass: !Ref SlurmDbInstanceClass
      Engine: !Ref SlurmDbEngine
      EngineVersion: !Ref SlurmDbEngineVersion
      AllocatedStorage: !Ref SlurmDbAllocatedStorage
      DBName: !Ref SlurmDbName
      MasterUsername: !Ref SlurmDbUsername
      # Dynamic reference to the "password" key of the secret: the
      # generated secret when the condition holds, otherwise the secret
      # named by the SlurmDbSecret parameter.
      MasterUserPassword: !Join
        - ''
        - - '{{resolve:secretsmanager:'
          - !If
            - CreateSlurmDbPasswordSecret
            - !Ref SlurmDbPasswordSecret
            - !Ref SlurmDbSecret
          - ':SecretString:password}}'
      BackupRetentionPeriod: !Ref SlurmDbBackupRetentionPeriod
      DeleteAutomatedBackups: !Ref SlurmDbDeleteAutoBackup
      DeletionProtection: !Ref SlurmDbDeleteProtect
      DBSubnetGroupName: !Ref SlurmDbSubnetGroup
      VPCSecurityGroups:
        - !Ref SlurmDbSecurityGroup
      MultiAZ: false
      PubliclyAccessible: false
Outputs:
  # Address attribute of the database instance endpoint.
  SlurmDbEndpoint:
    Description: 'RDS Endpoint: Database endpoint address.'
    Value: !GetAtt SlurmDbInstance.Endpoint.Address

  # Emitted only when the template generated the secret itself.
  SlurmDbPasswordSecretArn:
    Condition: CreateSlurmDbPasswordSecret
    Description: 'Secret ARN: Database username and password secret ARN.'
    Value: !Ref SlurmDbPasswordSecret

  # Reference to the database security group, whose only rule admits
  # traffic from members of the same group.
  SlurmDbSecurityGroupId:
    Description: 'The security group ID for the RDS instance'
    Value: !Ref SlurmDbSecurityGroup