// packages/aws-cdk-lib/aws-batch/lib/managed-compute-environment.ts
import { Construct } from 'constructs';
import { CfnComputeEnvironment } from './batch.generated';
import { IComputeEnvironment, ComputeEnvironmentBase, ComputeEnvironmentProps } from './compute-environment-base';
import * as ec2 from '../../aws-ec2';
import * as eks from '../../aws-eks';
import * as iam from '../../aws-iam';
import { IRole } from '../../aws-iam';
import { ArnFormat, Duration, ITaggable, Lazy, Resource, Stack, TagManager, TagType, Token, ValidationError } from '../../core';
import { addConstructMetadata, MethodMetadata } from '../../core/lib/metadata-resource';
/**
* Represents a Managed ComputeEnvironment. Batch will provision EC2 Instances to
* meet the requirements of the jobs executing in this ComputeEnvironment.
*/
export interface IManagedComputeEnvironment extends IComputeEnvironment, ec2.IConnectable, ITaggable {
/**
* The maximum vCpus this `ManagedComputeEnvironment` can scale up to.
*
* *Note*: if this Compute Environment uses EC2 resources (not Fargate) with either `AllocationStrategy.BEST_FIT_PROGRESSIVE` or
* `AllocationStrategy.SPOT_CAPACITY_OPTIMIZED`, or `AllocationStrategy.BEST_FIT` with Spot instances,
 * the scheduler may exceed this number by at most one of the instances specified in `instanceTypes`
* or `instanceClasses`.
*/
readonly maxvCpus: number;
/**
* Specifies whether this Compute Environment is replaced if an update is made that requires
* replacing its instances. To enable more properties to be updated,
* set this property to `false`. When changing the value of this property to false,
* do not change any other properties at the same time.
* If other properties are changed at the same time,
* and the change needs to be rolled back but it can't,
* it's possible for the stack to go into the UPDATE_ROLLBACK_FAILED state.
* You can't update a stack that is in the UPDATE_ROLLBACK_FAILED state.
* However, if you can continue to roll it back,
* you can return the stack to its original settings and then try to update it again.
*
 * The properties that require a replacement of the Compute Environment are listed in the CloudFormation documentation linked below.
*
* @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-batch-computeenvironment.html#cfn-batch-computeenvironment-replacecomputeenvironment
* @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-updating-stacks-continueupdaterollback.html
*/
readonly replaceComputeEnvironment?: boolean;
/**
* Whether or not to use spot instances.
* Spot instances are less expensive EC2 instances that can be
* reclaimed by EC2 at any time; your job will be given two minutes
* of notice before reclamation.
*
* @default false
*/
readonly spot?: boolean;
/**
* Only meaningful if `terminateOnUpdate` is `false`. If so,
* when an infrastructure update is triggered, any running jobs
* will be allowed to run until `updateTimeout` has expired.
*
* @see https://docs.aws.amazon.com/batch/latest/userguide/updating-compute-environments.html
* @default 30 minutes
*/
readonly updateTimeout?: Duration;
/**
* Whether or not any running jobs will be immediately terminated when an infrastructure update
* occurs. If this is enabled, any terminated jobs may be retried, depending on the job's
* retry policy.
*
* @see https://docs.aws.amazon.com/batch/latest/userguide/updating-compute-environments.html
*
* @default false
*/
readonly terminateOnUpdate?: boolean;
/**
* The security groups this Compute Environment will launch instances in.
*/
readonly securityGroups: ec2.ISecurityGroup[];
/**
* The VPC Subnets this Compute Environment will launch instances in.
*/
readonly vpcSubnets?: ec2.SubnetSelection;
/**
* Whether or not the AMI is updated to the latest one supported by Batch
* when an infrastructure update occurs.
*
* If you specify a specific AMI, this property will be ignored.
*
 * Note: the CDK will never set this value by default; CloudFormation will default it to `false`.
* This is to avoid a deployment failure that occurs when this value is set.
*
* @see https://github.com/aws/aws-cdk/issues/27054
*
* @default false
*/
readonly updateToLatestImageVersion?: boolean;
}
/**
* Props for a ManagedComputeEnvironment
*/
export interface ManagedComputeEnvironmentProps extends ComputeEnvironmentProps {
/**
* The maximum vCpus this `ManagedComputeEnvironment` can scale up to.
* Each vCPU is equivalent to 1024 CPU shares.
*
* *Note*: if this Compute Environment uses EC2 resources (not Fargate) with either `AllocationStrategy.BEST_FIT_PROGRESSIVE` or
* `AllocationStrategy.SPOT_CAPACITY_OPTIMIZED`, or `AllocationStrategy.BEST_FIT` with Spot instances,
 * the scheduler may exceed this number by at most one of the instances specified in `instanceTypes`
* or `instanceClasses`.
*
* @default 256
*/
readonly maxvCpus?: number;
/**
* Specifies whether this Compute Environment is replaced if an update is made that requires
* replacing its instances. To enable more properties to be updated,
* set this property to `false`. When changing the value of this property to false,
* do not change any other properties at the same time.
* If other properties are changed at the same time,
* and the change needs to be rolled back but it can't,
* it's possible for the stack to go into the UPDATE_ROLLBACK_FAILED state.
* You can't update a stack that is in the UPDATE_ROLLBACK_FAILED state.
* However, if you can continue to roll it back,
* you can return the stack to its original settings and then try to update it again.
*
 * The properties that require a replacement of the Compute Environment are listed in the CloudFormation documentation linked below.
*
* @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-batch-computeenvironment.html#cfn-batch-computeenvironment-replacecomputeenvironment
* @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-updating-stacks-continueupdaterollback.html
*
* @default false
*/
readonly replaceComputeEnvironment?: boolean;
/**
* Whether or not to use spot instances.
* Spot instances are less expensive EC2 instances that can be
* reclaimed by EC2 at any time; your job will be given two minutes
* of notice before reclamation.
*
* @default false
*/
readonly spot?: boolean;
/**
* Only meaningful if `terminateOnUpdate` is `false`. If so,
* when an infrastructure update is triggered, any running jobs
* will be allowed to run until `updateTimeout` has expired.
*
* @see https://docs.aws.amazon.com/batch/latest/userguide/updating-compute-environments.html
*
* @default 30 minutes
*/
readonly updateTimeout?: Duration;
/**
* Whether or not any running jobs will be immediately terminated when an infrastructure update
* occurs. If this is enabled, any terminated jobs may be retried, depending on the job's
* retry policy.
*
* @see https://docs.aws.amazon.com/batch/latest/userguide/updating-compute-environments.html
*
* @default false
*/
readonly terminateOnUpdate?: boolean;
/**
* VPC in which this Compute Environment will launch Instances
*/
readonly vpc: ec2.IVpc;
/**
* The security groups this Compute Environment will launch instances in.
*
 * @default - a new security group will be created
*/
readonly securityGroups?: ec2.ISecurityGroup[];
/**
* The VPC Subnets this Compute Environment will launch instances in.
*
 * @default - the VPC's default subnet selection strategy is used
*/
readonly vpcSubnets?: ec2.SubnetSelection;
/**
* Whether or not the AMI is updated to the latest one supported by Batch
* when an infrastructure update occurs.
*
* If you specify a specific AMI, this property will be ignored.
*
 * Note: the CDK will never set this value by default; CloudFormation will default it to `false`.
* This is to avoid a deployment failure that occurs when this value is set.
*
* @see https://github.com/aws/aws-cdk/issues/27054
*
* @default false
*/
readonly updateToLatestImageVersion?: boolean;
}
/**
* Abstract base class for ManagedComputeEnvironments
* @internal
*/
export abstract class ManagedComputeEnvironmentBase extends ComputeEnvironmentBase implements IManagedComputeEnvironment {
public readonly maxvCpus: number;
public readonly replaceComputeEnvironment?: boolean;
public readonly spot?: boolean;
public readonly updateTimeout?: Duration;
public readonly terminateOnUpdate?: boolean;
public readonly securityGroups: ec2.ISecurityGroup[];
public readonly updateToLatestImageVersion?: boolean;
public readonly tags: TagManager = new TagManager(TagType.MAP, 'AWS::Batch::ComputeEnvironment');
public readonly connections: ec2.Connections;
constructor(scope: Construct, id: string, props: ManagedComputeEnvironmentProps) {
super(scope, id, props);
this.maxvCpus = props.maxvCpus ?? DEFAULT_MAX_VCPUS;
this.replaceComputeEnvironment = props.replaceComputeEnvironment ?? false;
this.spot = props.spot;
this.updateTimeout = props.updateTimeout;
this.terminateOnUpdate = props.terminateOnUpdate;
this.updateToLatestImageVersion = props.updateToLatestImageVersion;
this.securityGroups = props.securityGroups ?? [
new ec2.SecurityGroup(this, 'SecurityGroup', {
vpc: props.vpc,
}),
];
this.connections = new ec2.Connections({
securityGroups: this.securityGroups,
});
}
}
/**
* A ManagedComputeEnvironment that uses ECS orchestration on EC2 instances.
*/
export interface IManagedEc2EcsComputeEnvironment extends IManagedComputeEnvironment {
/**
* Configure which AMIs this Compute Environment can launch.
*
* Leave this `undefined` to allow Batch to choose the latest AMIs it supports for each instance that it launches.
*
* @default
* - ECS_AL2 compatible AMI ids for non-GPU instances, ECS_AL2_NVIDIA compatible AMI ids for GPU instances
*/
readonly images?: EcsMachineImage[];
/**
* The allocation strategy to use if not enough instances of
* the best fitting instance type can be allocated.
*
* @default - `BEST_FIT_PROGRESSIVE` if not using Spot instances,
 * `SPOT_PRICE_CAPACITY_OPTIMIZED` if using Spot instances.
*/
readonly allocationStrategy?: AllocationStrategy;
/**
* The maximum percentage that a Spot Instance price can be when compared with the
* On-Demand price for that instance type before instances are launched.
* For example, if your maximum percentage is 20%, the Spot price must be
* less than 20% of the current On-Demand price for that Instance.
* You always pay the lowest market price and never more than your maximum percentage.
* For most use cases, Batch recommends leaving this field empty.
*
* @default - 100%
*/
readonly spotBidPercentage?: number;
/**
* The service-linked role that Spot Fleet needs to launch instances on your behalf.
*
* @see https://docs.aws.amazon.com/batch/latest/userguide/spot_fleet_IAM_role.html
*
* @default - a new Role will be created
*/
readonly spotFleetRole?: iam.IRole;
/**
* The instance types that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
*/
readonly instanceTypes: ec2.InstanceType[];
/**
* The instance classes that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
* Batch will automatically choose the size.
*/
readonly instanceClasses: ec2.InstanceClass[];
/**
 * Whether or not to use Batch's optimal instance type.
* The optimal instance type is equivalent to adding the
* C4, M4, and R4 instance classes. You can specify other instance classes
* (of the same architecture) in addition to the optimal instance classes.
*
* @default true
*/
readonly useOptimalInstanceClasses?: boolean;
/**
* The execution Role that instances launched by this Compute Environment will use.
*
* @default - a role will be created
*/
readonly instanceRole?: iam.IRole;
/**
* The Launch Template that this Compute Environment
* will use to provision EC2 Instances.
*
* *Note*: if `securityGroups` is specified on both your
* launch template and this Compute Environment, **the
* `securityGroup`s on the Compute Environment override the
 * ones on the launch template.**
*
 * @default - no launch template
*/
readonly launchTemplate?: ec2.ILaunchTemplate;
/**
* The minimum vCPUs that an environment should maintain,
* even if the compute environment is DISABLED.
*
* @default 0
*/
readonly minvCpus?: number;
/**
* The EC2 placement group to associate with your compute resources.
* If you intend to submit multi-node parallel jobs to this Compute Environment,
* you should consider creating a cluster placement group and associate it with your compute resources.
* This keeps your multi-node parallel job on a logical grouping of instances
* within a single Availability Zone with high network flow potential.
*
* @see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
*
* @default - no placement group
*/
readonly placementGroup?: ec2.IPlacementGroup;
/**
* Add an instance type to this compute environment
*/
addInstanceType(instanceType: ec2.InstanceType): void;
/**
* Add an instance class to this compute environment
*/
addInstanceClass(instanceClass: ec2.InstanceClass): void;
}
/**
* Base interface for containing all information needed to
* configure a MachineImage in Batch
*/
interface MachineImage {
/**
* The machine image to use
*
* @default - chosen by batch
*/
readonly image?: ec2.IMachineImage;
}
/**
* A Batch MachineImage that is compatible with ECS
*/
export interface EcsMachineImage extends MachineImage {
/**
* Tells Batch which instance type to launch this image on
*
 * @default - 'ECS_AL2' for non-GPU instances, 'ECS_AL2_NVIDIA' for GPU instances
*/
readonly imageType?: EcsMachineImageType;
}
/**
* A Batch MachineImage that is compatible with EKS
*/
export interface EksMachineImage extends MachineImage {
/**
* Tells Batch which instance type to launch this image on
*
 * @default - 'EKS_AL2' for non-GPU instances, 'EKS_AL2_NVIDIA' for GPU instances
*/
readonly imageType?: EksMachineImageType;
}
/**
* Maps the image to instance types
*/
export enum EcsMachineImageType {
/**
* Tells Batch that this machine image runs on non-GPU AL2 instances
*/
ECS_AL2 = 'ECS_AL2',
/**
* Tells Batch that this machine image runs on non-GPU AL2023 instances.
* Amazon Linux 2023 does not support A1 instances.
*/
ECS_AL2023 = 'ECS_AL2023',
/**
* Tells Batch that this machine image runs on GPU instances
*/
ECS_AL2_NVIDIA = 'ECS_AL2_NVIDIA',
}
/**
* Maps the image to instance types
*/
export enum EksMachineImageType {
/**
* Tells Batch that this machine image runs on non-GPU instances
*/
EKS_AL2 = 'EKS_AL2',
/**
* Tells Batch that this machine image runs on GPU instances
*/
EKS_AL2_NVIDIA = 'EKS_AL2_NVIDIA',
}
/**
* Determines how this compute environment chooses instances to spawn
*
* @see https://aws.amazon.com/blogs/compute/optimizing-for-cost-availability-and-throughput-by-selecting-your-aws-batch-allocation-strategy/
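 *
 * A hedged sketch of picking a strategy explicitly; the `vpc` below is a placeholder you must supply:
 *
 * @example
 * declare const vpc: ec2.IVpc;
 *
 * new ManagedEc2EcsComputeEnvironment(this, 'SpotComputeEnv', {
 *   vpc,
 *   spot: true,
 *   allocationStrategy: AllocationStrategy.SPOT_PRICE_CAPACITY_OPTIMIZED,
 * });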
*/
export enum AllocationStrategy {
/**
* Batch chooses the lowest-cost instance type that fits all the jobs in the queue.
* If instances of that type are not available, the queue will not choose a new type;
* instead, it will wait for the instance to become available.
* This can stall your `Queue`, with your compute environment only using part of its max capacity
* (or none at all) until the `BEST_FIT` instance becomes available.
* This allocation strategy keeps costs lower but can limit scaling.
 * `BEST_FIT` isn't supported when updating compute environments.
*/
BEST_FIT = 'BEST_FIT',
/**
* This is the default Allocation Strategy if `spot` is `false` or unspecified.
 * This strategy will examine the jobs in the queue and choose whichever instance type meets their requirements
 * at the lowest cost per vCPU, just as `BEST_FIT` does.
 * However, if not all of the capacity can be filled with this instance type,
 * it will choose the next-best instance type to run any jobs that couldn't fit into the `BEST_FIT` capacity.
* To make the most use of this allocation strategy,
* it is recommended to use as many instance classes as is feasible for your workload.
*/
BEST_FIT_PROGRESSIVE = 'BEST_FIT_PROGRESSIVE',
/**
* If your workflow tolerates interruptions, you should enable `spot` on your `ComputeEnvironment`
 * and use `SPOT_CAPACITY_OPTIMIZED` or `SPOT_PRICE_CAPACITY_OPTIMIZED` (the latter is the default if `spot` is enabled).
* This will tell Batch to choose the instance types from the ones you’ve specified that have
* the most spot capacity available to minimize the chance of interruption.
* To get the most benefit from your spot instances,
* you should allow Batch to choose from as many different instance types as possible.
*/
SPOT_CAPACITY_OPTIMIZED = 'SPOT_CAPACITY_OPTIMIZED',
/**
* The price and capacity optimized allocation strategy looks at both price and capacity
* to select the Spot Instance pools that are the least likely to be interrupted
* and have the lowest possible price.
*
* The Batch team recommends this over `SPOT_CAPACITY_OPTIMIZED` in most instances.
*/
SPOT_PRICE_CAPACITY_OPTIMIZED = 'SPOT_PRICE_CAPACITY_OPTIMIZED',
}
/**
* Props for a ManagedEc2EcsComputeEnvironment
*/
export interface ManagedEc2EcsComputeEnvironmentProps extends ManagedComputeEnvironmentProps {
/**
 * Whether or not to use Batch's optimal instance type.
* The optimal instance type is equivalent to adding the
* C4, M4, and R4 instance classes. You can specify other instance classes
* (of the same architecture) in addition to the optimal instance classes.
*
* @default true
*/
readonly useOptimalInstanceClasses?: boolean;
/**
* Configure which AMIs this Compute Environment can launch.
* If you specify this property with only `image` specified, then the
* `imageType` will default to `ECS_AL2`. *If your image needs GPU resources,
* specify `ECS_AL2_NVIDIA`; otherwise, the instances will not be able to properly
* join the ComputeEnvironment*.
*
* @default
* - ECS_AL2 for non-GPU instances, ECS_AL2_NVIDIA for GPU instances
*/
readonly images?: EcsMachineImage[];
/**
* The allocation strategy to use if not enough instances of
* the best fitting instance type can be allocated.
*
* @default - `BEST_FIT_PROGRESSIVE` if not using Spot instances,
 * `SPOT_PRICE_CAPACITY_OPTIMIZED` if using Spot instances.
*/
readonly allocationStrategy?: AllocationStrategy;
/**
* The maximum percentage that a Spot Instance price can be when compared with the
* On-Demand price for that instance type before instances are launched.
* For example, if your maximum percentage is 20%, the Spot price must be
* less than 20% of the current On-Demand price for that Instance.
* You always pay the lowest market price and never more than your maximum percentage.
* For most use cases, Batch recommends leaving this field empty.
*
* Implies `spot == true` if set
*
 * @default - 100%
*/
readonly spotBidPercentage?: number;
/**
* The service-linked role that Spot Fleet needs to launch instances on your behalf.
*
* @see https://docs.aws.amazon.com/batch/latest/userguide/spot_fleet_IAM_role.html
*
* @default - a new role will be created
*/
readonly spotFleetRole?: iam.IRole;
/**
* The instance types that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
*
 * @default - the optimal instance classes will be used (currently C4, M4, and R4)
*/
readonly instanceTypes?: ec2.InstanceType[];
/**
* The instance classes that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
* Batch will automatically choose the instance size.
*
 * @default - the optimal instance classes will be used (currently C4, M4, and R4)
*/
readonly instanceClasses?: ec2.InstanceClass[];
/**
* The execution Role that instances launched by this Compute Environment will use.
*
* @default - a role will be created
*/
readonly instanceRole?: iam.IRole;
/**
* The Launch Template that this Compute Environment
* will use to provision EC2 Instances.
*
* *Note*: if `securityGroups` is specified on both your
* launch template and this Compute Environment, **the
* `securityGroup`s on the Compute Environment override the
 * ones on the launch template.**
*
 * @default - no launch template
*/
readonly launchTemplate?: ec2.ILaunchTemplate;
/**
* The minimum vCPUs that an environment should maintain,
* even if the compute environment is DISABLED.
*
* @default 0
*/
readonly minvCpus?: number;
/**
* The EC2 placement group to associate with your compute resources.
* If you intend to submit multi-node parallel jobs to this Compute Environment,
* you should consider creating a cluster placement group and associate it with your compute resources.
* This keeps your multi-node parallel job on a logical grouping of instances
* within a single Availability Zone with high network flow potential.
*
* @see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
*
* @default - no placement group
*/
readonly placementGroup?: ec2.IPlacementGroup;
}
/**
* A ManagedComputeEnvironment that uses ECS orchestration on EC2 instances.
*
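 * A minimal usage sketch; the `vpc` and the literal values below are placeholders you must adapt:
 *
 * @example
 * declare const vpc: ec2.IVpc;
 *
 * const computeEnv = new ManagedEc2EcsComputeEnvironment(this, 'ComputeEnv', {
 *   vpc,
 *   spot: true,
 *   spotBidPercentage: 75, // placeholder; implies Spot and must be between 0 and 100
 *   instanceClasses: [ec2.InstanceClass.C5],
 * });
 *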
* @resource AWS::Batch::ComputeEnvironment
*/
export class ManagedEc2EcsComputeEnvironment extends ManagedComputeEnvironmentBase implements IManagedEc2EcsComputeEnvironment {
/**
 * Refer to an existing ComputeEnvironment by its ARN.
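 *
 * A hedged sketch of importing an existing environment; the ARN below is a placeholder:
 *
 * @example
 * const importedComputeEnv = ManagedEc2EcsComputeEnvironment.fromManagedEc2EcsComputeEnvironmentArn(
 *   this, 'ImportedComputeEnv',
 *   'arn:aws:batch:us-east-1:123456789012:compute-environment/MyComputeEnv', // placeholder ARN
 * );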
*/
public static fromManagedEc2EcsComputeEnvironmentArn(
scope: Construct, id: string, managedEc2EcsComputeEnvironmentArn: string,
): IManagedEc2EcsComputeEnvironment {
const stack = Stack.of(scope);
const computeEnvironmentName = stack.splitArn(managedEc2EcsComputeEnvironmentArn, ArnFormat.SLASH_RESOURCE_NAME).resourceName!;
class Import extends Resource implements IManagedEc2EcsComputeEnvironment {
public readonly computeEnvironmentArn = managedEc2EcsComputeEnvironmentArn;
public readonly computeEnvironmentName = computeEnvironmentName;
public readonly enabled = true;
public readonly instanceClasses = [];
public readonly instanceTypes = [];
public readonly maxvCpus = 1;
public readonly connections = { } as any;
public readonly securityGroups = [];
public readonly tags: TagManager = new TagManager(TagType.MAP, 'AWS::Batch::ComputeEnvironment');
public addInstanceClass(_instanceClass: ec2.InstanceClass): void {
throw new ValidationError(`cannot add instance class to imported ComputeEnvironment '${id}'`, this);
}
public addInstanceType(_instanceType: ec2.InstanceType): void {
throw new ValidationError(`cannot add instance type to imported ComputeEnvironment '${id}'`, this);
}
}
return new Import(scope, id);
}
public readonly computeEnvironmentArn: string;
public readonly computeEnvironmentName: string;
public readonly images?: EcsMachineImage[];
public readonly allocationStrategy?: AllocationStrategy;
public readonly spotBidPercentage?: number;
public readonly spotFleetRole?: iam.IRole;
public readonly instanceTypes: ec2.InstanceType[];
public readonly instanceClasses: ec2.InstanceClass[];
public readonly instanceRole?: iam.IRole;
public readonly launchTemplate?: ec2.ILaunchTemplate;
public readonly minvCpus?: number;
public readonly placementGroup?: ec2.IPlacementGroup;
private readonly instanceProfile: iam.CfnInstanceProfile;
constructor(scope: Construct, id: string, props: ManagedEc2EcsComputeEnvironmentProps) {
super(scope, id, props);
// Enhanced CDK Analytics Telemetry
addConstructMetadata(this, props);
this.images = props.images;
this.allocationStrategy = determineAllocationStrategy(this, props.allocationStrategy, this.spot);
this.spotBidPercentage = props.spotBidPercentage;
this.spotFleetRole = props.spotFleetRole ?? (
this.spot && this.allocationStrategy === AllocationStrategy.BEST_FIT
? createSpotFleetRole(this)
: undefined
);
this.instanceTypes = props.instanceTypes ?? [];
this.instanceClasses = props.instanceClasses ?? [];
if (this.images?.find(image => image.imageType === EcsMachineImageType.ECS_AL2023) &&
(this.instanceClasses.includes(ec2.InstanceClass.A1) ||
this.instanceTypes.find(instanceType => instanceType.sameInstanceClassAs(ec2.InstanceType.of(ec2.InstanceClass.A1, ec2.InstanceSize.LARGE))))
) {
throw new ValidationError('Amazon Linux 2023 does not support A1 instances.', this);
}
const { instanceRole, instanceProfile } = createInstanceRoleAndProfile(this, props.instanceRole);
this.instanceRole = instanceRole;
this.instanceProfile = instanceProfile;
this.launchTemplate = props.launchTemplate;
this.minvCpus = props.minvCpus ?? DEFAULT_MIN_VCPUS;
this.placementGroup = props.placementGroup;
validateVCpus(this, this.minvCpus, this.maxvCpus);
validateSpotConfig(this, this.spot, this.spotBidPercentage, this.spotFleetRole);
const { subnetIds } = props.vpc.selectSubnets(props.vpcSubnets);
const resource = new CfnComputeEnvironment(this, 'Resource', {
...baseManagedResourceProperties(this, subnetIds),
computeEnvironmentName: props.computeEnvironmentName,
computeResources: {
...baseManagedResourceProperties(this, subnetIds).computeResources as CfnComputeEnvironment.ComputeResourcesProperty,
minvCpus: this.minvCpus,
instanceRole: this.instanceProfile.attrArn, // this is not a typo; this property actually takes a profile, not a standard role
instanceTypes: Lazy.list({
produce: () => renderInstances(this.instanceTypes, this.instanceClasses, props.useOptimalInstanceClasses),
}),
type: this.spot ? 'SPOT' : 'EC2',
spotIamFleetRole: this.spotFleetRole?.roleArn,
allocationStrategy: this.allocationStrategy,
bidPercentage: this.spotBidPercentage,
launchTemplate: this.launchTemplate ? {
launchTemplateId: this.launchTemplate?.launchTemplateId,
} : undefined,
ec2Configuration: this.images?.map((image) => {
return {
imageIdOverride: image.image?.getImage(this).imageId,
imageType: image.imageType ?? EcsMachineImageType.ECS_AL2,
};
}),
placementGroup: this.placementGroup?.placementGroupName,
tags: this.tags.renderedTags as any,
},
});
this.computeEnvironmentName = this.getResourceNameAttribute(resource.ref);
this.computeEnvironmentArn = this.getResourceArnAttribute(resource.attrComputeEnvironmentArn, {
service: 'batch',
resource: 'compute-environment',
resourceName: this.physicalName,
});
this.node.addValidation({ validate: () => validateInstances(this.instanceTypes, this.instanceClasses, props.useOptimalInstanceClasses) });
}
@MethodMetadata()
public addInstanceType(instanceType: ec2.InstanceType): void {
this.instanceTypes.push(instanceType);
}
@MethodMetadata()
public addInstanceClass(instanceClass: ec2.InstanceClass): void {
this.instanceClasses.push(instanceClass);
}
}
/**
* A ManagedComputeEnvironment that uses EKS orchestration on EC2 instances.
*/
interface IManagedEc2EksComputeEnvironment extends IManagedComputeEnvironment {
/**
* The namespace of the Cluster
*
* Cannot be 'default', start with 'kube-', or be longer than 64 characters.
*
* @see https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
*/
readonly kubernetesNamespace?: string;
/**
* The cluster that backs this Compute Environment. Required
* for Compute Environments running Kubernetes jobs.
*
* Please ensure that you have followed the steps at
*
* https://docs.aws.amazon.com/batch/latest/userguide/getting-started-eks.html
*
* before attempting to deploy a `ManagedEc2EksComputeEnvironment` that uses this cluster.
 * If you do not follow the steps in the link, the deployment will fail with a message that the
* compute environment did not stabilize.
*/
readonly eksCluster: eks.ICluster;
/**
* Configure which AMIs this Compute Environment can launch.
*
 * @default
 * - EKS_AL2 for non-GPU instances, EKS_AL2_NVIDIA for GPU instances
*/
readonly images?: EksMachineImage[];
/**
* The allocation strategy to use if not enough instances of
* the best fitting instance type can be allocated.
*
* @default - `BEST_FIT_PROGRESSIVE` if not using Spot instances,
 * `SPOT_PRICE_CAPACITY_OPTIMIZED` if using Spot instances.
*/
readonly allocationStrategy?: AllocationStrategy;
/**
* The maximum percentage that a Spot Instance price can be when compared with the
* On-Demand price for that instance type before instances are launched.
* For example, if your maximum percentage is 20%, the Spot price must be
* less than 20% of the current On-Demand price for that Instance.
* You always pay the lowest market price and never more than your maximum percentage.
* For most use cases, Batch recommends leaving this field empty.
*
* Implies `spot == true` if set
*
* @default - 100%
*/
readonly spotBidPercentage?: number;
/**
* The instance types that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
*/
readonly instanceTypes: ec2.InstanceType[];
/**
 * The instance classes that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
*/
readonly instanceClasses: ec2.InstanceClass[];
/**
* The execution Role that instances launched by this Compute Environment will use.
*
* @default - a role will be created
*/
readonly instanceRole?: iam.IRole;
/**
* The Launch Template that this Compute Environment
* will use to provision EC2 Instances.
*
* *Note*: if `securityGroups` is specified on both your
* launch template and this Compute Environment, **the
* `securityGroup`s on the Compute Environment override the
 * ones on the launch template.**
*
* @default - no launch template
*/
readonly launchTemplate?: ec2.ILaunchTemplate;
/**
* The minimum vCPUs that an environment should maintain,
* even if the compute environment is DISABLED.
*
* @default 0
*/
readonly minvCpus?: number;
/**
* The EC2 placement group to associate with your compute resources.
* If you intend to submit multi-node parallel jobs to this Compute Environment,
* you should consider creating a cluster placement group and associate it with your compute resources.
* This keeps your multi-node parallel job on a logical grouping of instances
* within a single Availability Zone with high network flow potential.
*
* @see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
*
* @default - no placement group
*/
readonly placementGroup?: ec2.IPlacementGroup;
/**
* Add an instance type to this compute environment
*/
addInstanceType(instanceType: ec2.InstanceType): void;
/**
* Add an instance class to this compute environment
*/
addInstanceClass(instanceClass: ec2.InstanceClass): void;
}
/**
* Props for a ManagedEc2EksComputeEnvironment
*/
export interface ManagedEc2EksComputeEnvironmentProps extends ManagedComputeEnvironmentProps {
/**
* The namespace of the Cluster
*/
readonly kubernetesNamespace: string;
/**
* The cluster that backs this Compute Environment. Required
* for Compute Environments running Kubernetes jobs.
*
* Please ensure that you have followed the steps at
*
* https://docs.aws.amazon.com/batch/latest/userguide/getting-started-eks.html
*
* before attempting to deploy a `ManagedEc2EksComputeEnvironment` that uses this cluster.
 * If you do not follow the steps in the link, the deployment will fail with a message that the
* compute environment did not stabilize.
*/
readonly eksCluster: eks.ICluster;
/**
 * Whether or not to use Batch's optimal instance type.
* The optimal instance type is equivalent to adding the
* C4, M4, and R4 instance classes. You can specify other instance classes
* (of the same architecture) in addition to the optimal instance classes.
*
* @default true
*/
readonly useOptimalInstanceClasses?: boolean;
/**
* Configure which AMIs this Compute Environment can launch.
*
 * @default
 * - EKS_AL2 for non-GPU instances, EKS_AL2_NVIDIA for GPU instances
*/
readonly images?: EksMachineImage[];
/**
* The allocation strategy to use if not enough instances of
* the best fitting instance type can be allocated.
*
* @default - `BEST_FIT_PROGRESSIVE` if not using Spot instances,
 * `SPOT_PRICE_CAPACITY_OPTIMIZED` if using Spot instances.
*/
readonly allocationStrategy?: AllocationStrategy;
/**
* The maximum percentage that a Spot Instance price can be when compared with the
* On-Demand price for that instance type before instances are launched.
* For example, if your maximum percentage is 20%, the Spot price must be
* less than 20% of the current On-Demand price for that Instance.
* You always pay the lowest market price and never more than your maximum percentage.
* For most use cases, Batch recommends leaving this field empty.
*
* Implies `spot == true` if set
*
* @default - 100%
*/
readonly spotBidPercentage?: number;
/**
* The instance types that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
*
 * @default - the optimal instance classes will be used (currently C4, M4, and R4)
*/
readonly instanceTypes?: ec2.InstanceType[];
/**
 * The instance classes that this Compute Environment can launch.
* Which one is chosen depends on the `AllocationStrategy` used.
* Batch will automatically choose the instance size.
*
 * @default - the optimal instance classes will be used (currently C4, M4, and R4)
*/
readonly instanceClasses?: ec2.InstanceClass[];
/**
* The execution Role that instances launched by this Compute Environment will use.
*
* @default - a role will be created
*/
readonly instanceRole?: iam.IRole;
/**
* The Launch Template that this Compute Environment
* will use to provision EC2 Instances.
*
* *Note*: if `securityGroups` is specified on both your
* launch template and this Compute Environment, **the
* `securityGroup`s on the Compute Environment override the
* ones on the launch template.**
*
* @default - no launch template
*/
readonly launchTemplate?: ec2.ILaunchTemplate;
/**
* The minimum vCPUs that an environment should maintain,
* even if the compute environment is DISABLED.
*
* @default 0
*/
readonly minvCpus?: number;
/**
* The EC2 placement group to associate with your compute resources.
* If you intend to submit multi-node parallel jobs to this Compute Environment,
* you should consider creating a cluster placement group and associate it with your compute resources.
* This keeps your multi-node parallel job on a logical grouping of instances
* within a single Availability Zone with high network flow potential.
*
* @see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html
*
* @default - no placement group
*/
readonly placementGroup?: ec2.IPlacementGroup;
}
/**
 * A ManagedComputeEnvironment that uses EKS orchestration on EC2 instances.
*
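 * A minimal usage sketch; `vpc`, `eksCluster`, and the namespace below are placeholders you must supply:
 *
 * @example
 * declare const vpc: ec2.IVpc;
 * declare const eksCluster: eks.ICluster;
 *
 * new ManagedEc2EksComputeEnvironment(this, 'EksComputeEnv', {
 *   vpc,
 *   eksCluster,
 *   kubernetesNamespace: 'batch-jobs', // placeholder; cannot be 'default' or start with 'kube-'
 * });
 *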
* @resource AWS::Batch::ComputeEnvironment
*/
export class ManagedEc2EksComputeEnvironment extends ManagedComputeEnvironmentBase implements IManagedEc2EksComputeEnvironment {
public readonly kubernetesNamespace?: string;
public readonly eksCluster: eks.ICluster;
public readonly computeEnvironmentName: string;
public readonly computeEnvironmentArn: string;
public readonly images?: EksMachineImage[];
public readonly allocationStrategy?: AllocationStrategy;
public readonly spotBidPercentage?: number;
public readonly instanceTypes: ec2.InstanceType[];
public readonly instanceClasses: ec2.InstanceClass[];
public readonly instanceRole?: iam.IRole;
public readonly launchTemplate?: ec2.ILaunchTemplate;
public readonly minvCpus?: number;
public readonly placementGroup?: ec2.IPlacementGroup;
private readonly instanceProfile: iam.CfnInstanceProfile;
constructor(scope: Construct, id: string, props: ManagedEc2EksComputeEnvironmentProps) {
super(scope, id, props);
// Enhanced CDK Analytics Telemetry
addConstructMetadata(this, props);
this.kubernetesNamespace = props.kubernetesNamespace;
this.eksCluster = props.eksCluster;
this.images = props.images;
this.allocationStrategy = determineAllocationStrategy(this, props.allocationStrategy, this.spot);
if (this.allocationStrategy === AllocationStrategy.BEST_FIT) {
throw new ValidationError(`ManagedEc2EksComputeEnvironment '${id}' uses invalid allocation strategy 'AllocationStrategy.BEST_FIT'`, this);
}
this.spotBidPercentage = props.spotBidPercentage;
this.instanceTypes = props.instanceTypes ?? [];
this.instanceClasses = props.instanceClasses ?? [];
const { instanceRole, instanceProfile } = createInstanceRoleAndProfile(this, props.instanceRole);
this.instanceRole = instanceRole;
this.instanceProfile = instanceProfile;
this.launchTemplate = props.launchTemplate;
this.minvCpus = props.minvCpus ?? DEFAULT_MIN_VCPUS;
this.placementGroup = props.placementGroup;
validateVCpus(this, this.minvCpus, this.maxvCpus);
validateSpotConfig(this, this.spot, this.spotBidPercentage);
const { subnetIds } = props.vpc.selectSubnets(props.vpcSubnets);
const resource = new CfnComputeEnvironment(this, 'Resource', {
...baseManagedResourceProperties(this, subnetIds),
computeEnvironmentName: props.computeEnvironmentName,
eksConfiguration: {
eksClusterArn: this.eksCluster.clusterArn,
kubernetesNamespace: this.kubernetesNamespace,
},
computeResources: {
...baseManagedResourceProperties(this, subnetIds).computeResources as CfnComputeEnvironment.ComputeResourcesProperty,
minvCpus: this.minvCpus,
instanceRole: this.instanceProfile.attrArn, // this is not a typo; this property actually takes a profile, not a standard role
instanceTypes: Lazy.list({ produce: () => renderInstances(this.instanceTypes, this.instanceClasses, props.useOptimalInstanceClasses) }),
type: this.spot ? 'SPOT' : 'EC2',
allocationStrategy: this.allocationStrategy,
bidPercentage: this.spotBidPercentage,
launchTemplate: this.launchTemplate ? {
launchTemplateId: this.launchTemplate?.launchTemplateId,
} : undefined,
ec2Configuration: this.images?.map((image) => {
return {
imageIdOverride: image.image?.getImage(this).imageId,
imageType: image.imageType ?? EksMachineImageType.EKS_AL2,
};
}),
placementGroup: this.placementGroup?.placementGroupName,
tags: this.tags.renderedTags as any,
},
});
this.computeEnvironmentName = this.getResourceNameAttribute(resource.ref);
this.computeEnvironmentArn = this.getResourceArnAttribute(resource.attrComputeEnvironmentArn, {
service: 'batch',
resource: 'compute-environment',
resourceName: this.physicalName,
});
this.node.addValidation({ validate: () => validateInstances(this.instanceTypes, this.instanceClasses, props.useOptimalInstanceClasses) });
}
@MethodMetadata()
public addInstanceType(instanceType: ec2.InstanceType): void {
this.instanceTypes.push(instanceType);
}
@MethodMetadata()
public addInstanceClass(instanceClass: ec2.InstanceClass): void {
this.instanceClasses.push(instanceClass);
}
}
/**
* A ManagedComputeEnvironment that uses ECS orchestration on Fargate instances.
*/
export interface IFargateComputeEnvironment extends IManagedComputeEnvironment { }
/**
* Props for a FargateComputeEnvironment
*/
export interface FargateComputeEnvironmentProps extends ManagedComputeEnvironmentProps { }
/**
* A ManagedComputeEnvironment that uses ECS orchestration on Fargate instances.
*
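 * A minimal usage sketch; the `vpc` and the values below are placeholders
 * (`spot: true` makes Batch use `FARGATE_SPOT` capacity):
 *
 * @example
 * declare const vpc: ec2.IVpc;
 *
 * new FargateComputeEnvironment(this, 'FargateComputeEnv', {
 *   vpc,
 *   spot: true,
 *   maxvCpus: 512, // placeholder
 * });
 *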
* @resource AWS::Batch::ComputeEnvironment
*/
export class FargateComputeEnvironment extends ManagedComputeEnvironmentBase implements IFargateComputeEnvironment {
/**
 * Reference an existing FargateComputeEnvironment by its ARN
*/
public static fromFargateComputeEnvironmentArn(scope: Construct, id: string, fargateComputeEnvironmentArn: string): IFargateComputeEnvironment {
const stack = Stack.of(scope);
const computeEnvironmentName = stack.splitArn(fargateComputeEnvironmentArn, ArnFormat.SLASH_RESOURCE_NAME).resourceName!;
class Import extends Resource implements IFargateComputeEnvironment {
public readonly computeEnvironmentArn = fargateComputeEnvironmentArn;
public readonly computeEnvironmentName = computeEnvironmentName;
public readonly enabled = true;
public readonly maxvCpus = 1;
public readonly connections = { } as any;
public readonly securityGroups = [];
public readonly tags: TagManager = new TagManager(TagType.MAP, 'AWS::Batch::ComputeEnvironment');
}
return new Import(scope, id);
}
public readonly computeEnvironmentName: string;
public readonly computeEnvironmentArn: string;
constructor(scope: Construct, id: string, props: FargateComputeEnvironmentProps) {
super(scope, id, props);
// Enhanced CDK Analytics Telemetry
addConstructMetadata(this, props);
const { subnetIds } = props.vpc.selectSubnets(props.vpcSubnets);
const resource = new CfnComputeEnvironment(this, 'Resource', {
...baseManagedResourceProperties(this, subnetIds),
computeEnvironmentName: props.computeEnvironmentName,
computeResources: {
...baseManagedResourceProperties(this, subnetIds).computeResources as CfnComputeEnvironment.ComputeResourcesProperty,
type: this.spot ? 'FARGATE_SPOT' : 'FARGATE',
},
});
this.computeEnvironmentName = this.getResourceNameAttribute(resource.ref);
this.computeEnvironmentArn = this.getResourceArnAttribute(resource.attrComputeEnvironmentArn, {
service: 'batch',
resource: 'compute-environment',
resourceName: this.physicalName,
});
}
}
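/**
 * Renders the instance list for CFN: explicit instance types, then instance classes,
 * plus 'optimal' unless `useOptimalInstanceClasses` is explicitly `false`.
 */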
function renderInstances(types?: ec2.InstanceType[], classes?: ec2.InstanceClass[], useOptimalInstanceClasses?: boolean): string[] {
const instances = [];
for (const instanceType of types ?? []) {
instances.push(instanceType.toString());
}
for (const instanceClass of classes ?? []) {
instances.push(instanceClass);
}
if (useOptimalInstanceClasses || useOptimalInstanceClasses === undefined) {
instances.push('optimal');
}
return instances;
}
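/**
 * Creates a default instance role (assumable by EC2, with the
 * `AmazonEC2ContainerServiceforEC2Role` managed policy) when none is provided,
 * and wraps whichever role is used in an instance profile.
 */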
function createInstanceRoleAndProfile(scope: Construct, instanceRole?: iam.IRole) {
const result: any = {};
result.instanceRole = instanceRole ?? new iam.Role(scope, 'InstanceProfileRole', {
assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com'),
managedPolicies: [iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonEC2ContainerServiceforEC2Role')],
});
result.instanceProfile = new iam.CfnInstanceProfile(scope, 'InstanceProfile', {
roles: [result.instanceRole.roleName],
});
return result;
}
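/**
 * Creates the role assumed by `spotfleet.amazonaws.com`; used when Spot instances
 * are combined with the `BEST_FIT` allocation strategy.
 */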
function createSpotFleetRole(scope: Construct): IRole {
return new iam.Role(scope, 'SpotFleetRole', {
assumedBy: new iam.ServicePrincipal('spotfleet.amazonaws.com'),
});
}
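/**
 * Defaults to `SPOT_PRICE_CAPACITY_OPTIMIZED` for Spot and `BEST_FIT_PROGRESSIVE` otherwise,
 * and rejects the Spot-only strategies when `spot` is not enabled.
 */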
function determineAllocationStrategy(scope: Construct, allocationStrategy?: AllocationStrategy, spot?: boolean): AllocationStrategy | undefined {
let result = allocationStrategy;
if (!allocationStrategy) {
result = spot ? AllocationStrategy.SPOT_PRICE_CAPACITY_OPTIMIZED : AllocationStrategy.BEST_FIT_PROGRESSIVE;
} else if (allocationStrategy === AllocationStrategy.SPOT_PRICE_CAPACITY_OPTIMIZED && !spot) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' specifies 'AllocationStrategy.SPOT_PRICE_CAPACITY_OPTIMIZED' without using spot instances`, scope);
} else if (allocationStrategy === AllocationStrategy.SPOT_CAPACITY_OPTIMIZED && !spot) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' specifies 'AllocationStrategy.SPOT_CAPACITY_OPTIMIZED' without using spot instances`, scope);
}
return result;
}
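/**
 * Construct-level validation: fails if no instance types, instance classes,
 * or 'optimal' would be rendered into the instance list.
 */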
function validateInstances(types?: ec2.InstanceType[], classes?: ec2.InstanceClass[], useOptimalInstanceClasses?: boolean): string[] {
if (renderInstances(types, classes, useOptimalInstanceClasses).length === 0) {
return ["Specifies 'useOptimalInstanceClasses: false' without specifying any instance types or classes"];
}
return [];
}
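/**
 * `spotBidPercentage` must be between 0 and 100, and both it and `spotFleetRole`
 * may only be specified together with `spot`.
 */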
function validateSpotConfig(scope: Construct, spot?: boolean, spotBidPercentage?: number, spotFleetRole?: iam.IRole): void {
if (spotBidPercentage) {
if (!spot) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' specifies 'spotBidPercentage' without specifying 'spot'`, scope);
}
if (!Token.isUnresolved(spotBidPercentage)) {
if (spotBidPercentage > 100) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' specifies 'spotBidPercentage' > 100`, scope);
} else if (spotBidPercentage < 0) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' specifies 'spotBidPercentage' < 0`, scope);
}
}
}
if (spotFleetRole) {
if (!spot) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' specifies 'spotFleetRole' without specifying 'spot'`, scope);
}
}
}
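/**
 * `minvCpus` must be non-negative and must not exceed `maxvCpus`.
 */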
function validateVCpus(scope: Construct, minvCpus: number, maxvCpus: number): void {
if (!Token.isUnresolved(minvCpus) && minvCpus < 0) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' has 'minvCpus' = ${minvCpus} < 0; 'minvCpus' cannot be less than zero`, scope);
}
if (!Token.isUnresolved(minvCpus) && !Token.isUnresolved(maxvCpus) && minvCpus > maxvCpus) {
throw new ValidationError(`Managed ComputeEnvironment '${scope.node.id}' has 'minvCpus' = ${minvCpus} > 'maxvCpus' = ${maxvCpus}; 'minvCpus' cannot be greater than 'maxvCpus'`, scope);
}
}
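/**
 * The CFN properties shared by all managed Compute Environments:
 * service role, state, update policy, and the common `computeResources` fields.
 */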
function baseManagedResourceProperties(baseComputeEnvironment: ManagedComputeEnvironmentBase, subnetIds: string[]) {
return {
serviceRole: baseComputeEnvironment.serviceRole?.roleArn,
state: baseComputeEnvironment.enabled ? 'ENABLED' : 'DISABLED',
computeResources: {
maxvCpus: baseComputeEnvironment.maxvCpus,
type: 'managed',
updateToLatestImageVersion: baseComputeEnvironment.updateToLatestImageVersion,
securityGroupIds: baseComputeEnvironment.securityGroups.map((securityGroup) => securityGroup.securityGroupId),
subnets: subnetIds,
},
updatePolicy: {
terminateJobsOnUpdate: baseComputeEnvironment.terminateOnUpdate,
jobExecutionTimeoutMinutes: baseComputeEnvironment.updateTimeout?.toMinutes(),
},
replaceComputeEnvironment: baseComputeEnvironment.replaceComputeEnvironment,
type: 'managed',
};
}
const DEFAULT_MIN_VCPUS = 0;
const DEFAULT_MAX_VCPUS = 256;