pkg/cloudprovider/cloudprovider.go (379 lines of code) (raw):
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cloudprovider
import (
"context"
stderrors "errors"
"fmt"
"time"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/awslabs/operatorpkg/status"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/log"
coreapis "sigs.k8s.io/karpenter/pkg/apis"
karpv1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/events"
"sigs.k8s.io/karpenter/pkg/scheduling"
"sigs.k8s.io/karpenter/pkg/utils/resources"
"github.com/aws/karpenter-provider-aws/pkg/apis"
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
"github.com/aws/karpenter-provider-aws/pkg/operator/options"
"github.com/aws/karpenter-provider-aws/pkg/utils"
"github.com/samber/lo"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
cloudproviderevents "github.com/aws/karpenter-provider-aws/pkg/cloudprovider/events"
"github.com/aws/karpenter-provider-aws/pkg/providers/amifamily"
"github.com/aws/karpenter-provider-aws/pkg/providers/capacityreservation"
"github.com/aws/karpenter-provider-aws/pkg/providers/instance"
"github.com/aws/karpenter-provider-aws/pkg/providers/instancetype"
"github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup"
"sigs.k8s.io/karpenter/pkg/cloudprovider"
)
var _ cloudprovider.CloudProvider = (*CloudProvider)(nil)
type CloudProvider struct {
kubeClient client.Client
recorder events.Recorder
instanceTypeProvider instancetype.Provider
instanceProvider instance.Provider
amiProvider amifamily.Provider
securityGroupProvider securitygroup.Provider
capacityReservationProvider capacityreservation.Provider
}
func New(
instanceTypeProvider instancetype.Provider,
instanceProvider instance.Provider,
recorder events.Recorder,
kubeClient client.Client,
amiProvider amifamily.Provider,
securityGroupProvider securitygroup.Provider,
capacityReservationProvider capacityreservation.Provider,
) *CloudProvider {
return &CloudProvider{
instanceTypeProvider: instanceTypeProvider,
instanceProvider: instanceProvider,
kubeClient: kubeClient,
amiProvider: amiProvider,
securityGroupProvider: securityGroupProvider,
capacityReservationProvider: capacityReservationProvider,
recorder: recorder,
}
}
// Create a NodeClaim given the constraints.
func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) (*karpv1.NodeClaim, error) {
nodeClass, err := c.resolveNodeClassFromNodeClaim(ctx, nodeClaim)
if err != nil {
if errors.IsNotFound(err) {
// We treat a failure to resolve the NodeClass as an ICE since this means there is no capacity possibilities for this NodeClaim
c.recorder.Publish(cloudproviderevents.NodeClaimFailedToResolveNodeClass(nodeClaim))
return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("resolving node class, %w", err))
}
// Transient error when resolving the NodeClass
return nil, fmt.Errorf("resolving node class, %w", err)
}
nodeClassReady := nodeClass.StatusConditions().Get(status.ConditionReady)
if nodeClassReady.IsFalse() {
return nil, cloudprovider.NewNodeClassNotReadyError(stderrors.New(nodeClassReady.Message))
}
if nodeClassReady.IsUnknown() {
return nil, cloudprovider.NewCreateError(fmt.Errorf("resolving NodeClass readiness, NodeClass is in Ready=Unknown, %s", nodeClassReady.Message), "NodeClassReadinessUnknown", "NodeClass is in Ready=Unknown")
}
instanceTypes, err := c.resolveInstanceTypes(ctx, nodeClaim, nodeClass)
if err != nil {
return nil, cloudprovider.NewCreateError(fmt.Errorf("resolving instance types, %w", err), "InstanceTypeResolutionFailed", "Error resolving instance types")
}
if len(instanceTypes) == 0 {
return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("all requested instance types were unavailable during launch"))
}
tags, err := utils.GetTags(nodeClass, nodeClaim, options.FromContext(ctx).ClusterName)
if err != nil {
return nil, cloudprovider.NewNodeClassNotReadyError(err)
}
instance, err := c.instanceProvider.Create(ctx, nodeClass, nodeClaim, tags, instanceTypes)
if err != nil {
return nil, fmt.Errorf("creating instance, %w", err)
}
if instance.CapacityType == karpv1.CapacityTypeReserved {
c.capacityReservationProvider.MarkLaunched(instance.CapacityReservationID)
}
instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool {
return i.Name == string(instance.Type)
})
nc := c.instanceToNodeClaim(instance, instanceType, nodeClass)
nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
v1.AnnotationEC2NodeClassHash: nodeClass.Hash(),
v1.AnnotationEC2NodeClassHashVersion: v1.EC2NodeClassHashVersion,
})
return nc, nil
}
func (c *CloudProvider) List(ctx context.Context) ([]*karpv1.NodeClaim, error) {
instances, err := c.instanceProvider.List(ctx)
if err != nil {
return nil, fmt.Errorf("listing instances, %w", err)
}
var nodeClaims []*karpv1.NodeClaim
for _, instance := range instances {
instanceType, err := c.resolveInstanceTypeFromInstance(ctx, instance)
if err != nil {
return nil, fmt.Errorf("resolving instance type, %w", err)
}
nc, err := c.resolveNodeClassFromInstance(ctx, instance)
if client.IgnoreNotFound(err) != nil {
return nil, fmt.Errorf("resolving nodeclass, %w", err)
}
nodeClaims = append(nodeClaims, c.instanceToNodeClaim(instance, instanceType, nc))
}
return nodeClaims, nil
}
func (c *CloudProvider) Get(ctx context.Context, providerID string) (*karpv1.NodeClaim, error) {
id, err := utils.ParseInstanceID(providerID)
if err != nil {
return nil, fmt.Errorf("getting instance ID, %w", err)
}
ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", id))
instance, err := c.instanceProvider.Get(ctx, id)
if err != nil {
return nil, fmt.Errorf("getting instance, %w", err)
}
instanceType, err := c.resolveInstanceTypeFromInstance(ctx, instance)
if err != nil {
return nil, fmt.Errorf("resolving instance type, %w", err)
}
nc, err := c.resolveNodeClassFromInstance(ctx, instance)
if client.IgnoreNotFound(err) != nil {
return nil, fmt.Errorf("resolving nodeclass, %w", err)
}
return c.instanceToNodeClaim(instance, instanceType, nc), nil
}
// GetInstanceTypes returns all available InstanceTypes
func (c *CloudProvider) GetInstanceTypes(ctx context.Context, nodePool *karpv1.NodePool) ([]*cloudprovider.InstanceType, error) {
nodeClass, err := c.resolveNodeClassFromNodePool(ctx, nodePool)
if err != nil {
if errors.IsNotFound(err) {
// If we can't resolve the NodeClass, then it's impossible for us to resolve the instance types
c.recorder.Publish(cloudproviderevents.NodePoolFailedToResolveNodeClass(nodePool))
return nil, nil
}
return nil, fmt.Errorf("resolving node class, %w", err)
}
// TODO, break this coupling
instanceTypes, err := c.instanceTypeProvider.List(ctx, nodeClass)
if err != nil {
return nil, err
}
return instanceTypes, nil
}
func (c *CloudProvider) Delete(ctx context.Context, nodeClaim *karpv1.NodeClaim) error {
id, err := utils.ParseInstanceID(nodeClaim.Status.ProviderID)
if err != nil {
return fmt.Errorf("getting instance ID, %w", err)
}
ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("id", id))
err = c.instanceProvider.Delete(ctx, id)
if id := nodeClaim.Labels[cloudprovider.ReservationIDLabel]; id != "" && cloudprovider.IsNodeClaimNotFoundError(err) {
c.capacityReservationProvider.MarkTerminated(id)
}
return err
}
func (c *CloudProvider) DisruptionReasons() []karpv1.DisruptionReason {
return nil
}
func (c *CloudProvider) IsDrifted(ctx context.Context, nodeClaim *karpv1.NodeClaim) (cloudprovider.DriftReason, error) {
// Not needed when GetInstanceTypes removes nodepool dependency
nodePoolName, ok := nodeClaim.Labels[karpv1.NodePoolLabelKey]
if !ok {
return "", nil
}
nodePool := &karpv1.NodePool{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodePoolName}, nodePool); err != nil {
return "", client.IgnoreNotFound(err)
}
if nodePool.Spec.Template.Spec.NodeClassRef == nil {
return "", nil
}
nodeClass, err := c.resolveNodeClassFromNodePool(ctx, nodePool)
if err != nil {
if errors.IsNotFound(err) {
// We can't determine the drift status for the NodeClaim if we can no longer resolve the NodeClass
c.recorder.Publish(cloudproviderevents.NodePoolFailedToResolveNodeClass(nodePool))
return "", nil
}
return "", fmt.Errorf("resolving node class, %w", err)
}
driftReason, err := c.isNodeClassDrifted(ctx, nodeClaim, nodePool, nodeClass)
if err != nil {
return "", err
}
return driftReason, nil
}
// Name returns the CloudProvider implementation name.
func (c *CloudProvider) Name() string {
return "aws"
}
func (c *CloudProvider) GetSupportedNodeClasses() []status.Object {
return []status.Object{&v1.EC2NodeClass{}}
}
func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
return []cloudprovider.RepairPolicy{
// Supported Kubelet Node Conditions
{
ConditionType: corev1.NodeReady,
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: corev1.NodeReady,
ConditionStatus: corev1.ConditionUnknown,
TolerationDuration: 30 * time.Minute,
},
// Support Node Monitoring Agent Conditions
//
{
ConditionType: "AcceleratedHardwareReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 10 * time.Minute,
},
{
ConditionType: "StorageReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "NetworkingReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "KernelReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "ContainerRuntimeReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
}
}
func (c *CloudProvider) resolveNodeClassFromNodeClaim(ctx context.Context, nodeClaim *karpv1.NodeClaim) (*v1.EC2NodeClass, error) {
nodeClass := &v1.EC2NodeClass{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Spec.NodeClassRef.Name}, nodeClass); err != nil {
return nil, err
}
// For the purposes of NodeClass CloudProvider resolution, we treat deleting NodeClasses as NotFound
if !nodeClass.DeletionTimestamp.IsZero() {
// For the purposes of NodeClass CloudProvider resolution, we treat deleting NodeClasses as NotFound,
// but we return a different error message to be clearer to users
return nil, newTerminatingNodeClassError(nodeClass.Name)
}
return nodeClass, nil
}
func (c *CloudProvider) resolveNodeClassFromNodePool(ctx context.Context, nodePool *karpv1.NodePool) (*v1.EC2NodeClass, error) {
nodeClass := &v1.EC2NodeClass{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodePool.Spec.Template.Spec.NodeClassRef.Name}, nodeClass); err != nil {
return nil, err
}
if !nodeClass.DeletionTimestamp.IsZero() {
// For the purposes of NodeClass CloudProvider resolution, we treat deleting NodeClasses as NotFound,
// but we return a different error message to be clearer to users
return nil, newTerminatingNodeClassError(nodeClass.Name)
}
return nodeClass, nil
}
func (c *CloudProvider) resolveInstanceTypes(ctx context.Context, nodeClaim *karpv1.NodeClaim, nodeClass *v1.EC2NodeClass) ([]*cloudprovider.InstanceType, error) {
instanceTypes, err := c.instanceTypeProvider.List(ctx, nodeClass)
if err != nil {
return nil, fmt.Errorf("getting instance types, %w", err)
}
reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)
instanceTypes = lo.Filter(instanceTypes, func(i *cloudprovider.InstanceType, _ int) bool {
return reqs.Compatible(i.Requirements, scheduling.AllowUndefinedWellKnownLabels) == nil &&
len(i.Offerings.Compatible(reqs).Available()) > 0 &&
resources.Fits(nodeClaim.Spec.Resources.Requests, i.Allocatable())
})
// Filter out exotic instance types, spot instance types more expensive than the cheapest on-demand instance type, etc.
var rejectedInstanceTypes []*cloudprovider.InstanceType
instanceTypes, rejectedInstanceTypes, err = instance.FilterRejectInstanceTypes(nodeClaim, instanceTypes)
if err != nil {
return nil, fmt.Errorf("filtering instance types, %w", err)
}
if len(rejectedInstanceTypes) > 0 {
log.FromContext(ctx).WithValues("instance-types", utils.PrettySlice(lo.Map(rejectedInstanceTypes, func(i *cloudprovider.InstanceType, _ int) string { return i.Name }), 10)).V(1).Info("filtered out instance types from launch")
}
return instanceTypes, nil
}
func (c *CloudProvider) resolveInstanceTypeFromInstance(ctx context.Context, instance *instance.Instance) (*cloudprovider.InstanceType, error) {
nodePool, err := c.resolveNodePoolFromInstance(ctx, instance)
if err != nil {
// If we can't resolve the NodePool, we fall back to not getting instance type info
return nil, client.IgnoreNotFound(fmt.Errorf("resolving nodepool, %w", err))
}
instanceTypes, err := c.GetInstanceTypes(ctx, nodePool)
if err != nil {
// If we can't resolve the NodePool, we fall back to not getting instance type info
return nil, client.IgnoreNotFound(fmt.Errorf("resolving nodeclass, %w", err))
}
instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool {
return i.Name == string(instance.Type)
})
return instanceType, nil
}
func (c *CloudProvider) resolveNodeClassFromInstance(ctx context.Context, instance *instance.Instance) (*v1.EC2NodeClass, error) {
name, ok := instance.Tags[v1.NodeClassTagKey]
if !ok {
return nil, errors.NewNotFound(schema.GroupResource{Group: apis.Group, Resource: "ec2nodeclasses"}, "")
}
nc := &v1.EC2NodeClass{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: name}, nc); err != nil {
return nil, fmt.Errorf("resolving ec2nodeclass, %w", err)
}
if !nc.DeletionTimestamp.IsZero() {
// For the purposes of NodeClass CloudProvider resolution, we treat deleting NodeClasses as NotFound,
// but we return a different error message to be clearer to users
return nil, newTerminatingNodeClassError(nc.Name)
}
return nc, nil
}
func (c *CloudProvider) resolveNodePoolFromInstance(ctx context.Context, instance *instance.Instance) (*karpv1.NodePool, error) {
if nodePoolName, ok := instance.Tags[karpv1.NodePoolLabelKey]; ok {
nodePool := &karpv1.NodePool{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodePoolName}, nodePool); err != nil {
return nil, err
}
return nodePool, nil
}
return nil, errors.NewNotFound(schema.GroupResource{Group: coreapis.Group, Resource: "nodepools"}, "")
}
//nolint:gocyclo
func (c *CloudProvider) instanceToNodeClaim(i *instance.Instance, instanceType *cloudprovider.InstanceType, nodeClass *v1.EC2NodeClass) *karpv1.NodeClaim {
nodeClaim := &karpv1.NodeClaim{}
labels := map[string]string{}
annotations := map[string]string{}
if instanceType != nil {
for key, req := range instanceType.Requirements {
// We only want to add a label based on the instance type requirements if there is a single value for that
// requirement. For example, we can't add a label for zone based on this if the requirement is compatible with
// three. Capacity reservation IDs are a special case since we don't have a way to represent that the label may or
// may not exist. Since this requirement will be present regardless of the capacity type, we can't insert it here.
// Otherwise, you may end up with spot and on-demand NodeClaims with a reservation ID label.
if req.Len() == 1 && req.Key != cloudprovider.ReservationIDLabel {
labels[key] = req.Values()[0]
}
}
resourceFilter := func(n corev1.ResourceName, v resource.Quantity) bool {
if resources.IsZero(v) {
return false
}
// The nodeclaim should only advertise an EFA resource if it was requested. EFA network interfaces are only
// added to the launch template if they're requested, otherwise the instance is launched with a normal ENI.
if n == v1.ResourceEFA {
return i.EFAEnabled
}
return true
}
nodeClaim.Status.Capacity = lo.PickBy(instanceType.Capacity, resourceFilter)
nodeClaim.Status.Allocatable = lo.PickBy(instanceType.Allocatable(), resourceFilter)
}
labels[corev1.LabelTopologyZone] = i.Zone
// Attempt to resolve the zoneID from the instance's EC2NodeClass' status condition.
// If the EC2NodeClass is nil, we know we're in the List or Get paths, where we don't care about the zone-id value.
// If we're in the Create path, we've already validated the EC2NodeClass exists. In this case, we resolve the zone-id from the status condition
// both when creating offerings and when adding the label.
if nodeClass != nil {
if subnet, ok := lo.Find(nodeClass.Status.Subnets, func(s v1.Subnet) bool {
return s.Zone == i.Zone
}); ok && subnet.ZoneID != "" {
labels[v1.LabelTopologyZoneID] = subnet.ZoneID
}
}
labels[karpv1.CapacityTypeLabelKey] = i.CapacityType
if i.CapacityType == karpv1.CapacityTypeReserved {
labels[cloudprovider.ReservationIDLabel] = i.CapacityReservationID
}
if v, ok := i.Tags[karpv1.NodePoolLabelKey]; ok {
labels[karpv1.NodePoolLabelKey] = v
}
nodeClaim.Labels = labels
nodeClaim.Annotations = annotations
nodeClaim.CreationTimestamp = metav1.Time{Time: i.LaunchTime}
// Set the deletionTimestamp to be the current time if the instance is currently terminating
if i.State == ec2types.InstanceStateNameShuttingDown || i.State == ec2types.InstanceStateNameTerminated {
nodeClaim.DeletionTimestamp = &metav1.Time{Time: time.Now()}
}
nodeClaim.Status.ProviderID = fmt.Sprintf("aws:///%s/%s", i.Zone, i.ID)
nodeClaim.Status.ImageID = i.ImageID
return nodeClaim
}
// newTerminatingNodeClassError returns a NotFound error for handling by
func newTerminatingNodeClassError(name string) *errors.StatusError {
qualifiedResource := schema.GroupResource{Group: apis.Group, Resource: "ec2nodeclasses"}
err := errors.NewNotFound(qualifiedResource, name)
err.ErrStatus.Message = fmt.Sprintf("%s %q is terminating, treating as not found", qualifiedResource.String(), name)
return err
}