in pkg/cloudprovider/cloudprovider.go [86:148]
func (c *CloudProvider) Create(ctx context.Context, nodeClaim *karpv1.NodeClaim) (*karpv1.NodeClaim, error) {
	nodeClass, err := c.resolveNodeClassFromNodeClaim(ctx, nodeClaim)
	if err != nil {
		if errors.IsNotFound(err) {
			c.recorder.Publish(cloudproviderevents.NodeClaimFailedToResolveNodeClass(nodeClaim))
		}
		// We treat a failure to resolve the NodeClass as an ICE since it means there are no capacity possibilities for this NodeClaim
		return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("resolving node class, %w", err))
	}
	/*
		// TODO: Remove this after v1
		nodePool, err := utils.ResolveNodePoolFromNodeClaim(ctx, c.kubeClient, nodeClaim)
		if err != nil {
			return nil, err
		}
		kubeletHash, err := utils.GetHashKubelet(nodePool, nodeClass)
		if err != nil {
			return nil, err
		}
	*/
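	// Only provision capacity once the NodeClass has reconciled: Ready=False is surfaced as a terminal
	// NodeClassNotReady error, while Ready=Unknown is returned as a plain error so the launch can be retried.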
	nodeClassReady := nodeClass.StatusConditions().Get(status.ConditionReady)
	if nodeClassReady.IsFalse() {
		return nil, cloudprovider.NewNodeClassNotReadyError(stderrors.New(nodeClassReady.Message))
	}
	if nodeClassReady.IsUnknown() {
		return nil, fmt.Errorf("resolving NodeClass readiness, NodeClass is in Ready=Unknown, %s", nodeClassReady.Message)
	}
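	// Verify the Kubernetes version is resolvable on the NodeClass before launching.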
	if _, err = nodeClass.GetKubernetesVersion(); err != nil {
		return nil, err
	}
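	// Resolve the instance types that satisfy this NodeClaim's requirements for the given NodeClass;
	// an empty result means there is no launchable capacity.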
	instanceTypes, err := c.resolveInstanceTypes(ctx, nodeClaim, nodeClass)
	if err != nil {
		return nil, fmt.Errorf("resolving instance types, %w", err)
	}
	if len(instanceTypes) == 0 {
		return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("all requested instance types were unavailable during launch"))
	}
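	// BeginCreate issues the VM creation and returns a promise for the in-flight long-running operation (LRO).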
	instancePromise, err := c.instanceProvider.BeginCreate(ctx, nodeClass, nodeClaim, instanceTypes)
	if err != nil {
		return nil, fmt.Errorf("creating instance, %w", err)
	}
	// Launch a single goroutine to poll the returned promise.
	// Note that we could store the LRO details on the NodeClaim, but we don't bother today because Karpenter
	// crashes should be rare, and even in the case of a crash, as long as the node comes up successfully there's
	// no issue. If the node doesn't come up successfully in that case, the node and the linked claim will
	// be garbage collected after the TTL, but the cause of the node's issue will be lost, as the LRO URL was
	// only held in memory.
	go c.waitOnPromise(ctx, instancePromise, nodeClaim)
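	// Build the returned NodeClaim from the initial VM model on the promise, matching the VM back to its
	// instance type by VM size.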
	instance := instancePromise.VM
	instanceType, _ := lo.Find(instanceTypes, func(i *cloudprovider.InstanceType) bool {
		return i.Name == string(lo.FromPtr(instance.Properties.HardwareProfile.VMSize))
	})
	nc, err := c.instanceToNodeClaim(ctx, instance, instanceType)
	if err != nil {
		return nil, fmt.Errorf("converting instance to node claim, %w", err)
	}
	nc.Annotations = lo.Assign(nc.Annotations, map[string]string{
		v1alpha2.AnnotationAKSNodeClassHash:        nodeClass.Hash(),
		v1alpha2.AnnotationAKSNodeClassHashVersion: v1alpha2.AKSNodeClassHashVersion,
	})
	return nc, nil
}