in pkg/cloudprovider/cloudprovider.go [150:192]
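// waitOnPromise waits for the asynchronous VM creation promise to resolve. On failure it
// publishes a registration-failure event, best-effort deletes the VM and the nodeclaim,
// and records the disruption in metrics.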
func (c *CloudProvider) waitOnPromise(ctx context.Context, promise *instance.VirtualMachinePromise, nodeClaim *karpv1.NodeClaim) {
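	// Recover and log any panic raised while waiting so it is not propagated further.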
	defer func() {
		if r := recover(); r != nil {
			err := fmt.Errorf("%v", r)
			log.FromContext(ctx).Error(err, "panic during waitOnPromise")
		}
	}()
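	// Block until the asynchronous VM creation completes, successfully or with an error.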
	err := promise.Wait()
	// Wait until the nodeclaim is Launched, to avoid racing with its creation.
	// This isn't strictly required, but without it failure test scenarios are harder
	// to write: the error handling below deletes the nodeClaim before the EnsureApplied
	// call finishes, so EnsureApplied recreates it, which is wrong and not how it would
	// behave in production.
	c.waitUntilLaunched(ctx, nodeClaim)
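	// VM creation failed: report the failure, then best-effort clean up the VM and the nodeclaim.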
	if err != nil {
		c.recorder.Publish(cloudproviderevents.NodeClaimFailedToRegister(nodeClaim, err))
		log.FromContext(ctx).Error(err, "failed launching nodeclaim")
		// TODO: if the VM doesn't exist, this delete won't clean up leaked NICs.
		// Confirm whether that is intentional.
		vmName := lo.FromPtr(promise.VM.Name)
		err = c.instanceProvider.Delete(ctx, vmName)
		if cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
			log.FromContext(ctx).Error(err, fmt.Sprintf("failed to delete VM %s", vmName))
		}
		if err := client.IgnoreNotFound(c.kubeClient.Delete(ctx, nodeClaim)); err != nil {
			log.FromContext(ctx).Error(err, fmt.Sprintf("failed to delete nodeclaim %s, will wait for liveness TTL", nodeClaim.Name))
		}
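		// Count the claim as disrupted so failed async provisioning is visible in metrics.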
		metrics.NodeClaimsDisruptedTotal.Inc(map[string]string{
			metrics.ReasonLabel:       "async_provisioning",
			metrics.NodePoolLabel:     nodeClaim.Labels[karpv1.NodePoolLabelKey],
			metrics.CapacityTypeLabel: nodeClaim.Labels[karpv1.CapacityTypeLabelKey],
		})
		return
	}
}
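For context, a minimal sketch of the call site this helper implies. The createAsync and BeginCreate names and the promise-returning signature are assumptions for illustration, not taken from this file; only waitOnPromise comes from the excerpt above.

func (c *CloudProvider) createAsync(ctx context.Context, nodeClaim *karpv1.NodeClaim) error {
	// Assumed API: kick off VM creation and get back a promise for the in-flight VM.
	promise, err := c.instanceProvider.BeginCreate(ctx, nodeClaim)
	if err != nil {
		return err
	}
	// Hand the promise to waitOnPromise on its own goroutine so the caller can return
	// before the VM finishes provisioning; failure cleanup then happens asynchronously.
	go c.waitOnPromise(ctx, promise, nodeClaim)
	return nil
}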