func (c *CloudProvider) waitOnPromise()

in pkg/cloudprovider/cloudprovider.go [150:192]


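// waitOnPromise blocks until the asynchronous VM creation promise resolves. On
// failure it records the event, deletes the VM and the NodeClaim, and emits a
// disruption metric.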
func (c *CloudProvider) waitOnPromise(ctx context.Context, promise *instance.VirtualMachinePromise, nodeClaim *karpv1.NodeClaim) {
	defer func() {
		if r := recover(); r != nil {
			err := fmt.Errorf("%v", r)
			log.FromContext(ctx).Error(err, "panic during waitOnPromise")
		}
	}()

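	// Block until the asynchronous VM creation either succeeds or fails.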
	err := promise.Wait()

	// Wait until the NodeClaim is Launched to avoid racing with creation.
	// This isn't strictly required, but without it, failure test scenarios are harder
	// to write: the error handling below deletes the nodeClaim before the
	// EnsureApplied call finishes, so EnsureApplied recreates it (which is wrong, and
	// not how it would actually happen in production).
	c.waitUntilLaunched(ctx, nodeClaim)

	if err != nil {
		c.recorder.Publish(cloudproviderevents.NodeClaimFailedToRegister(nodeClaim, err))
		log.FromContext(ctx).Error(err, "failed launching nodeclaim")

		// TODO: This won't clean up leaked NICs if the VM doesn't exist... intentional?
		vmName := lo.FromPtr(promise.VM.Name)
		err = c.instanceProvider.Delete(ctx, vmName)
		if cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil {
			log.FromContext(ctx).Error(err, fmt.Sprintf("failed to delete VM %s", vmName))
		}

		if err = client.IgnoreNotFound(c.kubeClient.Delete(ctx, nodeClaim)); err != nil {
			log.FromContext(ctx).Error(err, fmt.Sprintf("failed to delete nodeclaim %s, will wait for liveness TTL", nodeClaim.Name))
		}
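		// Count the failed launch as a disrupted NodeClaim (reason: async_provisioning).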
		metrics.NodeClaimsDisruptedTotal.Inc(map[string]string{
			metrics.ReasonLabel:       "async_provisioning",
			metrics.NodePoolLabel:     nodeClaim.Labels[karpv1.NodePoolLabelKey],
			metrics.CapacityTypeLabel: nodeClaim.Labels[karpv1.CapacityTypeLabelKey],
		})

		return
	}
}
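For context, here is a minimal, hedged sketch of the fire-and-forget promise pattern this function participates in: a caller kicks off VM creation, receives a promise, and hands it to a background goroutine that waits and cleans up on failure. The vmPromise and beginCreate names below are invented for illustration and are not the provider's real API.

// Sketch only: stand-in types showing the promise/background-waiter pattern.
// vmPromise and beginCreate are hypothetical, not the provider's actual API.
package main

import (
	"context"
	"fmt"
	"time"
)

// vmPromise mimics instance.VirtualMachinePromise: Wait blocks until the
// asynchronous creation finishes and returns its error, if any.
type vmPromise struct {
	done chan error
}

func (p *vmPromise) Wait() error { return <-p.done }

// beginCreate starts the (simulated) long-running VM creation and returns
// immediately with a promise the caller can wait on.
func beginCreate(ctx context.Context) *vmPromise {
	p := &vmPromise{done: make(chan error, 1)}
	go func() {
		time.Sleep(100 * time.Millisecond) // pretend this is the long-running API call
		p.done <- nil                      // or an error if provisioning failed
	}()
	return p
}

func main() {
	ctx := context.Background()
	promise := beginCreate(ctx)

	// Hand the promise to a background waiter, analogous to go waitOnPromise(...).
	go func() {
		defer func() {
			if r := recover(); r != nil {
				fmt.Println("panic during wait:", r)
			}
		}()
		if err := promise.Wait(); err != nil {
			fmt.Println("provisioning failed, cleaning up:", err)
			return
		}
		fmt.Println("provisioning succeeded")
	}()

	time.Sleep(200 * time.Millisecond) // give the waiter time to finish in this demo
}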