in pkg/providers/instance/instance.go [534:640]
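// beginLaunchInstance selects an instance type, builds the launch template, creates the
// network interface, and starts VM creation. It returns a VirtualMachinePromise whose
// Wait callback blocks until the VM (and its extensions) are fully provisioned.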
func (p *DefaultProvider) beginLaunchInstance(
    ctx context.Context,
    nodeClass *v1alpha2.AKSNodeClass,
    nodeClaim *karpv1.NodeClaim,
    instanceTypes []*corecloudprovider.InstanceType,
) (*VirtualMachinePromise, error) {
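    // Choose an instance type, capacity type (priority), and zone from the candidates.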
    instanceType, capacityType, zone := p.pickSkuSizePriorityAndZone(ctx, nodeClaim, instanceTypes)
    if instanceType == nil {
        return nil, corecloudprovider.NewInsufficientCapacityError(fmt.Errorf("no instance types available"))
    }
    launchTemplate, err := p.getLaunchTemplate(ctx, nodeClass, nodeClaim, instanceType, capacityType)
    if err != nil {
        return nil, fmt.Errorf("getting launch template: %w", err)
    }

    // set nodepool tag for NIC, VM, and Disk
    setNodePoolNameTag(launchTemplate.Tags, nodeClaim)

    // resourceName for the NIC, VM, and Disk
    resourceName := GenerateResourceName(nodeClaim.Name)
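
    // Look up the load balancer backend pools the new NIC should be attached to.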
    backendPools, err := p.loadBalancerProvider.LoadBalancerBackendPools(ctx)
    if err != nil {
        return nil, fmt.Errorf("getting backend pools: %w", err)
    }

    networkPlugin := options.FromContext(ctx).NetworkPlugin
    networkPluginMode := options.FromContext(ctx).NetworkPluginMode

    // TODO: Not returning after launching this LRO because
    // TODO: doing so would bypass the capacity and other errors that are currently handled by
    // TODO: core pkg/controllers/nodeclaim/lifecycle/controller.go - in particular, there are metrics/events
    // TODO: emitted in capacity failure cases that we probably want.
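    // Create the network interface synchronously before starting the VM.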
    nicReference, err := p.createNetworkInterface(
        ctx,
        &createNICOptions{
            NICName:           resourceName,
            NetworkPlugin:     networkPlugin,
            NetworkPluginMode: networkPluginMode,
            MaxPods:           utils.GetMaxPods(nodeClass, networkPlugin, networkPluginMode),
            LaunchTemplate:    launchTemplate,
            BackendPools:      backendPools,
            InstanceType:      instanceType,
        },
    )
    if err != nil {
        return nil, err
    }
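
    // Start VM creation; the result carries the VM and, for an in-flight operation,
    // a poller (nil if the VM already existed).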
    result, err := p.createVirtualMachine(ctx, &createVMOptions{
        VMName:         resourceName,
        NicReference:   nicReference,
        Zone:           zone,
        CapacityType:   capacityType,
        Location:       p.location,
        SSHPublicKey:   options.FromContext(ctx).SSHPublicKey,
        NodeIdentities: options.FromContext(ctx).NodeIdentities,
        NodeClass:      nodeClass,
        LaunchTemplate: launchTemplate,
        InstanceType:   instanceType,
        ProvisionMode:  p.provisionMode,
        UseSIG:         options.FromContext(ctx).UseSIG,
    })
    if err != nil {
        azErr := p.handleResponseErrors(ctx, instanceType, zone, capacityType, err)
        return nil, azErr
    }

    // Patch the VM object to fill in a few fields that are needed later.
    // This is a bit of a hack that saves us from doing a GET now.
    // The reason to avoid a GET is that it can fail, and if it did the poller above would be lost,
    // which we don't want.
    result.VM.ID = lo.ToPtr(fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/virtualMachines/%s", p.subscriptionID, p.resourceGroup, resourceName))
    result.VM.Properties.TimeCreated = lo.ToPtr(time.Now())
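
    // Hand the VM back immediately; Wait completes the launch asynchronously.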
    return &VirtualMachinePromise{
        Wait: func() error {
            if result.Poller == nil {
                // A nil Poller means the VM already existed, so there is nothing to wait for.
                // TODO: if the VM doesn't have extensions this will still happen and we will have to
                // TODO: wait for the TTL for the claim to be deleted and recreated. This will most likely
                // TODO: happen during Karpenter pod restart.
                return nil
            }
            _, err = result.Poller.PollUntilDone(ctx, nil)
            if err != nil {
                azErr := p.handleResponseErrors(ctx, instanceType, zone, capacityType, err)
                return azErr
            }
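
            // In bootstrapping-client mode, install the custom script extension (CSE).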
            if p.provisionMode == consts.ProvisionModeBootstrappingClient {
                err = p.createCSExtension(ctx, resourceName, launchTemplate.CustomScriptsCSE, launchTemplate.IsWindows)
                if err != nil {
                    // An error here is handled by CloudProvider Create, which calls instanceProvider.Delete
                    // (cleaning up the Azure resources).
                    return err
                }
            }
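
            // Install the AKS identifying extension on the VM.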
            err = p.createAKSIdentifyingExtension(ctx, resourceName)
            if err != nil {
                return err
            }
            return nil
        },
        VM: result.VM,
    }, nil
}