in cluster-autoscaler/cloudprovider/magnum/magnum_manager_impl.go [224:369]
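// getNodes returns the instances in a node group, built from the resources of the
// node group's kube_minions Heat stack rather than from live servers, so that minions
// which do not yet have a server are still reported (with a fake provider ID).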
func (mgr *magnumManagerImpl) getNodes(nodegroup string) ([]cloudprovider.Instance, error) {
var nodes []cloudprovider.Instance
stackInfo, err := mgr.fetchNodeGroupStackIDs(nodegroup)
if err != nil {
return nil, fmt.Errorf("could not fetch stack IDs for node group %s: %v", nodegroup, err)
}
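// Each minion in the node group is a resource of the nested kube_minions stack,
// so listing that stack's resources gives one entry per minion.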
minionResourcesPages, err := stackresources.List(mgr.heatClient, stackInfo.kubeMinionsStackName, stackInfo.kubeMinionsStackID, nil).AllPages()
if err != nil {
return nil, fmt.Errorf("could not list minion resources: %v", err)
}
minionResources, err := stackresources.ExtractResources(minionResourcesPages)
if err != nil {
return nil, fmt.Errorf("could not extract minion resources: %v", err)
}
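// The kube_minions stack itself is fetched for its outputs, which include the
// refs_map linking minion indices to server IDs.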
stack, err := stacks.Get(mgr.heatClient, stackInfo.kubeMinionsStackName, stackInfo.kubeMinionsStackID).Extract()
if err != nil {
return nil, fmt.Errorf("could not get kube_minions stack from heat: %v", err)
}
// refs_map is a mapping from minion index to server ID, e.g.
// "0": "4c30961a-6e2f-42be-be01-5270e1546a89"
//
// The value in refs_map goes through several stages:
// 1. The initial value is the node index (the same as the key).
// 2. It then changes to the placeholder string "kube-minion".
// 3. Once a server has been created it changes to the server ID.
refsMap := make(map[string]string)
for _, output := range stack.Outputs {
if output["output_key"] == "refs_map" {
refsMapOutput := output["output_value"].(map[string]interface{})
for index, ID := range refsMapOutput {
refsMap[index] = ID.(string)
}
}
}
for _, minion := range minionResources {
// Prepare a fake provider ID in the format "fake:///nodegroup/index", in case the minion does not yet have a server ID in refs_map.
// The fake provider ID is needed so that minions whose server cannot be created (e.g. because the quota was exceeded) are still reported.
// The minion.Name is its index, e.g. "2".
fakeName := fmt.Sprintf("fake:///%s/%s", nodegroup, minion.Name)
instance := cloudprovider.Instance{Id: fakeName, Status: &cloudprovider.InstanceStatus{}}
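// Map the Heat resource status of the minion onto a cloudprovider instance state.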
switch minion.Status {
case "DELETE_COMPLETE":
// Don't return this instance
continue
case "DELETE_IN_PROGRESS":
serverID, found := refsMap[minion.Name]
if !found || serverID == "kube-minion" {
// If a server ID can't be found for this minion, assume it is already deleted.
klog.V(4).Infof("Minion %q is DELETE_IN_PROGRESS but has no refs_map entry", minion.Name)
continue
}
instance.Id = fmt.Sprintf("openstack:///%s", serverID)
instance.Status.State = cloudprovider.InstanceDeleting
case "INIT_COMPLETE", "CREATE_IN_PROGRESS":
instance.Status.State = cloudprovider.InstanceCreating
case "UPDATE_IN_PROGRESS":
// UPDATE_IN_PROGRESS can mean one of two things: a node that is still being created but was
// moved to an updating status by a separate stack update before it could reach a complete status,
// or an existing node that has already completed and is only temporarily in an updating status.
// These two cases need to be distinguished to report the correct instance status.
// If the minion is not yet in refs_map it must still be creating.
serverID, found := refsMap[minion.Name]
if !found || serverID == "kube-minion" {
instance.Status.State = cloudprovider.InstanceCreating
klog.V(4).Infof("Minion %q is UPDATE_IN_PROGRESS but has no refs_map entry", minion.Name)
break
}
instance.Id = fmt.Sprintf("openstack:///%s", serverID)
// Otherwise, the stack resources for this minion have to be checked, as their statuses do not change even when the stack is updated.
// There are several resources, but the two important ones are kube-minion (which provisions the server)
// and node_config_deployment (the heat-container-agent running on the node).
// If both of these are CREATE_COMPLETE then this must be a node that is already running, not one that is still being created.
minionStackID := minion.PhysicalID
// Only the stack ID is known, not the stack name, so this operation has to be a Find.
minionStackResources, err := stackresources.Find(mgr.heatClient, minionStackID).Extract()
if err != nil {
return nil, fmt.Errorf("could not get stack resources for minion %q: %v", minion.Name, err)
}
// The Find returns all resources of this minion's stack, so we have to loop through
// them and pick out the two statuses we care about.
var minionServerStatus string
var minionNodeDeploymentStatus string
for _, resource := range minionStackResources {
switch resource.Name {
case "kube-minion":
minionServerStatus = resource.Status
case "node_config_deployment":
minionNodeDeploymentStatus = resource.Status
}
}
if minionServerStatus == "CREATE_COMPLETE" && minionNodeDeploymentStatus == "CREATE_COMPLETE" {
// The minion is one that is already running.
klog.V(4).Infof("Minion %q in UPDATE_IN_PROGRESS is an already running node", minion.Name)
instance.Status.State = cloudprovider.InstanceRunning
} else {
// The minion is one that is still being created.
klog.V(4).Infof("Minion %q in UPDATE_IN_PROGRESS is a new node", minion.Name)
instance.Status.State = cloudprovider.InstanceCreating
}
case "CREATE_FAILED", "UPDATE_FAILED":
instance.Status.State = cloudprovider.InstanceCreating
errorClass := cloudprovider.OtherErrorClass
// Check if the error message is for exceeding the project quota.
if strings.Contains(strings.ToLower(minion.StatusReason), "quota") {
errorClass = cloudprovider.OutOfResourcesErrorClass
}
instance.Status.ErrorInfo = &cloudprovider.InstanceErrorInfo{
ErrorClass: errorClass,
ErrorMessage: minion.StatusReason,
}
klog.V(3).Infof("Instance %s failed with reason: %s", minion.Name, minion.StatusReason)
case "CREATE_COMPLETE", "UPDATE_COMPLETE":
if serverID, found := refsMap[minion.Name]; found && serverID != "kube-minion" {
instance.Id = fmt.Sprintf("openstack:///%s", serverID)
}
instance.Status.State = cloudprovider.InstanceRunning
default:
// The minion is in an unrecognised state, so don't return an instance for it.
klog.V(3).Infof("Ignoring minion %s in state %s", minion.Name, minion.Status)
continue
}
nodes = append(nodes, instance)
}
return nodes, nil
}