func()

in src/terraform/providers/terraform-provider-avere/averevfxt.go [459:544]


// BlockUntilHealthy polls the cluster until every health check passes within
// a single pass, sleeping ClusterStableRetrySleepSeconds seconds between
// passes.
//
// Each pass runs the following checks in order and skips the remaining ones
// after the first failure:
//   - no activity is still running; activities whose Status is
//     StatusComplete, StatusCompleted or StatusNodeRemoved, or whose Percent
//     equals CompletedPercent, are ignored;
//   - no alert has a severity other than AlertSeverityGreen or
//     AlertSeverityYellow;
//   - only when fullHealthCheck is true: every node returned by
//     GetExistingNodes is in state NodeUp, and VServerIPsPingable reports
//     true.
//
// It returns nil as soon as one pass succeeds. An error from any of the
// underlying queries is returned immediately without retrying. If a pass
// fails while the retry counter is already greater than
// ClusterStableRetryCount, an error is returned.
func (a *AvereVfxt) BlockUntilHealthy(fullHealthCheck bool) error {
	for retries := 0; ; retries++ {
		healthy := true

		// verify no activities, needed for operations
		activities, err := a.GetActivities()
		if err != nil {
			return err
		}
		for _, activity := range activities {
			// Go cases do not fall through, so the finished statuses are
			// listed in a single case.
			switch activity.Status {
			case StatusComplete, StatusCompleted, StatusNodeRemoved:
				continue
			}
			if activity.Percent != CompletedPercent {
				log.Printf("[WARN] vfxt: cluster still has running activity %v", activity)
				healthy = false
				// This break is outside the switch, so it leaves the range
				// loop after the first running activity is found.
				break
			}
		}

		if healthy {
			// verify no active alerts, needed for operations
			alerts, err := a.GetAlerts()
			if err != nil {
				return err
			}
			for _, alert := range alerts {
				// ignore green and yellow alerts
				if alert.Severity != AlertSeverityGreen && alert.Severity != AlertSeverityYellow {
					log.Printf("[WARN] [%d/%d] vfxt: cluster still has active alert %v", retries, ClusterStableRetryCount, alert)
					healthy = false
					break
				}
			}
		}

		// The remaining checks only run for a full health check; they are
		// useful to run before returning to customer.
		if healthy && fullHealthCheck {
			// verify all nodes healthy
			nodes, err := a.GetExistingNodes()
			if err != nil {
				return err
			}
			for _, node := range nodes {
				if node.State != NodeUp {
					log.Printf("[WARN] [%d/%d] node %v not up and in state %v", retries, ClusterStableRetryCount, node, node.State)
					healthy = false
					break
				}
			}
		}

		if healthy && fullHealthCheck {
			// verify vserver is pingable
			pingable, err := a.VServerIPsPingable()
			if err != nil {
				return err
			}
			if !pingable {
				log.Printf("[WARN] [%d/%d] vfxt: not all vserver IP addresses are pingable", retries, ClusterStableRetryCount)
				healthy = false
			}
		}

		if healthy {
			// the cluster is stable
			return nil
		}

		if retries > ClusterStableRetryCount {
			return fmt.Errorf("Failure for cluster to become stable after %d retries", retries)
		}
		time.Sleep(ClusterStableRetrySleepSeconds * time.Second)
	}
}