in integration_test/gce-testing-internal/gce/gce_testing.go [2170:2231]
// waitForStartLinux blocks until the given Linux VM reports that system
// startup has finished, as observed by running "systemctl is-system-running"
// on the VM via RunRemotely.
//
// Both the "running" and "degraded" states are treated as started. The check
// is retried every vmInitBackoffDuration until it succeeds or ctx is done; on
// SUSE images the number of retries is additionally capped at
// (5 minutes / vmInitBackoffDuration). On SUSE images a further fixed delay
// (slesStartupDelay) and a bounded wait for "sudo ls /root" to succeed follow.
//
// If the startup check never succeeds, the returned error message begins with
// startupFailedMessage and wraps the last error seen. It returns nil once the
// VM is considered ready.
func waitForStartLinux(ctx context.Context, logger *log.Logger, vm *VM) error {
	var backoffPolicy backoff.BackOff
	backoffPolicy = backoff.NewConstantBackOff(vmInitBackoffDuration)
	if IsSUSEImageSpec(vm.ImageSpec) {
		// Give up early on SUSE due to b/186426190. If this step times out, the
		// error will be retried with a fresh VM.
		backoffPolicy = backoff.WithMaxRetries(backoffPolicy, uint64((5*time.Minute)/vmInitBackoffDuration))
	}
	backoffPolicy = backoff.WithContext(backoffPolicy, ctx)

	// Returns an error if system startup is still ongoing.
	// Hopefully, waiting for system startup to finish will avoid some
	// hard-to-debug flaky issues like:
	// * b/180518814 (ubuntu, sles)
	// * b/148612123 (sles)
	isStartupDone := func() error {
		// Bound each individual poke so a hung SSH attempt cannot stall the
		// whole retry loop.
		ctx, cancel := context.WithTimeout(ctx, vmInitPokeSSHTimeout)
		defer cancel()
		output, err := RunRemotely(ctx, logger, vm, "systemctl is-system-running")

		// There are a few cases for what is-system-running returns:
		// https://www.freedesktop.org/software/systemd/man/systemctl.html#is-system-running
		// If the command failed due to SSH issues, the stdout should be "".
		state := strings.TrimSpace(output.Stdout)
		switch state {
		case "running":
			return nil
		case "degraded":
			// Even though some services failed to start, it's worth continuing
			// to run the test. There are various unnecessary services that could be
			// failing, see b/185473981 and b/185182238 for some examples.
			// But let's at least print out which services failed into the logs.
			// This is best-effort diagnostics only, so its result is
			// deliberately ignored.
			_, _ = RunRemotely(ctx, logger, vm, "systemctl --failed")
			return nil
		}

		// There are several reasons this could be failing, but usually if we get
		// here, that just means that ssh is not ready yet or the VM is in some
		// kind of non-ready state, like "starting".
		if err == nil {
			// The command itself reported no error but the state is not one we
			// accept. Returning nil here would make the retry loop treat the
			// VM as started, so report the unexpected state explicitly.
			return fmt.Errorf("system startup not finished: systemctl is-system-running reported state %q", state)
		}
		return err
	}
	if err := backoff.Retry(isStartupDone, backoffPolicy); err != nil {
		return fmt.Errorf("%v. Last err=%w", startupFailedMessage, err)
	}

	if IsSUSEImageSpec(vm.ImageSpec) {
		// TODO(b/259122953): SUSE needs additional startup time. Remove once we have more
		// sensible/deterministic workarounds for each of the individual problems.
		// Wait on a timer rather than time.Sleep so that cancelling ctx ends
		// the wait promptly.
		timer := time.NewTimer(slesStartupDelay)
		defer timer.Stop()
		select {
		case <-ctx.Done():
			return fmt.Errorf("context done while waiting for SUSE startup delay: %w", ctx.Err())
		case <-timer.C:
		}

		// TODO(b/259122953): wait until sudo is ready
		sudoPolicy := backoff.WithContext(
			backoff.WithMaxRetries(backoff.NewConstantBackOff(slesStartupSudoDelay), slesStartupSudoMaxAttempts),
			ctx,
		)
		checkSudo := func() error {
			_, err := RunRemotely(ctx, logger, vm, "sudo ls /root")
			return err
		}
		if err := backoff.Retry(checkSudo, sudoPolicy); err != nil {
			return fmt.Errorf("exceeded retries trying to get sudo: %w", err)
		}
	}
	return nil
}