func()

in pkg/providers/v1/aws.go [2429:2544]


// waitForAttachmentStatus polls d.describeVolume with exponential backoff until
// the volume's attachment state equals status, a non-retryable condition is
// hit, or the backoff is exhausted.
//
// Parameters:
//   - status: the attachment state to wait for (compared against
//     volumeAttachedStatus and volumeDetachedStatus for the special cases below).
//   - expectedInstance: when non-empty, the instance ID the reported attachment
//     must carry; a different non-empty ID counts as a transient error.
//   - expectedDevice: when non-empty, the device name the reported attachment
//     must carry; a different non-empty name counts as a transient error.
//   - alreadyAttached: when true and status is volumeAttachedStatus, a poll that
//     reports any other state fails immediately instead of continuing to wait.
//
// Transient errors (describeVolume failures, device/instance mismatches) are
// tolerated until more than volumeAttachmentStatusConsecutiveErrorLimit of them
// accumulate without an intervening clean "still waiting" poll.
//
// Returns the most recent attachment observed with a non-nil state (which may
// come from an earlier poll if the final poll reported no attachments), or a
// synthetic "detached" attachment if the volume no longer exists while waiting
// for detachment. The attachment should not be relied upon when err is non-nil.
func (d *awsDisk) waitForAttachmentStatus(status string, expectedInstance, expectedDevice string, alreadyAttached bool) (*ec2.VolumeAttachment, error) {
	backoff := wait.Backoff{
		Duration: volumeAttachmentStatusPollDelay,
		Factor:   volumeAttachmentStatusFactor,
		Steps:    volumeAttachmentStatusSteps,
	}

	// Because of rate limiting, we often see errors from describeVolume.
	// Or AWS eventual consistency returns unexpected data.
	// So we tolerate a limited number of failures, and only report the error
	// once more than volumeAttachmentStatusConsecutiveErrorLimit have piled up.
	errorCount := 0

	// Attach/detach usually takes time. It does not make sense to start
	// polling DescribeVolumes before some initial delay to let AWS
	// process the request.
	time.Sleep(getInitialAttachDetachDelay(status))

	// Last attachment seen with a non-nil state; this is what gets returned.
	var attachment *ec2.VolumeAttachment

	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		info, err := d.describeVolume()
		if err != nil {
			// The VolumeNotFound error is special -- we don't need to wait for it to repeat
			if isAWSErrorVolumeNotFound(err) {
				if status == volumeDetachedStatus {
					// The disk doesn't exist, assume it's detached, log warning and stop waiting
					klog.Warningf("Waiting for volume %q to be detached but the volume does not exist", d.awsID)
					stateStr := "detached"
					attachment = &ec2.VolumeAttachment{
						State: &stateStr,
					}
					return true, nil
				}
				if status == volumeAttachedStatus {
					// The disk doesn't exist, complain, give up waiting and report error
					klog.Warningf("Waiting for volume %q to be attached but the volume does not exist", d.awsID)
					return false, err
				}
			}
			errorCount++
			if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
				// report the error
				return false, err
			}

			klog.Warningf("Ignoring error from describe volume for volume %q; will retry: %q", d.awsID, err)
			return false, nil
		}

		if len(info.Attachments) > 1 {
			// Shouldn't happen; log so we know if it is
			klog.Warningf("Found multiple attachments for volume %q: %v", d.awsID, info)
		}

		// Attachment reported by THIS poll. It is tracked separately from the
		// outer variable so that the sanity checks below never run against
		// data left over from a previous poll.
		var current *ec2.VolumeAttachment
		attachmentStatus := ""
		for _, a := range info.Attachments {
			if a.State != nil {
				current = a
				attachmentStatus = *a.State
			} else {
				// Shouldn't happen; log so we know if it is
				klog.Warningf("Ignoring nil attachment state for volume %q: %v", d.awsID, a)
			}
		}
		if attachmentStatus == "" {
			// No attachment with a usable state means the volume is detached.
			attachmentStatus = volumeDetachedStatus
		}
		if current != nil {
			attachment = current

			// AWS eventual consistency can go back in time.
			// For example, we're waiting for a volume to be attached as /dev/xvdba, but AWS can tell us it's
			// attached as /dev/xvdbb, where it was attached before and it was already detached.
			// Retry couple of times, hoping AWS starts reporting the right status.
			device := aws.StringValue(current.Device)
			if expectedDevice != "" && device != "" && device != expectedDevice {
				klog.Warningf("Expected device %s %s for volume %s, but found device %s %s", expectedDevice, status, d.name, device, attachmentStatus)
				errorCount++
				if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
					// report the error
					return false, fmt.Errorf("attachment of disk %q failed: requested device %q but found %q", d.name, expectedDevice, device)
				}
				return false, nil
			}
			instanceID := aws.StringValue(current.InstanceId)
			if expectedInstance != "" && instanceID != "" && instanceID != expectedInstance {
				klog.Warningf("Expected instance %s/%s for volume %s, but found instance %s/%s", expectedInstance, status, d.name, instanceID, attachmentStatus)
				errorCount++
				if errorCount > volumeAttachmentStatusConsecutiveErrorLimit {
					// report the error, naming the instances (not the devices) that disagree
					return false, fmt.Errorf("attachment of disk %q failed: requested instance %q but found %q", d.name, expectedInstance, instanceID)
				}
				return false, nil
			}
		}

		// if we expected volume to be attached and it was reported as already attached via DescribeInstance call
		// but DescribeVolume told us volume is detached, we will short-circuit this long wait loop and return error
		// so that AttachDisk can be retried without waiting out the whole backoff.
		if (status == volumeAttachedStatus) && alreadyAttached && (attachmentStatus != status) {
			return false, fmt.Errorf("attachment of disk %q failed, expected device to be attached but was %s", d.name, attachmentStatus)
		}

		if attachmentStatus == status {
			// Attachment is in requested state, finish waiting
			return true, nil
		}
		// continue waiting; a clean poll ends the current run of errors
		errorCount = 0
		klog.V(2).Infof("Waiting for volume %q state: actual=%s, desired=%s", d.awsID, attachmentStatus, status)
		return false, nil
	})
	return attachment, err
}