in tools/watch/retrywatcher.go [102:240]
func (rw *RetryWatcher) doReceive() (bool, time.Duration) {
watcher, err := rw.watcherClient.Watch(metav1.ListOptions{
ResourceVersion: rw.lastResourceVersion,
AllowWatchBookmarks: true,
})
// We are very unlikely to hit EOF here since we are just establishing the call,
// but it may happen that the apiserver is just shutting down (e.g. being restarted)
// This is consistent with how it is handled for informers
switch err {
case nil:
break
case io.EOF:
// watch closed normally
return false, 0
case io.ErrUnexpectedEOF:
klog.V(1).InfoS("Watch closed with unexpected EOF", "err", err)
return false, 0
default:
msg := "Watch failed"
if net.IsProbableEOF(err) || net.IsTimeout(err) {
klog.V(5).InfoS(msg, "err", err)
// Retry
return false, 0
}
klog.ErrorS(err, msg)
// Retry
return false, 0
}
if watcher == nil {
klog.ErrorS(nil, "Watch returned nil watcher")
// Retry
return false, 0
}
ch := watcher.ResultChan()
defer watcher.Stop()
for {
select {
case <-rw.stopChan:
klog.V(4).InfoS("Stopping RetryWatcher.")
return true, 0
case event, ok := <-ch:
if !ok {
klog.V(4).InfoS("Failed to get event! Re-creating the watcher.", "resourceVersion", rw.lastResourceVersion)
return false, 0
}
// We need to inspect the event and get ResourceVersion out of it
switch event.Type {
case watch.Added, watch.Modified, watch.Deleted, watch.Bookmark:
metaObject, ok := event.Object.(resourceVersionGetter)
if !ok {
_ = rw.send(watch.Event{
Type: watch.Error,
Object: &apierrors.NewInternalError(errors.New("retryWatcher: doesn't support resourceVersion")).ErrStatus,
})
// We have to abort here because this might cause lastResourceVersion inconsistency by skipping a potential RV with valid data!
return true, 0
}
resourceVersion := metaObject.GetResourceVersion()
if resourceVersion == "" {
_ = rw.send(watch.Event{
Type: watch.Error,
Object: &apierrors.NewInternalError(fmt.Errorf("retryWatcher: object %#v doesn't support resourceVersion", event.Object)).ErrStatus,
})
// We have to abort here because this might cause lastResourceVersion inconsistency by skipping a potential RV with valid data!
return true, 0
}
// All is fine; send the non-bookmark events and update resource version.
if event.Type != watch.Bookmark {
ok = rw.send(event)
if !ok {
return true, 0
}
}
rw.lastResourceVersion = resourceVersion
continue
case watch.Error:
// This round trip allows us to handle unstructured status
errObject := apierrors.FromObject(event.Object)
statusErr, ok := errObject.(*apierrors.StatusError)
if !ok {
klog.Error(spew.Sprintf("Received an error which is not *metav1.Status but %#+v", event.Object))
// Retry unknown errors
return false, 0
}
status := statusErr.ErrStatus
statusDelay := time.Duration(0)
if status.Details != nil {
statusDelay = time.Duration(status.Details.RetryAfterSeconds) * time.Second
}
switch status.Code {
case http.StatusGone:
// Never retry RV too old errors
_ = rw.send(event)
return true, 0
case http.StatusGatewayTimeout, http.StatusInternalServerError:
// Retry
return false, statusDelay
default:
// We retry by default. RetryWatcher is meant to proceed unless it is certain
// that it can't. If we are not certain, we proceed with retry and leave it
// up to the user to timeout if needed.
// Log here so we have a record of hitting the unexpected error
// and we can whitelist some error codes if we missed any that are expected.
klog.V(5).Info(spew.Sprintf("Retrying after unexpected error: %#+v", event.Object))
// Retry
return false, statusDelay
}
default:
klog.Errorf("Failed to recognize Event type %q", event.Type)
_ = rw.send(watch.Event{
Type: watch.Error,
Object: &apierrors.NewInternalError(fmt.Errorf("retryWatcher failed to recognize Event type %q", event.Type)).ErrStatus,
})
// We are unable to restart the watch and have to stop the loop or this might cause lastResourceVersion inconsistency by skipping a potential RV with valid data!
return true, 0
}
}
}
}