hyperbahn/advertise.go (78 lines of code) (raw):

// Copyright (c) 2015 Uber Technologies, Inc. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. package hyperbahn import ( "fmt" "math/rand" "time" "github.com/uber/tchannel-go" ) const ( // maxAdvertiseFailures is the number of consecutive advertise failures after // which we give up and trigger an OnError event. maxAdvertiseFailures = 5 // advertiseInterval is the base time interval between advertisements. advertiseInterval = 50 * time.Second // advertiseFuzzInterval is the maximum fuzz period to add to advertiseInterval. advertiseFuzzInterval = 20 * time.Second // advertiseRetryInterval is the unfuzzed base duration to wait before retry on the first // advertise failure. Successive retries will use 2 * previous base duration. advertiseRetryInterval = 1 * time.Second ) // ErrAdvertiseFailed is triggered when advertise fails. type ErrAdvertiseFailed struct { // WillRetry is set to true if advertise will be retried. WillRetry bool // Cause is the underlying error returned from the advertise call. Cause error } func (e ErrAdvertiseFailed) Error() string { return fmt.Sprintf("advertise failed, retry: %v, cause: %v", e.WillRetry, e.Cause) } // fuzzInterval returns a fuzzed version of the interval based on FullJitter as described here: // http://www.awsarchitectureblog.com/2015/03/backoff.html func fuzzInterval(interval time.Duration) time.Duration { return time.Duration(rand.Int63n(int64(interval))) } // fuzzedAdvertiseInterval returns the time to sleep between successful advertisements. func (c *Client) fuzzedAdvertiseInterval() time.Duration { return advertiseInterval + fuzzInterval(advertiseFuzzInterval) } // logFailedRegistrationRetry logs either a warning or info depending on the number of // consecutiveFailures. If consecutiveFailures > maxAdvertiseFailures, then we log a warning. func (c *Client) logFailedRegistrationRetry(errLogger tchannel.Logger, consecutiveFailures uint) { logFn := errLogger.Info if consecutiveFailures > maxAdvertiseFailures { logFn = errLogger.Warn } logFn("Hyperbahn client registration failed, will retry.") } // advertiseLoop readvertises the service approximately every minute (with some fuzzing). func (c *Client) advertiseLoop() { sleepFor := c.fuzzedAdvertiseInterval() consecutiveFailures := uint(0) for { c.sleep(sleepFor) if c.IsClosed() { c.tchan.Logger().Infof("Hyperbahn client closed") return } if err := c.sendAdvertise(); err != nil { consecutiveFailures++ errLogger := c.tchan.Logger().WithFields(tchannel.ErrField(err)) if consecutiveFailures >= maxAdvertiseFailures && c.opts.FailStrategy == FailStrategyFatal { c.opts.Handler.OnError(ErrAdvertiseFailed{Cause: err, WillRetry: false}) errLogger.Fatal("Hyperbahn client registration failed.") } c.logFailedRegistrationRetry(errLogger, consecutiveFailures) c.opts.Handler.OnError(ErrAdvertiseFailed{Cause: err, WillRetry: true}) // Even after many failures, cap backoff. if consecutiveFailures < maxAdvertiseFailures { sleepFor = fuzzInterval(advertiseRetryInterval * time.Duration(1<<consecutiveFailures)) } } else { c.opts.Handler.On(Readvertised) sleepFor = c.fuzzedAdvertiseInterval() consecutiveFailures = 0 } } } // initialAdvertise will do the initial Advertise call to Hyperbahn with additional // retries on top of the built-in TChannel retries. It will use exponential backoff // between each of the call attempts. func (c *Client) initialAdvertise() error { var err error for attempt := uint(0); attempt < maxAdvertiseFailures; attempt++ { err = c.sendAdvertise() if err == nil || err == errEphemeralPeer { break } c.tchan.Logger().WithFields(tchannel.ErrField(err)).Info( "Hyperbahn client initial registration failure, will retry") // Back off for a while. sleepFor := fuzzInterval(advertiseRetryInterval * time.Duration(1<<attempt)) c.sleep(sleepFor) } return err } func (c *Client) sleep(d time.Duration) { c.opts.TimeSleep(d) }