in swim/heal_via_discover_provider.go [122:179]
// Heal runs one healing round driven by the node's discover provider.
//
// It emits a DiscoHealEvent, fetches the host list from the discover
// provider and records its length in previousHostListSize. Every host that is
// either missing from the memberlist or whose status has a precedence of at
// least Faulty becomes a heal target. Targets are shuffled and handed to
// AttemptHeal one by one until none remain or ten attempts have failed.
//
// The returned slice holds the targets for which AttemptHeal succeeded. An
// error is returned only when the discover provider is not set or when it
// fails to produce a host list; in both cases the slice is empty.
func (h *discoverProviderHealer) Heal() ([]string, error) {
	h.node.EmitEvent(DiscoHealEvent{})

	// Without a discover provider there is nothing to heal against.
	provider := h.node.discoverProvider
	if provider == nil {
		return []string{}, errors.New("discoverProvider not available to healer")
	}

	discovered, err := provider.Hosts()
	if err != nil {
		h.logger.Warn("healer unable to receive host list from discover provider")
		return []string{}, err
	}
	h.previousHostListSize = len(discovered)

	// Keep only the hosts worth healing with: skip every member that is
	// known and whose status ranks below Faulty.
	var candidates []string
	for _, addr := range discovered {
		if member, known := h.node.memberlist.Member(addr); known && statePrecedence(member.Status) < statePrecedence(Faulty) {
			continue
		}
		candidates = append(candidates, addr)
	}
	util.ShuffleStringsInPlace(candidates)

	// Work through the candidates, giving up after maxFailures failed
	// attempts.
	const maxFailures = 10
	var healed []string
	failures := 0
	for failures < maxFailures && len(candidates) != 0 {
		next := candidates[0]
		candidates = del(candidates, next)

		// try to heal partition
		reached, err := AttemptHeal(h.node, next)
		if err == nil {
			// Hosts reported by the successful attempt are dropped from
			// the remaining candidates so they are not tried again.
			for _, peer := range reached {
				candidates = del(candidates, peer)
			}
			healed = append(healed, next)
			continue
		}

		// The "failure" field carries the count before this attempt is added.
		h.logger.WithFields(log.Fields{
			"error":   err.Error(),
			"failure": failures,
		}).Warn("heal attempt failed (10 in total)")
		failures++
	}

	if failures == maxFailures {
		h.logger.WithField("reachedNodes", len(healed)).Warn("healer reached max failures")
	}

	return healed, nil
}