spinnaker/terminator.go (134 lines of code) (raw):

// Copyright 2016 Netflix, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package spinnaker import ( "bytes" "encoding/json" "fmt" "io/ioutil" "log" "net/http" "github.com/pkg/errors" "github.com/Netflix/chaosmonkey/v2" ) const terminateType string = "terminateInstances" type ( // killPayload is the POST request body for Spinnaker instance terminations killPayload struct { Application string `json:"application"` Description string `json:"description"` Job []kpJob `json:"job"` } // kpJob is the "job" of killPayload kpJob struct { User string `json:"user"` Type string `json:"type"` Credentials string `json:"credentials"` Region string `json:"region"` ServerGroupName string `json:"serverGroupName"` InstanceIDs []string `json:"instanceIds"` CloudProvider string `json:"cloudProvider"` } // fakeTerminator implements term.Terminator, but it just logs the http requests rather than actually // making them fakeTerminator struct{} ) // NewFakeTerm returns a fake Terminator that prints out what API calls it would make against Spinnaker func NewFakeTerm() chaosmonkey.Terminator { return fakeTerminator{} } // tasksURL returns the Spinnaker tasks URL associated with an app func (s Spinnaker) tasksURL(appName string) string { return s.appURL(appName) + "/tasks" } // Kill implements term.Terminator.Kill func (t fakeTerminator) Execute(trm chaosmonkey.Termination) error { return nil } // Execute implements term.Terminator.Execute func (s Spinnaker) Execute(trm chaosmonkey.Termination) (err error) { ins := trm.Instance url := s.tasksURL(ins.AppName()) otherID, err := s.OtherID(ins) if err != nil { return errors.Wrap(err, "retrieve other id failed") } payload := killJSONPayload(ins, otherID, s.user) resp, err := s.client.Post(url, "application/json", bytes.NewReader(payload)) if err != nil { return errors.Wrap(err, fmt.Sprintf("POST to %s failed, (body '%s')", url, string(payload))) } defer func() { if cerr := resp.Body.Close(); cerr != nil && err == nil { err = errors.Wrap(cerr, fmt.Sprintf("failed to close response body of %s", url)) } }() if resp.StatusCode != http.StatusOK { log.Printf("Unexpected response: %d", resp.StatusCode) contents, err := ioutil.ReadAll(resp.Body) if err != nil { return errors.Wrap(err, "failed to read response body") } return fmt.Errorf("unexpected response code: %d, body: %s", resp.StatusCode, string(contents)) } return nil } // killJsonPayload generates the JSON request body for terminating an instance // otherID is an optional second instance ID, as some backends may have a second // identifer. func killJSONPayload(ins chaosmonkey.Instance, otherID string, spinnakerUser string) []byte { var desc string if otherID != "" { desc = fmt.Sprintf("Chaos Monkey terminate instance: %s %s (%s, %s, %s)", ins.ID(), otherID, ins.AccountName(), ins.RegionName(), ins.ASGName()) } else { desc = fmt.Sprintf("Chaos Monkey terminate instance: %s (%s, %s, %s)", ins.ID(), ins.AccountName(), ins.RegionName(), ins.ASGName()) } p := killPayload{ Application: ins.AppName(), Description: desc, Job: []kpJob{ { User: spinnakerUser, Type: terminateType, Credentials: ins.AccountName(), Region: ins.RegionName(), ServerGroupName: ins.ASGName(), InstanceIDs: []string{ins.ID()}, CloudProvider: ins.CloudProvider(), }, }, } result, err := json.Marshal(p) if err != nil { log.Fatalf("chronos.jsonPayload could not marshal data into json: %v", err) } return result } // OtherID returns the alternate instance id of an instance, if it exists // If there is no alternate instance id, it returns an empty string // This is used by Titus, where we also report the uuid func (s Spinnaker) OtherID(ins chaosmonkey.Instance) (otherID string, err error) { url := s.instanceURL(ins.AccountName(), ins.RegionName(), ins.ID()) resp, err := s.client.Get(url) if err != nil { return "", errors.Wrap(err, fmt.Sprintf("get failed on %s", url)) } defer func() { if cerr := resp.Body.Close(); cerr != nil && err == nil { err = errors.Wrap(cerr, fmt.Sprintf("failed to close response body from %s", url)) } }() body, err := ioutil.ReadAll(resp.Body) if err != nil { return "", errors.Wrap(err, fmt.Sprintf("body read failed at %s", url)) } // Example of response body: /* { ... "health": [ { "type": "Titus", "healthClass": "platform", "state": "Up" }, { "instanceId": "55fe33ab-5b66-450a-85f7-f3129806b87f", "titusTaskId": "Titus-123456-worker-0-0", ... } ], } */ var fields struct { Health []map[string]interface{} `json:"health"` Error string `json:"error"` } err = json.Unmarshal(body, &fields) if err != nil { return "", errors.Wrap(err, fmt.Sprintf("json unmarshal failed, body: %s", body)) } if resp.StatusCode != http.StatusOK { if fields.Error == "" { return "", fmt.Errorf("unexpected status code: %d. body: %s", resp.StatusCode, body) } return "", fmt.Errorf("unexpected status code: %d. error: %s", resp.StatusCode, fields.Error) } // In some cases, an instance may be missing health information. // We just return a blank otherID in that case if len(fields.Health) < 2 { return "", nil } otherID, ok := fields.Health[1]["instanceId"].(string) if !ok { return "", nil } // If the instance id is the same, there is no alternate if ins.ID() == otherID { return "", nil } return otherID, nil }