npm/util/ioutil/restore.go (274 lines of code) (raw):

//go:build !windows
// +build !windows

package ioutil

import (
	"bytes"
	"fmt"
	"regexp"
	"strconv"
	"strings"

	"github.com/Azure/azure-container-networking/common"
	"github.com/Azure/azure-container-networking/npm/metrics"
	"github.com/Azure/azure-container-networking/npm/util"
	npmerrors "github.com/Azure/azure-container-networking/npm/util/errors"
	"k8s.io/klog"
)

// FileCreator is a tool for:
// - building a buffer file
// - running a command with the file
// - handling errors in the file
type FileCreator struct {
	// lines holds every line ever added, in insertion order
	lines []*Line
	// sections groups line indices by section
	sections map[string]*Section // key is sectionID
	// lineNumbersToOmit is the set of zero-based indices into lines that are left out of the generated file
	lineNumbersToOmit map[int]struct{}
	// errorsToRetryOn are file-level errors: when the command output matches one, no line-level handling is attempted
	errorsToRetryOn []*ErrorDefinition
	// lineFailureDefinitions are patterns whose first capture group holds the number of the failing line
	lineFailureDefinitions []*ErrorDefinition
	// tryCount is the number of times the command has been executed so far
	tryCount int
	// maxTryCount is the try count at which no further runs are attempted
	maxTryCount int
	// ioShim supplies the Exec used to build the command
	ioShim *common.IOShim
	// verbose enables logging of the file's lines before each run
	verbose bool
}

// TODO ideas:
// - section to error handler(s) map for addLine
// - error handlers have the kind of line error pattern as a requirement

// Line defines the content, section, and error handlers for a line
type Line struct {
	// content is the text written to the file (without the trailing newline)
	content string
	// sectionID is the key of the section this line belongs to
	sectionID string
	// errorHandlers are tried in order when the command reports a failure on this line
	errorHandlers []*LineErrorHandler
}

// Section is a group of logically connected lines (not necessarily adjacent)
type Section struct {
	id string
	// lineNums are zero-based indices into FileCreator.lines
	lineNums []int
}

// ErrorDefinition defines an error by a regular expression that is matched against the command's output.
type ErrorDefinition struct { matchPattern string re *regexp.Regexp } // LineErrorHandler defines an error and how to handle it type LineErrorHandler struct { Definition *ErrorDefinition Method LineErrorHandlerMethod Callback func() } // LineErrorHandlerMethod defines behavior when an error occurs type LineErrorHandlerMethod string const ( // Continue specifies skipping this line and all previous lines Continue LineErrorHandlerMethod = "continue" // ContinueAndAbortSection specifies skipping this line, all previous lines, and all lines tied to this line's section ContinueAndAbortSection LineErrorHandlerMethod = "continue-and-abort" anyMatchPattern = ".*" ) // AlwaysMatchDefinition will match any error var AlwaysMatchDefinition = NewErrorDefinition(anyMatchPattern) func NewFileCreator(ioShim *common.IOShim, maxTryCount int, lineFailurePatterns ...string) *FileCreator { creator := &FileCreator{ lines: make([]*Line, 0), sections: make(map[string]*Section), lineNumbersToOmit: make(map[int]struct{}), errorsToRetryOn: make([]*ErrorDefinition, 0), lineFailureDefinitions: make([]*ErrorDefinition, len(lineFailurePatterns)), tryCount: 0, maxTryCount: maxTryCount, ioShim: ioShim, } for k, lineFailurePattern := range lineFailurePatterns { creator.lineFailureDefinitions[k] = NewErrorDefinition(lineFailurePattern) } return creator } func (creator *FileCreator) Verbose() { creator.verbose = true } func NewErrorDefinition(pattern string) *ErrorDefinition { return &ErrorDefinition{ matchPattern: pattern, re: regexp.MustCompile(pattern), } } func (creator *FileCreator) AddErrorToRetryOn(definition *ErrorDefinition) { creator.errorsToRetryOn = append(creator.errorsToRetryOn, definition) } func (creator *FileCreator) AddLine(sectionID string, errorHandlers []*LineErrorHandler, items ...string) { section, exists := creator.sections[sectionID] if !exists { section = &Section{sectionID, make([]int, 0)} creator.sections[sectionID] = section } spaceSeparatedItems := strings.Join(items, " 
") line := &Line{spaceSeparatedItems, sectionID, errorHandlers} creator.lines = append(creator.lines, line) section.lineNums = append(section.lineNums, len(creator.lines)-1) } // ToString combines the lines in the FileCreator and ends with a new line. func (creator *FileCreator) ToString() string { result := strings.Builder{} for lineNum, line := range creator.lines { _, isOmitted := creator.lineNumbersToOmit[lineNum] if !isOmitted { result.WriteString(line.content + "\n") } } return result.String() } func (creator *FileCreator) RunCommandWithFile(cmd string, args ...string) error { fileString := creator.ToString() wasFileAltered, err := creator.runCommandOnceWithFile(fileString, cmd, args...) if err == nil { return nil } commandString := cmd + " " + strings.Join(args, " ") for !creator.hasNoMoreRetries() { sameNew := "same" if wasFileAltered { sameNew = "updated" } msg := fmt.Sprintf("on try number %d, failed to run command [%s]. Rerunning with %s file. err: [%s]", creator.tryCount, commandString, sameNew, err.Error()) metrics.SendErrorLogAndMetric(util.UtilID, "error: %s", msg) if wasFileAltered { // get the new file contents fileString = creator.ToString() } wasFileAltered, err = creator.runCommandOnceWithFile(fileString, cmd, args...) if err == nil { klog.Infof("successfully ran command [%s] on try number %d", commandString, creator.tryCount) return nil } } errString := fmt.Sprintf("after %d tries, failed to run command [%s] with error: %v", creator.tryCount, commandString, err) // TODO conditionally specify as retriable? return npmerrors.Errorf(npmerrors.RunFileCreator, false, errString) } // RunCommandOnceWithFile runs the command with the file once and increments the try count. // It returns whether the file was altered and any error. // For automatic retrying and proper logging, use RunCommandWithFile. // This method can be used for external testing of file creator contents after each run. 
func (creator *FileCreator) RunCommandOnceWithFile(cmd string, args ...string) (bool, error) { if creator.hasNoMoreRetries() { return false, npmerrors.Errorf(npmerrors.RunFileCreator, false, fmt.Sprintf("reached max try count %d", creator.tryCount)) } fileString := creator.ToString() return creator.runCommandOnceWithFile(fileString, cmd, args...) } // returns whether the file was altered and any error // TODO return another bool that specifies if there was a file-level retriable error? func (creator *FileCreator) runCommandOnceWithFile(fileString, cmd string, args ...string) (bool, error) { commandString := cmd + " " + strings.Join(args, " ") if fileString == "" { // NOTE this wouldn't prevent us from running an iptables restore file with just "COMMIT\n" klog.Infof("returning as a success without running command [%s] since the fileString is empty", commandString) return false, nil } // TODO: Refactor non-error/warning klogs with Zap and set the following logs to "debug" level // klog.Infof("running this restore command: [%s]", commandString) if creator.verbose { creator.logLines(commandString) } creator.tryCount++ command := creator.ioShim.Exec.Command(cmd, args...) 
command.SetStdin(bytes.NewBufferString(fileString)) // run the command stdErrBytes, err := command.CombinedOutput() if err == nil { // success return false, nil } stdErr := string(stdErrBytes) err = fmt.Errorf("error running command [%s] with err [%w] and stdErr [%s]", commandString, err, stdErr) if creator.hasNoMoreRetries() { return false, err } // begin the retry logic if creator.hasFileLevelError(stdErr) { return false, npmerrors.SimpleErrorWrapper("file-level error", err) } // no file-level error, so handle line-level error if there is one numLines := creator.numLines() for _, lineFailureDefinition := range creator.lineFailureDefinitions { lineNum := lineFailureDefinition.getErrorLineNumber(stdErr, commandString, numLines) if lineNum != -1 { wasFileAltered, line := creator.handleLineError(stdErr, commandString, lineNum) return wasFileAltered, npmerrors.SimpleErrorWrapper(fmt.Sprintf("line-number error for line [%s]", line.content), err) } } return false, npmerrors.SimpleErrorWrapper("unknown error", err) } func (creator *FileCreator) hasNoMoreRetries() bool { return creator.tryCount >= creator.maxTryCount } func (creator *FileCreator) hasFileLevelError(stdErr string) bool { for _, errorDefinition := range creator.errorsToRetryOn { if errorDefinition.isMatch(stdErr) { return true } } return false } func (definition *ErrorDefinition) isMatch(stdErr string) bool { return definition.matchPattern == anyMatchPattern || definition.re.MatchString(stdErr) } func (creator *FileCreator) numLines() int { return len(creator.lines) - len(creator.lineNumbersToOmit) } // return -1 if there's a failure func (definition *ErrorDefinition) getErrorLineNumber(stdErr, commandString string, numLines int) int { result := definition.re.FindStringSubmatch(stdErr) if result == nil || len(result) < 2 { metrics.SendErrorLogAndMetric(util.UtilID, "expected error with line number, but couldn't detect one with error regex pattern [%s] for command [%s] with stdErr [%s]", 
definition.matchPattern, commandString, stdErr) return -1 } lineNumString := result[1] lineNum, err := strconv.Atoi(lineNumString) if err != nil { metrics.SendErrorLogAndMetric(util.UtilID, "expected error with line number, but error regex pattern %s didn't produce a number for command [%s] with stdErr [%s]", definition.matchPattern, commandString, stdErr) return -1 } if lineNum < 1 || lineNum > numLines { metrics.SendErrorLogAndMetric(util.UtilID, "expected error with line number, but error regex pattern %s produced an invalid line number %d for command [%s] with stdErr [%s]", definition.matchPattern, lineNum, commandString, stdErr, ) return -1 } return lineNum } // return whether the file was altered func (creator *FileCreator) handleLineError(stdErr, commandString string, lineNum int) (bool, *Line) { lineIndex := 0 currentLineNum := 1 for i := range creator.lines { if _, isOmitted := creator.lineNumbersToOmit[i]; isOmitted { continue } if currentLineNum == lineNum { lineIndex = i break } currentLineNum++ } line := creator.lines[lineIndex] for _, errorHandler := range line.errorHandlers { if !errorHandler.Definition.isMatch(stdErr) { continue } switch errorHandler.Method { case Continue: klog.Infof("continuing after line %d for command [%s]", lineNum, commandString) for i := 0; i <= lineIndex; i++ { creator.lineNumbersToOmit[i] = struct{}{} } case ContinueAndAbortSection: klog.Infof("continuing after line %d and aborting section [%s] for command [%s]", lineNum, line.sectionID, commandString) for i := 0; i <= lineIndex; i++ { creator.lineNumbersToOmit[i] = struct{}{} } section := creator.sections[line.sectionID] for _, lineNum := range section.lineNums { creator.lineNumbersToOmit[lineNum] = struct{}{} } } errorHandler.Callback() return true, creator.lines[lineIndex] } return false, creator.lines[lineIndex] } func (creator *FileCreator) logLines(commandString string) { if creator.tryCount == 0 { // print every line lineNum := 1 for i, line := range creator.lines { 
if _, ok := creator.lineNumbersToOmit[i]; ok { metrics.SendErrorLogAndMetric(util.UtilID, "unexpectedly seeing an omitted line for tryCount=0. line num: %d", i) continue } klog.Infof("line %d of restore command [%s] with section ID [%s]: [%s]", lineNum, commandString, line.sectionID, line.content) lineNum++ } return } // don't print every line because printing all lines can pollute the logs and we already know the lines if len(creator.lineNumbersToOmit) == 0 { klog.Infof("on try %d of restore command [%s]. repeating with same lines", creator.tryCount, commandString) return } lineNumMappings := make([]string, 0, creator.numLines()) lineNum := 1 for i := range creator.lines { if _, ok := creator.lineNumbersToOmit[i]; ok { continue } // this mapping could be off if we unexpectedly saw an omitted line for the first try (see error log in branch above) lineNumMappings = append(lineNumMappings, fmt.Sprintf("%d->%d", lineNum, i+1)) lineNum++ } klog.Infof("on try %d of restore command [%s]. mapping of current line numbers to original line numbers: %+v", creator.tryCount, commandString, lineNumMappings) }