grok.go (259 lines of code) (raw):

// Licensed to Elasticsearch B.V. under one or more contributor // license agreements. See the NOTICE file distributed with // this work for additional information regarding copyright // ownership. Elasticsearch B.V. licenses this file to you under // the Apache License, Version 2.0 (the "License"); you may // not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package grok import ( "fmt" "regexp" "strconv" "strings" "github.com/elastic/go-grok/patterns" ) const dotSep = "___" var ( ErrParseFailure = fmt.Errorf("parsing failed") ErrTypeNotProvided = fmt.Errorf("type not specified") ErrUnsupportedName = fmt.Errorf("name contains unsupported character ':'") // grok can be specified in either of these forms: // %{SYNTAX} - e.g {NUMBER} // %{SYNTAX:ID} - e.g {NUMBER:MY_AGE} // %{SYNTAX:ID:TYPE} - e.g {NUMBER:MY_AGE:INT} // supported types are int, long, double, float and boolean // for go specific implementation int and long results in int // double and float both results in float reusePattern = regexp.MustCompile(`%{(\w+(?::[\w+.]+(?::\w+)?)?)}`) ) type Grok struct { patternDefinitions map[string]string re *regexp.Regexp typeHints map[string]string lookupDefaultPatterns bool } func New() *Grok { return &Grok{ patternDefinitions: make(map[string]string), lookupDefaultPatterns: true, } } func NewWithoutDefaultPatterns() *Grok { return &Grok{ patternDefinitions: make(map[string]string), } } func NewWithPatterns(patterns ...map[string]string) (*Grok, error) { g := &Grok{ patternDefinitions: make(map[string]string), lookupDefaultPatterns: true, } for _, p 
:= range patterns { if err := g.AddPatterns(p); err != nil { return nil, err } } return g, nil } // NewComplete creates a grok parser with full set of patterns func NewComplete(additionalPatterns ...map[string]string) (*Grok, error) { g, err := NewWithPatterns( patterns.AWS, patterns.Bind9, patterns.Bro, patterns.Exim, patterns.HAProxy, patterns.Httpd, patterns.Firewalls, patterns.Java, patterns.Junos, patterns.Maven, patterns.MCollective, patterns.MongoDB, patterns.PostgreSQL, patterns.Rails, patterns.Redis, patterns.Ruby, patterns.Squid, patterns.Syslog, ) if err != nil { return nil, err } for _, p := range additionalPatterns { if err := g.AddPatterns(p); err != nil { return nil, err } } return g, nil } func (grok *Grok) AddPattern(name, patternDefinition string) error { if strings.ContainsRune(name, ':') { return ErrUnsupportedName } // overwrite existing if present grok.patternDefinitions[name] = patternDefinition return nil } func (grok *Grok) AddPatterns(patternDefinitions map[string]string) error { // overwrite existing if present for name, patternDefinition := range patternDefinitions { if strings.ContainsRune(name, ':') { return ErrUnsupportedName } grok.patternDefinitions[name] = patternDefinition } return nil } func (grok *Grok) HasCaptureGroups() bool { if grok == nil || grok.re == nil { return false } for _, groupName := range grok.re.SubexpNames() { if groupName != "" { return true } } return false } func (grok *Grok) Compile(pattern string, namedCapturesOnly bool) error { return grok.compile(pattern, namedCapturesOnly) } func (grok *Grok) Match(text []byte) bool { return grok.re.Match(text) } func (grok *Grok) MatchString(text string) bool { return grok.re.MatchString(text) } // ParseString parses text in a form of string and returns map[string]string with values // not converted to types according to hints. // When expression is not a match nil map is returned. 
func (grok *Grok) ParseString(text string) (map[string]string, error) {
	return grok.captureString(text)
}

// Parse parses text in a form of []byte and returns map[string][]byte with values
// not converted to types according to hints.
// Captures that matched the empty string are omitted.
// When expression is not a match an empty map is returned.
func (grok *Grok) Parse(text []byte) (map[string][]byte, error) {
	return grok.captureBytes(text)
}

// ParseTyped parses text and returns map[string]interface{} with values
// typed according to type hints generated at compile time.
// A capture without a hint is returned as a string. If a hint names an
// unsupported type the returned error wraps ErrTypeNotProvided; if a value
// cannot be converted to the hinted type the conversion error is returned.
// When expression is not a match an empty map is returned.
func (grok *Grok) ParseTyped(text []byte) (map[string]interface{}, error) {
	// captureTyped already builds a fresh map[string]interface{} for every
	// call, so it is returned directly instead of being copied again.
	return grok.captureTyped(text)
}

// ParseTypedString parses text and returns map[string]interface{} with values
// typed according to type hints generated at compile time.
// A capture without a hint is returned as a string. If a hint names an
// unsupported type the returned error wraps ErrTypeNotProvided; if a value
// cannot be converted to the hinted type the conversion error is returned.
// When expression is not a match an empty map is returned.
func (grok *Grok) ParseTypedString(text string) (map[string]interface{}, error) { return grok.ParseTyped([]byte(text)) } func (grok *Grok) compile(pattern string, namedCapturesOnly bool) error { // get expanded pattern expandedExpression, hints, err := grok.expand(pattern, namedCapturesOnly) if err != nil { return err } compiledExpression, err := regexp.Compile(expandedExpression) if err != nil { return err } grok.re = compiledExpression grok.typeHints = hints return nil } func (grok *Grok) captureString(text string) (map[string]string, error) { return captureTypeFn(grok.re, text, func(v, _ string) (string, error) { return v, nil }, ) } func (grok *Grok) captureBytes(text []byte) (map[string][]byte, error) { return captureTypeFn(grok.re, string(text), func(v, _ string) ([]byte, error) { return []byte(v), nil }, ) } func (grok *Grok) captureTyped(text []byte) (map[string]interface{}, error) { return captureTypeFn(grok.re, string(text), grok.convertMatch) } func captureTypeFn[K any](re *regexp.Regexp, text string, conversionFn func(v, key string) (K, error)) (map[string]K, error) { captures := make(map[string]K) matches := re.FindStringSubmatch(text) if len(matches) == 0 { return captures, nil } names := re.SubexpNames() if len(names) == 0 { return captures, nil } for i, name := range names { if len(name) == 0 { continue } match := matches[i] if len(match) == 0 { continue } if conversionFn != nil { v, err := conversionFn(string(match), name) if err != nil { return nil, err } captures[strings.ReplaceAll(name, dotSep, ".")] = v } } return captures, nil } func (grok *Grok) convertMatch(match, name string) (interface{}, error) { hint, found := grok.typeHints[name] if !found { return match, nil } switch hint { case "string": return match, nil case "double": return strconv.ParseFloat(match, 64) case "float": return strconv.ParseFloat(match, 64) case "int": return strconv.Atoi(match) case "long": return strconv.Atoi(match) case "bool": return strconv.ParseBool(match) case 
"boolean": return strconv.ParseBool(match) default: return nil, fmt.Errorf("invalid type for %v: %w", name, ErrTypeNotProvided) } } // expand processes a pattern and returns expanded regular expression, type hints and error func (grok *Grok) expand(pattern string, namedCapturesOnly bool) (string, map[string]string, error) { hints := make(map[string]string) expandedPattern := pattern // recursion break is guarding against cyclic reference in pattern definitions // as this is performed only once at compile time more clever optimization (e.g detecting cycles in graph) is TBD for recursionBreak := 1000; recursionBreak > 0; recursionBreak-- { subMatches := reusePattern.FindAllStringSubmatch(expandedPattern, -1) if len(subMatches) == 0 { // nothing to expand anymore break } for _, nameSubmatch := range subMatches { // grok can be specified in either of these forms: // %{SYNTAX} - e.g {NUMBER} // %{SYNTAX:ID} - e.g {NUMBER:MY_AGE} // %{SYNTAX:ID:TYPE} - e.g {NUMBER:MY_AGE:INT} // nameSubmatch is equal to [["%{NAME:ID:TYPe}" "NAME:ID:TYPe"]] // we need only inner part nameParts := strings.Split(nameSubmatch[1], ":") grokId := nameParts[0] var targetId string if len(nameParts) > 1 { targetId = strings.ReplaceAll(nameParts[1], ".", dotSep) } else { targetId = nameParts[0] } // compile hints for used patterns if len(nameParts) == 3 { hints[targetId] = nameParts[2] } knownPattern, found := grok.lookupPattern(grokId) if !found { return "", nil, fmt.Errorf("pattern definition %q unknown: %w", grokId, ErrParseFailure) } var replacementPattern string if namedCapturesOnly && len(nameParts) == 1 { // this has no semantic (pattern:foo) so we don't need to capture replacementPattern = "(" + knownPattern + ")" } else { replacementPattern = "(?P<" + targetId + ">" + knownPattern + ")" } // expand pattern with definition expandedPattern = strings.ReplaceAll(expandedPattern, nameSubmatch[0], replacementPattern) } } return expandedPattern, hints, nil } func (grok *Grok) 
lookupPattern(grokId string) (string, bool) { if knownPattern, found := grok.patternDefinitions[grokId]; found { return knownPattern, found } if grok.lookupDefaultPatterns { if knownPattern, found := patterns.Default[grokId]; found { return knownPattern, found } } return "", false }