processors/dissect/dissect.go

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package dissect

import (
	"errors"
	"fmt"
	"net"
	"strconv"
	"strings"

	"github.com/elastic/elastic-agent-libs/mapstr"
)

// Map represents the keys and their values extracted with the defined tokenizer.
type Map = map[string]string
type MapConverted = map[string]interface{}

// positions represents the start and end position of the keys found in the string.
type positions []position

type position struct {
	start int
	end   int
}

// Dissector is a tokenizer based on the Dissect syntax as defined at:
// https://www.elastic.co/guide/en/logstash/current/plugins-filters-dissect.html
type Dissector struct {
	raw     string
	parser  *parser
	trimmer trimmer
}

// Dissect takes the raw string and will use the defined tokenizer to return a map with the
// extracted keys and their values.
//
// Dissect uses a 3-step process:
// - Find the key positions
// - Extract and resolve the keys (append / indirect)
// - Ignore namedSkipField
func (d *Dissector) Dissect(s string) (Map, error) {
	if len(s) == 0 {
		return nil, errEmpty
	}

	positions, err := d.extract(s)
	if err != nil {
		return nil, err
	}

	if len(positions) == 0 {
		return nil, errParsingFailure
	}

	if d.trimmer != nil {
		for idx, pos := range positions {
			pos.start, pos.end = d.trimmer.Trim(s, pos.start, pos.end)
			positions[idx] = pos
		}
	}
	return d.resolve(s, positions), nil
}
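
// Illustrative usage of New and Dissect. The tokenizer and input below are
// hypothetical examples, not taken from this package's tests:
//
//	d, err := New("%{level} %{message}")
//	if err != nil {
//		// handle an invalid tokenizer
//	}
//	m, err := d.Dissect("INFO starting up")
//	// on success: m["level"] == "INFO", m["message"] == "starting up"
//	// (the last key captures the remainder of the string; see extract below)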

func (d *Dissector) DissectConvert(s string) (MapConverted, error) {
	if len(s) == 0 {
		return nil, errEmpty
	}

	positions, err := d.extract(s)
	if err != nil {
		return nil, err
	}

	if len(positions) == 0 {
		return nil, errParsingFailure
	}

	return d.resolveConvert(s, positions), nil
}

// Raw returns the raw tokenizer used to generate the actual parser.
func (d *Dissector) Raw() string {
	return d.raw
}

// extract will navigate through the delimiters and will save the starting and ending position
// of the keys. Afterwards, we resolve the positions with the required fields and do the reordering.
func (d *Dissector) extract(s string) (positions, error) {
	positions := make([]position, len(d.parser.fields))
	var i, start, lookahead, end int

	// Position on the first delimiter, we assume a hard match on the first delimiter.
	// Previous versions of dissect did a lookahead in the string until they could find the
	// delimiter; LS and Beats now have the same behavior and this is consistent with the
	// principle of least surprise.
	dl := d.parser.delimiters[0]
	offset := dl.IndexOf(s, 0)
	if offset == -1 || offset != 0 {
		return nil, fmt.Errorf(
			"could not find beginning delimiter: `%s` in remaining: `%s`, (offset: %d)",
			dl.Delimiter(), s, 0,
		)
	}
	offset += dl.Len()

	// Move through all the other delimiters, until we have consumed all of them.
	for dl.Next() != nil {
		start = offset

		// corresponding field of the delimiter
		field := d.parser.fields[d.parser.fieldsIDMap[i]]

		// for a fixed-length field, just step the same size as its length
		if field.IsFixedLength() {
			end = offset + field.Length()
			if end > len(s) {
				return nil, fmt.Errorf(
					"field length is greater than string length: remaining: `%s`, (offset: %d), field: %s",
					s[offset:], offset, field,
				)
			}
		} else {
			end = dl.Next().IndexOf(s, offset)
			if end == -1 {
				return nil, fmt.Errorf(
					"could not find delimiter: `%s` in remaining: `%s`, (offset: %d)",
					dl.Delimiter(), s[offset:], offset,
				)
			}
		}

		offset = end

		// Greedy consumes keys defined with padding.
		// Keys are defined with the `->` suffix.
		if dl.IsGreedy() {
			for {
				lookahead = dl.Next().IndexOf(s, offset+1)
				if lookahead != offset+1 {
					break
				} else {
					offset = lookahead
				}
			}
		}

		positions[i] = position{start: start, end: end}
		offset += dl.Next().Len()
		i++
		dl = dl.Next()
	}

	field := d.parser.fields[d.parser.fieldsIDMap[i]]
	if field.IsFixedLength() && offset+field.Length() != len(s) {
		return nil, fmt.Errorf("last fixed length key `%s` (length: %d) does not fit into remaining: `%s`, (offset: %d)",
			field, field.Length(), s, offset,
		)
	}

	// If we have remaining contents and have not captured all the requested fields
	if offset < len(s) && i < len(d.parser.fields) {
		positions[i] = position{start: offset, end: len(s)}
	}
	return positions, nil
}

// resolve takes the raw string and the extracted positions and applies the fields syntax.
func (d *Dissector) resolve(s string, p positions) Map {
	m := make(Map, len(p))
	for _, f := range d.parser.fields {
		pos := p[f.ID()]
		f.Apply(s[pos.start:pos.end], m)
	}

	for _, f := range d.parser.referenceFields {
		delete(m, f.Key())
	}
	return m
}

func (d *Dissector) resolveConvert(s string, p positions) MapConverted {
	lookup := make(mapstr.M, len(p))
	m := make(Map, len(p))
	mc := make(MapConverted, len(p))
	for _, f := range d.parser.fields {
		pos := p[f.ID()]
		f.Apply(s[pos.start:pos.end], m) // using map[string]string to avoid another set of apply methods
		if !f.IsSaveable() {
			lookup[f.Key()] = s[pos.start:pos.end]
		} else {
			key := f.Key()
			if k, ok := lookup[f.Key()]; ok {
				key, _ = k.(string)
			}
			v := m[key]
			if f.DataType() != "" {
				mc[key] = convertData(f.DataType(), v)
			} else {
				mc[key] = v
			}
		}
	}

	for _, f := range d.parser.referenceFields {
		delete(mc, f.Key())
	}
	return mc
}
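
// Illustration of the named-skip and indirect-field resolution handled above.
// The tokenizer and input are hypothetical, following the Dissect syntax
// linked in the Dissector docs:
//
//	tokenizer: "%{?key} %{&key}"  input: "ip 10.0.0.1"
//	// "%{?key}" is a named skip field: it captures "ip" but is not saved.
//	// "%{&key}" is an indirect field: it uses the captured value as its own
//	// key, so the result is map["ip"] == "10.0.0.1".
//	// The reference field "key" itself is deleted from the result map.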

// New creates a new Dissector from a tokenized string.
func New(tokenizer string) (*Dissector, error) {
	p, err := newParser(tokenizer)
	if err != nil {
		return nil, err
	}

	if err := validate(p); err != nil {
		return nil, err
	}

	return &Dissector{parser: p, raw: tokenizer}, nil
}

// strToInt is a helper to interpret a string as either base 10 or base 16.
func strToInt(s string, bitSize int) (int64, error) {
	base := 10
	if strings.HasPrefix(s, "0x") || strings.HasPrefix(s, "0X") {
		// strconv.ParseInt will accept the '0x' or '0X' prefix only when base is 0.
		base = 0
	}
	return strconv.ParseInt(s, base, bitSize)
}

func transformType(typ dataType, value string) (interface{}, error) {
	value = strings.TrimRight(value, " ")
	switch typ {
	case String:
		return value, nil
	case Long:
		return strToInt(value, 64)
	case Integer:
		i, err := strToInt(value, 32)
		return int32(i), err
	case Float:
		f, err := strconv.ParseFloat(value, 32)
		return float32(f), err
	case Double:
		d, err := strconv.ParseFloat(value, 64)
		return d, err
	case Boolean:
		return strconv.ParseBool(value)
	case IP:
		if net.ParseIP(value) != nil {
			return value, nil
		}
		return "", errors.New("value is not a valid IP address")
	default:
		return value, nil
	}
}

func convertData(typ string, b string) interface{} {
	if dt, ok := dataTypeNames[typ]; ok {
		value, err := transformType(dt, b)
		if err == nil {
			return value
		}
	}
	return b
}
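
// Illustrative data-type conversion via DissectConvert. The "%{key|type}"
// suffix syntax is an assumption based on how DataType() is used above, and
// the values are hypothetical:
//
//	d, _ := New("%{client.ip|ip} %{bytes|long}")
//	mc, _ := d.DissectConvert("10.0.0.1 0x200")
//	// mc["client.ip"] == "10.0.0.1" (validated by net.ParseIP)
//	// mc["bytes"] == int64(512)     (strToInt accepts the 0x prefix)
//
// On a failed conversion, convertData falls back to the raw string value.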