processors/dissect/parser.go:

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package dissect

import (
	"sort"
)

// parser extracts the useful information from the raw tokenizer string: fields, delimiters and
// skip fields.
type parser struct {
	delimiters      []delimiter
	fields          []field
	fieldsIDMap     map[int]int
	referenceFields []field
}

// isIndirectField reports whether the given field is an indirectField.
var isIndirectField = func(field field) bool {
	switch field.(type) {
	case indirectField:
		return true
	default:
		return false
	}
}

// newParser parses the raw tokenizer string into the ordered delimiters and fields
// used to dissect an event.
func newParser(tokenizer string) (*parser, error) {
	// Each match is a pair of delimiter + key.
	matches := delimiterRE.FindAllStringSubmatchIndex(tokenizer, -1)
	if len(matches) == 0 {
		return nil, errInvalidTokenizer
	}

	var delimiters []delimiter //nolint:prealloc // we do not know how big it is going to be
	var fields []field         //nolint:prealloc // we do not know how big it is going to be
	pos := 0
	for id, m := range matches {
		d := newDelimiter(tokenizer[m[2]:m[3]])
		key := tokenizer[m[4]:m[5]]
		field, err := newField(id, key, d)
		if err != nil {
			return nil, err
		}
		if field.IsGreedy() {
			d.MarkGreedy()
		}
		fields = append(fields, field)
		delimiters = append(delimiters, d)
		pos = m[5] + 1
	}

	if pos < len(tokenizer) {
		d := newDelimiter(tokenizer[pos:])
		delimiters = append(delimiters, d)
	}

	// Chain the delimiters together to make it easier to match them against the string.
	// Some delimiters also need information about their surroundings to make a decision.
	for i := 0; i < len(delimiters); i++ {
		if i+1 < len(delimiters) {
			delimiters[i].SetNext(delimiters[i+1])
		}
	}

	// Sort fields by ordinal so append fields end up last and the string join goes from left to right.
	sort.Slice(fields, func(i, j int) bool { return fields[i].Ordinal() < fields[j].Ordinal() })

	fieldsIDMap := make(map[int]int)
	for i, f := range fields {
		fieldsIDMap[f.ID()] = i
	}

	// List of fields needed for indirection but that don't need to appear in the final event.
	var referenceFields []field
	for _, f := range fields {
		if !f.IsSaveable() {
			referenceFields = append(referenceFields, f)
		}
	}

	return &parser{
		delimiters:      delimiters,
		fields:          fields,
		fieldsIDMap:     fieldsIDMap,
		referenceFields: referenceFields,
	}, nil
}

// filterFieldsWith returns the fields that match the provided predicate.
func filterFieldsWith(fields []field, predicate func(field) bool) []field {
	var filtered []field
	for _, field := range fields {
		if predicate(field) {
			filtered = append(filtered, field)
		}
	}
	return filtered
}
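
For illustration only, here is a minimal sketch of a package-internal test that exercises newParser with a simple dissect tokenizer. The test name, the tokenizer pattern, and the expected counts are assumptions made for this sketch; they are not part of the original file.

package dissect

import "testing"

// TestNewParserSketch is a hypothetical example, not part of the original file.
// It assumes the usual dissect tokenizer syntax, where each %{key} marker is
// preceded by a (possibly empty) delimiter.
func TestNewParserSketch(t *testing.T) {
	p, err := newParser("%{key1} - %{key2}")
	if err != nil {
		t.Fatalf("newParser returned an unexpected error: %v", err)
	}
	// "%{key1} - %{key2}" should produce two fields and two delimiters
	// (an empty one before key1 and " - " before key2).
	if len(p.fields) != 2 || len(p.delimiters) != 2 {
		t.Fatalf("expected 2 fields and 2 delimiters, got %d and %d",
			len(p.fields), len(p.delimiters))
	}
}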