aucoalesce/normalize.go (179 lines of code) (raw):

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package aucoalesce

import (
	"bytes"
	_ "embed" // Blank import enables the go:embed directive used below.
	"fmt"
	"strings"

	"gopkg.in/yaml.v3"
)

var (
	// normalizationDataYAML holds the raw bytes of normalizations.yaml,
	// embedded into the binary at build time.
	//go:embed normalizations.yaml
	normalizationDataYAML []byte

	// syscallNorms is the first result of LoadNormalizationConfig applied to
	// the embedded data: syscall name -> normalization. Set once by init.
	syscallNorms map[string]*Normalization
	// recordTypeNorms is the second result of LoadNormalizationConfig applied
	// to the embedded data: record type -> normalizations. Set once by init.
	recordTypeNorms map[string][]*Normalization
)

// init parses the embedded normalization data into syscallNorms and
// recordTypeNorms. The data ships with the package, so a parse failure is
// treated as a build defect and panics.
func init() {
	var err error
	syscallNorms, recordTypeNorms, err = LoadNormalizationConfig(normalizationDataYAML)
	if err != nil {
		panic(fmt.Errorf("failed to parse built in normalization mappings: %w", err))
	}
}

// Strings is a custom type to enable YAML values that can be either a string
// or a list of strings.
type Strings struct { Values []string } var _ yaml.Unmarshaler = (*Strings)(nil) func (s *Strings) UnmarshalYAML(n *yaml.Node) error { var singleValue string if err := n.Decode(&singleValue); err == nil { s.Values = []string{singleValue} return nil } return n.Decode(&s.Values) } type NormalizationConfig struct { Macros []any `yaml:"macros"` Normalizations []Normalization `yaml:"normalizations"` } type Normalization struct { SubjectPrimaryFieldName Strings `yaml:"subject_primary"` SubjectSecondaryFieldName Strings `yaml:"subject_secondary"` Action string `yaml:"action"` ObjectPrimaryFieldName Strings `yaml:"object_primary"` ObjectSecondaryFieldName Strings `yaml:"object_secondary"` ObjectWhat string `yaml:"object_what"` ObjectPathIndex int `yaml:"object_path_index"` How Strings `yaml:"how"` RecordTypes Strings `yaml:"record_types"` Syscalls Strings `yaml:"syscalls"` SourceIP Strings `yaml:"source_ip"` HasFields Strings `yaml:"has_fields"` // Apply the normalization if all fields are present. 
ECS ECSMapping `yaml:"ecs"` Description string `yaml:"description,omitempty"` } type ECSFieldMapping struct { From readReference `yaml:"from" json:"from"` To writeReference `yaml:"to" json:"to"` } type ECSMapping struct { Kind string `yaml:"kind"` Category Strings `yaml:"category"` Type Strings `yaml:"type"` Mappings []ECSFieldMapping `yaml:"mappings"` } type ( readReference func(*Event) string writeReference func(*Event, string) ) var ( _ yaml.Unmarshaler = (*readReference)(nil) _ yaml.Unmarshaler = (*writeReference)(nil) ) var ( fromFieldReferences = map[string]readReference{ "subject.primary": func(event *Event) string { return event.Summary.Actor.Primary }, "subject.secondary": func(event *Event) string { return event.Summary.Actor.Secondary }, "object.primary": func(event *Event) string { return event.Summary.Object.Primary }, "object.secondary": func(event *Event) string { return event.Summary.Object.Secondary }, } fromDictReferences = map[string]func(key string) readReference{ "data": func(key string) readReference { return func(event *Event) string { return event.Data[key] } }, "uid": func(key string) readReference { return func(event *Event) string { return event.User.IDs[key] } }, } toFieldReferences = map[string]writeReference{ "user": func(event *Event, s string) { event.ECS.User.set(s) }, "user.effective": func(event *Event, s string) { event.ECS.User.Effective.set(s) }, "user.target": func(event *Event, s string) { event.ECS.User.Target.set(s) }, "user.changes": func(event *Event, s string) { event.ECS.User.Changes.set(s) }, "group": func(event *Event, s string) { event.ECS.Group.set(s) }, } ) func resolveFieldReference(fieldRef string) (ref readReference) { if ref = fromFieldReferences[fieldRef]; ref != nil { return ref } if dot := strings.IndexByte(fieldRef, '.'); dot != -1 { dict := fieldRef[:dot] key := fieldRef[dot+1:] if accessor := fromDictReferences[dict]; accessor != nil { return accessor(key) } } return nil } func (ref *readReference) 
UnmarshalYAML(n *yaml.Node) error { var fieldRef string if err := n.Decode(&fieldRef); err != nil { return err } if *ref = resolveFieldReference(fieldRef); *ref == nil { return fmt.Errorf("field '%s' is not a valid from-reference for ECS mapping", fieldRef) } return nil } func (ref *writeReference) UnmarshalYAML(n *yaml.Node) error { var fieldRef string if err := n.Decode(&fieldRef); err != nil { return err } if *ref = toFieldReferences[fieldRef]; *ref == nil { return fmt.Errorf("field '%s' is not a valid to-reference for ECS mapping", fieldRef) } return nil } func LoadNormalizationConfig(b []byte) (syscalls map[string]*Normalization, recordTypes map[string][]*Normalization, err error) { c := &NormalizationConfig{} dec := yaml.NewDecoder(bytes.NewReader(b)) dec.KnownFields(true) if err := dec.Decode(c); err != nil { return nil, nil, err } syscalls = map[string]*Normalization{} recordTypes = map[string][]*Normalization{} for i := range c.Normalizations { norm := c.Normalizations[i] for _, syscall := range norm.Syscalls.Values { if _, found := syscalls[syscall]; found { return nil, nil, fmt.Errorf("duplication mappings for syscall %v", syscall) } syscalls[syscall] = &norm } for _, recordType := range norm.RecordTypes.Values { norms, found := recordTypes[recordType] if found { for _, n := range norms { if len(n.HasFields.Values) == 0 { return nil, nil, fmt.Errorf("duplication mappings for record_type %v without has_fields qualifier", recordType) } } } recordTypes[recordType] = append(norms, &norm) } } return syscalls, recordTypes, nil }