internal/fields/validate.go (1,078 lines of code) (raw):

// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. package fields import ( "bufio" _ "embed" "encoding/json" "errors" "fmt" "net" "os" "path/filepath" "regexp" "slices" "sort" "strconv" "strings" "github.com/Masterminds/semver/v3" "github.com/cbroglie/mustache" "gopkg.in/yaml.v3" "github.com/elastic/elastic-package/internal/common" "github.com/elastic/elastic-package/internal/logger" "github.com/elastic/elastic-package/internal/multierror" "github.com/elastic/elastic-package/internal/packages" "github.com/elastic/elastic-package/internal/packages/buildmanifest" ) const externalFieldAppendedTag = "ecs_component" var ( semver2_0_0 = semver.MustParse("2.0.0") semver2_3_0 = semver.MustParse("2.3.0") semver3_0_1 = semver.MustParse("3.0.1") // List of stack releases that do not // include ECS mappings (all versions before 8.13.0). stackVersionsWithoutECS = []*semver.Version{ semver.MustParse("8.12.2"), semver.MustParse("8.12.1"), semver.MustParse("8.12.0"), semver.MustParse("8.11.4"), semver.MustParse("8.11.3"), semver.MustParse("8.11.2"), semver.MustParse("8.11.1"), semver.MustParse("8.11.0"), semver.MustParse("8.10.4"), semver.MustParse("8.10.3"), semver.MustParse("8.10.2"), semver.MustParse("8.10.1"), semver.MustParse("8.10.0"), semver.MustParse("8.9.2"), semver.MustParse("8.9.1"), semver.MustParse("8.9.0"), semver.MustParse("8.8.2"), semver.MustParse("8.8.1"), semver.MustParse("8.8.0"), semver.MustParse("8.7.1"), semver.MustParse("8.7.0"), semver.MustParse("8.6.2"), semver.MustParse("8.6.1"), semver.MustParse("8.6.0"), semver.MustParse("8.5.3"), semver.MustParse("8.5.2"), semver.MustParse("8.5.1"), semver.MustParse("8.5.0"), semver.MustParse("8.4.3"), semver.MustParse("8.4.2"), semver.MustParse("8.4.1"), semver.MustParse("8.4.0"), semver.MustParse("8.3.3"), semver.MustParse("8.3.2"), semver.MustParse("8.3.1"), semver.MustParse("8.3.0"), semver.MustParse("8.2.3"), semver.MustParse("8.2.2"), semver.MustParse("8.2.1"), semver.MustParse("8.2.0"), semver.MustParse("8.1.3"), semver.MustParse("8.1.2"), semver.MustParse("8.1.1"), semver.MustParse("8.1.0"), semver.MustParse("8.0.1"), semver.MustParse("8.0.0"), semver.MustParse("7.17.24"), semver.MustParse("7.17.23"), semver.MustParse("7.17.22"), semver.MustParse("7.17.21"), semver.MustParse("7.17.20"), semver.MustParse("7.17.19"), semver.MustParse("7.17.18"), semver.MustParse("7.17.17"), semver.MustParse("7.17.16"), semver.MustParse("7.17.15"), semver.MustParse("7.17.14"), semver.MustParse("7.17.13"), semver.MustParse("7.17.12"), semver.MustParse("7.17.11"), semver.MustParse("7.17.10"), semver.MustParse("7.17.9"), semver.MustParse("7.17.8"), semver.MustParse("7.17.7"), semver.MustParse("7.17.6"), semver.MustParse("7.17.5"), semver.MustParse("7.17.4"), semver.MustParse("7.17.3"), semver.MustParse("7.17.2"), semver.MustParse("7.17.1"), semver.MustParse("7.17.0"), semver.MustParse("7.16.3"), semver.MustParse("7.16.2"), semver.MustParse("7.16.1"), semver.MustParse("7.16.0"), semver.MustParse("7.15.2"), semver.MustParse("7.15.1"), semver.MustParse("7.15.0"), semver.MustParse("7.14.2"), semver.MustParse("7.14.1"), semver.MustParse("7.14.0"), // First version of Fleet in GA; there are no packages older than this version. } defaultExternal = "ecs" ) // Validator is responsible for fields validation. type Validator struct { // Schema contains definition records. Schema []FieldDefinition // SpecVersion contains the version of the spec used by the package. specVersion semver.Version // expectedDatasets contains the value expected for dataset fields. expectedDatasets []string defaultNumericConversion bool // fields that store keywords, but can be received as numeric types. numericKeywordFields []string // fields that store numbers, but can be received as strings. stringNumberFields []string disabledDependencyManagement bool enabledAllowedIPCheck bool allowedCIDRs []*net.IPNet enabledImportAllECSSchema bool disabledNormalization bool injectFieldsOptions InjectFieldsOptions } // ValidatorOption represents an optional flag that can be passed to CreateValidatorForDirectory. type ValidatorOption func(*Validator) error // WithSpecVersion enables validation dependant of the spec version used by the package. func WithSpecVersion(version string) ValidatorOption { return func(v *Validator) error { sv, err := semver.NewVersion(version) if err != nil { return fmt.Errorf("invalid version %q: %v", version, err) } v.specVersion = *sv return nil } } // WithDefaultNumericConversion configures the validator to accept defined keyword (or constant_keyword) fields as numeric-type. func WithDefaultNumericConversion() ValidatorOption { return func(v *Validator) error { v.defaultNumericConversion = true return nil } } // WithNumericKeywordFields configures the validator to accept specific fields to have numeric-type // while defined as keyword or constant_keyword. func WithNumericKeywordFields(fields []string) ValidatorOption { return func(v *Validator) error { v.numericKeywordFields = common.StringSlicesUnion(v.numericKeywordFields, fields) return nil } } // WithStringNumberFields configures the validator to accept specific fields to have fields defined as numbers // as their string representation. func WithStringNumberFields(fields []string) ValidatorOption { return func(v *Validator) error { v.stringNumberFields = common.StringSlicesUnion(v.stringNumberFields, fields) return nil } } // WithDisabledDependencyManagement configures the validator to ignore external fields and won't follow dependencies. func WithDisabledDependencyManagement() ValidatorOption { return func(v *Validator) error { v.disabledDependencyManagement = true return nil } } // WithEnabledAllowedIPCheck configures the validator to perform check on the IP values against an allowed list. func WithEnabledAllowedIPCheck() ValidatorOption { return func(v *Validator) error { v.enabledAllowedIPCheck = true return nil } } // WithExpectedDatasets configures the validator to check if the dataset field value matches one of the expected values. func WithExpectedDatasets(datasets []string) ValidatorOption { return func(v *Validator) error { v.expectedDatasets = datasets return nil } } // WithEnabledImportAllECSSchema configures the validator to check or not the fields with the complete ECS schema. func WithEnabledImportAllECSSChema(importSchema bool) ValidatorOption { return func(v *Validator) error { v.enabledImportAllECSSchema = importSchema return nil } } // WithDisableNormalization configures the validator to disable normalization. func WithDisableNormalization(disabledNormalization bool) ValidatorOption { return func(v *Validator) error { v.disabledNormalization = disabledNormalization return nil } } // WithInjectFieldsOptions configures fields injection. func WithInjectFieldsOptions(options InjectFieldsOptions) ValidatorOption { return func(v *Validator) error { v.injectFieldsOptions = options return nil } } type packageRootFinder interface { FindPackageRoot() (string, bool, error) } type packageRoot struct{} func (p packageRoot) FindPackageRoot() (string, bool, error) { return packages.FindPackageRoot() } // CreateValidatorForDirectory function creates a validator for the directory. func CreateValidatorForDirectory(fieldsParentDir string, opts ...ValidatorOption) (v *Validator, err error) { p := packageRoot{} return createValidatorForDirectoryAndPackageRoot(fieldsParentDir, p, opts...) } func createValidatorForDirectoryAndPackageRoot(fieldsParentDir string, finder packageRootFinder, opts ...ValidatorOption) (v *Validator, err error) { v = new(Validator) // In validator, inject fields with settings used for validation, such as `allowed_values`. v.injectFieldsOptions.IncludeValidationSettings = true for _, opt := range opts { if err := opt(v); err != nil { return nil, err } } v.allowedCIDRs = initializeAllowedCIDRsList() fieldsDir := filepath.Join(fieldsParentDir, "fields") var fdm *DependencyManager if !v.disabledDependencyManagement { packageRoot, found, err := finder.FindPackageRoot() if err != nil { return nil, fmt.Errorf("can't find package root: %w", err) } if !found { return nil, errors.New("package root not found and dependency management is enabled") } fdm, v.Schema, err = initDependencyManagement(packageRoot, v.specVersion, v.enabledImportAllECSSchema) if err != nil { return nil, fmt.Errorf("failed to initialize dependency management: %w", err) } } fields, err := loadFieldsFromDir(fieldsDir, fdm, v.injectFieldsOptions) if err != nil { return nil, fmt.Errorf("can't load fields from directory (path: %s): %w", fieldsDir, err) } v.Schema = append(fields, v.Schema...) return v, nil } func initDependencyManagement(packageRoot string, specVersion semver.Version, importECSSchema bool) (*DependencyManager, []FieldDefinition, error) { buildManifest, ok, err := buildmanifest.ReadBuildManifest(packageRoot) if err != nil { return nil, nil, fmt.Errorf("can't read build manifest: %w", err) } if !ok { // There is no build manifest, nothing to do. return nil, nil, nil } fdm, err := CreateFieldDependencyManager(buildManifest.Dependencies) if err != nil { return nil, nil, fmt.Errorf("can't create field dependency manager: %w", err) } // Check if the package embeds ECS mappings packageEmbedsEcsMappings := buildManifest.ImportMappings() && !specVersion.LessThan(semver2_3_0) // Check if all stack versions support ECS mappings stackSupportsEcsMapping, err := supportsECSMappings(packageRoot) if err != nil { return nil, nil, fmt.Errorf("can't check if stack version includes ECS mappings: %w", err) } // If the package embeds ECS mappings, or the stack version includes ECS mappings, then // we should import the ECS schema to validate the package fields against it. var schema []FieldDefinition if (packageEmbedsEcsMappings || stackSupportsEcsMapping) && importECSSchema { // Import all fields from external schema (most likely ECS) to // validate the package fields against it. ecsSchema, err := fdm.ImportAllFields(defaultExternal) if err != nil { return nil, nil, err } logger.Debugf("Imported ECS fields definition from external schema for validation (embedded in package: %v, stack uses ecs@mappings template: %v)", packageEmbedsEcsMappings, stackSupportsEcsMapping) schema = ecsSchema } // ecs@mappings adds additional multifields that are not defined anywhere. // Adding them in all cases so packages can be tested in versions of the stack that // add the ecs@mappings component template. schema = appendECSMappingMultifields(schema, "") return fdm, schema, nil } // supportsECSMappings check if all the versions of the stack the package can run on support ECS mappings. func supportsECSMappings(packageRoot string) (bool, error) { packageManifest, err := packages.ReadPackageManifestFromPackageRoot(packageRoot) if err != nil { return false, fmt.Errorf("can't read package manifest: %w", err) } if len(packageManifest.Conditions.Kibana.Version) == 0 { logger.Debugf("No Kibana version constraint found in package manifest; assuming it does not support ECS mappings.") return false, nil } kibanaConstraints, err := semver.NewConstraint(packageManifest.Conditions.Kibana.Version) if err != nil { return false, fmt.Errorf("invalid constraint for Kibana: %w", err) } return allVersionsIncludeECS(kibanaConstraints), nil } // allVersionsIncludeECS Check if all the stack versions in the constraints include ECS mappings. Only the stack // versions 8.13.0 and above include ECS mappings. // // Returns true if all the stack versions in the constraints include ECS mappings, otherwise returns false. func allVersionsIncludeECS(kibanaConstraints *semver.Constraints) bool { // Looking for a version that satisfies the package constraints. for _, v := range stackVersionsWithoutECS { if kibanaConstraints.Check(v) { // Found a version that satisfies the constraints, // so at least this version does not include // ECS mappings. return false } } // If no version satisfies the constraints, then all versions // include ECS mappings. return true // This check works under the assumption the constraints are not limited // upwards. // // For example, if the constraint is `>= 8.12.0` and the stack version is // `8.12.999`, the constraint will be satisfied. // // However, if the constraint is `>= 8.0.0, < 8.10.0` the check will not // return the right result. // // To support this, we would need to check the constraint against a larger // set of versions, and check if the constraint is satisfied for all // of them, like in the commented out example above. // // lastStackVersionWithoutEcsMappings := semver.MustParse("8.12.999") // return !kibanaConstraints.Check(lastStackVersionWithoutEcsMappings) } func ecsPathWithMultifieldsMatch(name string) bool { suffixes := []string{ // From https://github.com/elastic/elasticsearch/blob/34a78f3cf3e91cd13f51f1f4f8e378f8ed244a2b/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json#L87 ".body.content", "url.full", "url.original", // From https://github.com/elastic/elasticsearch/blob/34a78f3cf3e91cd13f51f1f4f8e378f8ed244a2b/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json#L96 "command_line", "stack_trace", // From https://github.com/elastic/elasticsearch/blob/34a78f3cf3e91cd13f51f1f4f8e378f8ed244a2b/x-pack/plugin/core/template-resources/src/main/resources/ecs%40mappings.json#L113 ".title", ".executable", ".name", ".working_directory", ".full_name", "file.path", "file.target_path", "os.full", "email.subject", "vulnerability.description", "user_agent.original", } for _, suffix := range suffixes { if strings.HasSuffix(name, suffix) { return true } } return false } // appendECSMappingMultifields adds multifields included in ecs@mappings that are not defined anywhere, for fields // that don't define any multifield. func appendECSMappingMultifields(schema []FieldDefinition, prefix string) []FieldDefinition { rules := []struct { match func(name string) bool definitions []FieldDefinition }{ { match: ecsPathWithMultifieldsMatch, definitions: []FieldDefinition{ { Name: "text", Type: "match_only_text", External: externalFieldAppendedTag, }, }, }, } var result []FieldDefinition for _, def := range schema { fullName := def.Name if prefix != "" { fullName = prefix + "." + fullName } def.Fields = appendECSMappingMultifields(def.Fields, fullName) for _, rule := range rules { if !rule.match(fullName) { continue } for _, mf := range rule.definitions { // Append multifields only if they are not already defined. f := func(d FieldDefinition) bool { return d.Name == mf.Name } if !slices.ContainsFunc(def.MultiFields, f) { def.MultiFields = append(def.MultiFields, mf) } } } result = append(result, def) } return result } //go:embed _static/allowed_geo_ips.txt var allowedGeoIPs string func initializeAllowedCIDRsList() (cidrs []*net.IPNet) { s := bufio.NewScanner(strings.NewReader(allowedGeoIPs)) for s.Scan() { _, cidr, err := net.ParseCIDR(s.Text()) if err != nil { panic("invalid ip in _static/allowed_geo_ips.txt: " + s.Text()) } cidrs = append(cidrs, cidr) } return cidrs } func loadFieldsFromDir(fieldsDir string, fdm *DependencyManager, injectOptions InjectFieldsOptions) ([]FieldDefinition, error) { files, err := filepath.Glob(filepath.Join(fieldsDir, "*.yml")) if err != nil { return nil, fmt.Errorf("reading directory with fields failed (path: %s): %w", fieldsDir, err) } var fields []FieldDefinition for _, file := range files { body, err := os.ReadFile(file) if err != nil { return nil, fmt.Errorf("reading fields file failed: %w", err) } if fdm != nil { body, err = injectFields(body, fdm, injectOptions) if err != nil { return nil, fmt.Errorf("loading external fields failed: %w", err) } } var u []FieldDefinition err = yaml.Unmarshal(body, &u) if err != nil { return nil, fmt.Errorf("unmarshalling field body failed: %w", err) } fields = append(fields, u...) } return fields, nil } func injectFields(d []byte, dm *DependencyManager, options InjectFieldsOptions) ([]byte, error) { var fields []common.MapStr err := yaml.Unmarshal(d, &fields) if err != nil { return nil, fmt.Errorf("parsing fields failed: %w", err) } fields, _, err = dm.injectFieldsWithOptions(fields, options) if err != nil { return nil, fmt.Errorf("injecting fields failed: %w", err) } return yaml.Marshal(fields) } // ValidateDocumentBody validates the provided document body. func (v *Validator) ValidateDocumentBody(body json.RawMessage) multierror.Error { var c common.MapStr err := json.Unmarshal(body, &c) if err != nil { return multierror.Error{fmt.Errorf("unmarshalling document body failed: %w", err)} } return v.ValidateDocumentMap(c) } // ValidateDocumentMap validates the provided document as common.MapStr. func (v *Validator) ValidateDocumentMap(body common.MapStr) multierror.Error { errs := v.validateDocumentValues(body) errs = append(errs, v.validateMapElement("", body, body)...) if len(errs) == 0 { return nil } return errs.Unique() } var datasetFieldNames = []string{ "event.dataset", "data_stream.dataset", } func (v *Validator) validateDocumentValues(body common.MapStr) multierror.Error { var errs multierror.Error if !v.specVersion.LessThan(semver2_0_0) && v.expectedDatasets != nil { for _, datasetField := range datasetFieldNames { value, err := body.GetValue(datasetField) if errors.Is(err, common.ErrKeyNotFound) { continue } // Why do we render the expected datasets here? // Because the expected datasets can contain // mustache templates, and not just static // strings. // // For example, the expected datasets for the // Kubernetes container logs dataset can be: // // - "{{kubernetes.labels.elastic_co/dataset}}" // var renderedExpectedDatasets []string for _, dataset := range v.expectedDatasets { renderedDataset, err := mustache.Render(dataset, body) if err != nil { err := fmt.Errorf("can't render expected dataset %q: %w", dataset, err) errs = append(errs, err) return errs } renderedExpectedDatasets = append(renderedExpectedDatasets, renderedDataset) } str, ok := valueToString(value, v.disabledNormalization) exists := stringInArray(str, renderedExpectedDatasets) if !ok || !exists { err := fmt.Errorf("field %q should have value in %q, it has \"%v\"", datasetField, v.expectedDatasets, value) errs = append(errs, err) } } } return errs } func stringInArray(target string, arr []string) bool { // Check if target is part of the array found := false for _, item := range arr { if item == target { found = true break } } return found } func valueToString(value any, disabledNormalization bool) (string, bool) { if disabledNormalization { // when synthetics mode is enabled, each field present in the document is an array // so this check needs to retrieve the first element of the array vals, err := common.ToStringSlice(value) if err != nil || len(vals) != 1 { return "", false } return vals[0], true } str, ok := value.(string) return str, ok } func (v *Validator) validateMapElement(root string, elem common.MapStr, doc common.MapStr) multierror.Error { var errs multierror.Error for name, val := range elem { key := strings.TrimLeft(root+"."+name, ".") switch val := val.(type) { case []map[string]any: for _, m := range val { err := v.validateMapElement(key, m, doc) if err != nil { errs = append(errs, err...) } } case map[string]any: if isFieldTypeFlattened(key, v.Schema) { // Do not traverse into objects with flattened data types // because the entire object is mapped as a single field. continue } err := v.validateMapElement(key, val, doc) if err != nil { errs = append(errs, err...) } default: if skipLeafOfObject(root, name, v.specVersion, v.Schema) { // Till some versions we skip some validations on leaf of objects, check if it is the case. break } err := v.validateScalarElement(key, val, doc) if err != nil { errs = append(errs, err...) } } } if len(errs) > 0 { return errs.Unique() } return nil } func (v *Validator) validateScalarElement(key string, val any, doc common.MapStr) multierror.Error { if key == "" { return nil // root key is always valid } definition := FindElementDefinition(key, v.Schema) if definition == nil { switch { case skipValidationForField(key): return nil // generic field, let's skip validation for now case isFlattenedSubfield(key, v.Schema): return nil // flattened subfield, it will be stored as member of the flattened ancestor. case isArrayOfObjects(val): return multierror.Error{fmt.Errorf(`field %q is used as array of objects, expected explicit definition with type group or nested`, key)} case couldBeMultifield(key, v.Schema): return multierror.Error{fmt.Errorf(`field %q is undefined, could be a multifield`, key)} case !isParentEnabled(key, v.Schema): return nil // parent mapping is disabled default: return multierror.Error{fmt.Errorf(`field %q is undefined`, key)} } } if !v.disabledNormalization { err := v.validateExpectedNormalization(*definition, val) if err != nil { return multierror.Error{fmt.Errorf("field %q is not normalized as expected: %w", key, err)} } } errs := v.parseElementValue(key, *definition, val, doc) if len(errs) > 0 { return errs.Unique() } return nil } func (v *Validator) SanitizeSyntheticSourceDocs(docs []common.MapStr) ([]common.MapStr, error) { var newDocs []common.MapStr var multifields []string for _, doc := range docs { for key, contents := range doc { shouldBeArray := false definition := FindElementDefinition(key, v.Schema) if definition != nil { shouldBeArray = v.shouldValueBeArray(definition) } // if it needs to be normalized, the field is kept as it is if shouldBeArray { continue } // in case it is not specified any normalization and that field is an array of // just one element, the field is going to be updated to remove the array and keep // that element as a value. vals, ok := contents.([]any) if !ok { continue } if len(vals) == 1 { _, err := doc.Put(key, vals[0]) if err != nil { return nil, fmt.Errorf("key %s could not be updated: %w", key, err) } } } expandedDoc, newMultifields, err := createDocExpandingObjects(doc, v.Schema) if err != nil { return nil, fmt.Errorf("failure while expanding objects from doc: %w", err) } newDocs = append(newDocs, expandedDoc) for _, multifield := range newMultifields { if slices.Contains(multifields, multifield) { continue } multifields = append(multifields, multifield) } } if len(multifields) > 0 { sort.Strings(multifields) logger.Debugf("Some keys were not included in sanitized docs because they are multifields: %s", strings.Join(multifields, ", ")) } return newDocs, nil } func (v *Validator) shouldValueBeArray(definition *FieldDefinition) bool { // normalization should just be checked if synthetic source is enabled and the // spec version of this package is >= 2.0.0 if v.disabledNormalization && !v.specVersion.LessThan(semver2_0_0) { for _, normalize := range definition.Normalize { switch normalize { case "array": return true } } } return false } func createDocExpandingObjects(doc common.MapStr, schema []FieldDefinition) (common.MapStr, []string, error) { keys := make([]string, 0) for k := range doc { keys = append(keys, k) } sort.Strings(keys) newDoc := make(common.MapStr) var multifields []string for _, k := range keys { value, err := doc.GetValue(k) if err != nil { return nil, nil, fmt.Errorf("not found key %s: %w", k, err) } _, err = newDoc.Put(k, value) if err == nil { continue } // Possible errors found but not limited to those // - expected map but type is string // - expected map but type is []any if strings.HasPrefix(err.Error(), "expected map but type is") { if couldBeMultifield(k, schema) { // We cannot add multifields and they are not in source documents ignore them. multifields = append(multifields, k) continue } logger.Warnf("not able to add key %s: %s", k, err) continue } return nil, nil, fmt.Errorf("not added key %s with value %s: %w", k, value, err) } return newDoc, multifields, nil } // skipValidationForField skips field validation (field presence) of special fields. The special fields are present // in every (most?) documents collected by Elastic Agent, but aren't defined in any integration in `fields.yml` files. // FIXME https://github.com/elastic/elastic-package/issues/147 func skipValidationForField(key string) bool { return isFieldFamilyMatching("agent", key) || isFieldFamilyMatching("elastic_agent", key) || isFieldFamilyMatching("cloud", key) || // too many common fields isFieldFamilyMatching("event", key) || // too many common fields isFieldFamilyMatching("host", key) || // too many common fields isFieldFamilyMatching("metricset", key) || // field is deprecated isFieldFamilyMatching("event.module", key) // field is deprecated } // skipLeafOfObject checks if the element is a child of an object that was skipped in some previous // version of the spec. This is relevant in documents that store fields without subobjects. func skipLeafOfObject(root, name string, specVersion semver.Version, schema []FieldDefinition) bool { // We are only skipping validation of these fields on versions older than 3.0.1. if !specVersion.LessThan(semver3_0_1) { return false } // If it doesn't contain a dot in the name, we have traversed its parent, if any. if !strings.Contains(name, ".") { return false } key := name if root != "" { key = root + "." + name } _, ancestor := findAncestorElementDefinition(key, schema, func(key string, def *FieldDefinition) bool { // Don't look for ancestors beyond root, these objects have been already traversed. if len(key) < len(root) { return false } if !slices.Contains([]string{"group", "object", "nested", "flattened"}, def.Type) { return false } return true }) return ancestor != nil } func isFieldFamilyMatching(family, key string) bool { return key == family || strings.HasPrefix(key, family+".") } func isFieldTypeFlattened(key string, fieldDefinitions []FieldDefinition) bool { definition := FindElementDefinition(key, fieldDefinitions) return definition != nil && definition.Type == "flattened" } func couldBeMultifield(key string, fieldDefinitions []FieldDefinition) bool { parent := findParentElementDefinition(key, fieldDefinitions) if parent == nil { // Parent is not defined, so not sure what this can be. return false } switch parent.Type { case "", "group", "nested", "object": // Objects cannot have multifields. return false } return true } // isParentEnabled returns true by default unless the parent field exists and enabled is set false // This is needed in order to correctly validate the fields that should not be mapped // because parent field mapping was disabled func isParentEnabled(key string, fieldDefinitions []FieldDefinition) bool { parent := findParentElementDefinition(key, fieldDefinitions) if parent != nil && parent.Enabled != nil && !*parent.Enabled { return false } return true } func isArrayOfObjects(val any) bool { switch val := val.(type) { case []map[string]any: return true case []any: for _, e := range val { if _, isMap := e.(map[string]any); isMap { return true } } } return false } func isFlattenedSubfield(key string, schema []FieldDefinition) bool { _, ancestor := findAncestorElementDefinition(key, schema, func(_ string, def *FieldDefinition) bool { return def.Type == "flattened" }) return ancestor != nil } func findElementDefinitionForRoot(root, searchedKey string, fieldDefinitions []FieldDefinition) *FieldDefinition { for _, def := range fieldDefinitions { key := strings.TrimLeft(root+"."+def.Name, ".") if compareKeys(key, def, searchedKey) { return &def } fd := findElementDefinitionForRoot(key, searchedKey, def.Fields) if fd != nil { return fd } fd = findElementDefinitionForRoot(key, searchedKey, def.MultiFields) if fd != nil { return fd } } if root == "" { // No definition found, check if the parent is an object with object type. parent := findParentElementDefinition(searchedKey, fieldDefinitions) if parent != nil && parent.Type == "object" && parent.ObjectType != "" { fd := *parent fd.Name = searchedKey fd.Type = parent.ObjectType fd.ObjectType = "" return &fd } } return nil } // FindElementDefinition is a helper function used to find the fields definition in the schema. func FindElementDefinition(searchedKey string, fieldDefinitions []FieldDefinition) *FieldDefinition { return findElementDefinitionForRoot("", searchedKey, fieldDefinitions) } func findParentElementDefinition(key string, fieldDefinitions []FieldDefinition) *FieldDefinition { lastDotIndex := strings.LastIndex(key, ".") if lastDotIndex < 0 { // Field at the root level cannot be a multifield. return nil } parentKey := key[:lastDotIndex] return FindElementDefinition(parentKey, fieldDefinitions) } func findAncestorElementDefinition(key string, fieldDefinitions []FieldDefinition, cond func(string, *FieldDefinition) bool) (string, *FieldDefinition) { for strings.Contains(key, ".") { i := strings.LastIndex(key, ".") key = key[:i] ancestor := FindElementDefinition(key, fieldDefinitions) if ancestor == nil { continue } if cond(key, ancestor) { return key, ancestor } } return "", nil } // compareKeys checks if `searchedKey` matches with the given `key`. `key` can contain // wildcards (`*`), that match any sequence of characters in `searchedKey` different to dots. func compareKeys(key string, def FieldDefinition, searchedKey string) bool { // Loop over every byte in `key` to find if there is a matching byte in `searchedKey`. var j int for _, k := range []byte(key) { if j >= len(searchedKey) { // End of searched key reached before maching all characters in the key. return false } switch k { case searchedKey[j]: // Match, continue. j++ case '*': // Wildcard, match everything till next dot. switch idx := strings.IndexByte(searchedKey[j:], '.'); idx { default: // Jump till next dot. j += idx case -1: // No dots, wildcard matches with the rest of the searched key. j = len(searchedKey) case 0: // Empty name on wildcard, this is not permitted (e.g. `example..foo`). return false } default: // No match. return false } } // If everything matched, searched key has been found. if len(searchedKey) == j { return true } // Workaround for potential subfields of certain types as geo_point or histogram. if len(searchedKey) > j { extraPart := searchedKey[j:] if validSubField(def, extraPart) { return true } } return false } func (v *Validator) validateExpectedNormalization(definition FieldDefinition, val any) error { // Validate expected normalization starting with packages following spec v2 format. if v.specVersion.LessThan(semver2_0_0) { return nil } for _, normalize := range definition.Normalize { switch normalize { case "array": if _, isArray := val.([]any); val != nil && !isArray { return fmt.Errorf("expected array, found %q (%T)", val, val) } } } return nil } // validSubField checks if the extra part that didn't match with any field definition, // matches with the possible sub field of complex fields like geo_point or histogram. func validSubField(def FieldDefinition, extraPart string) bool { fieldType := def.Type if def.Type == "object" && def.ObjectType != "" { fieldType = def.ObjectType } subFields := []string{".lat", ".lon", ".values", ".counts"} perType := map[string][]string{ "geo_point": subFields[0:2], "histogram": subFields[2:4], } allowed, found := perType[fieldType] if !found { if def.External != "" { // An unresolved external field could be anything. allowed = subFields } else { return false } } for _, a := range allowed { if a == extraPart { return true } } return false } // parseElementValue checks that the value stored in a field matches the field definition. For // arrays it checks it for each Element. func (v *Validator) parseElementValue(key string, definition FieldDefinition, val any, doc common.MapStr) multierror.Error { // Validate types first for each element, so other checks don't need to worry about types. var errs multierror.Error err := forEachElementValue(key, definition, val, doc, v.parseSingleElementValue) if err != nil { errs = append(errs, err...) } // Perform validations that need to be done on several fields at the same time. allElementsErr := v.parseAllElementValues(key, definition, val, doc) if allElementsErr != nil { errs = append(errs, allElementsErr) } if len(errs) > 0 { return errs.Unique() } return nil } // parseAllElementValues performs validations that must be done for all elements at once in // case that there are multiple values. func (v *Validator) parseAllElementValues(key string, definition FieldDefinition, val any, doc common.MapStr) error { switch definition.Type { case "constant_keyword", "keyword", "text": if !v.specVersion.LessThan(semver2_0_0) { if err := ensureExpectedEventType(key, val, definition, doc); err != nil { return err } } } return nil } // parseSingeElementValue performs validations on individual values of each element. func (v *Validator) parseSingleElementValue(key string, definition FieldDefinition, val any, doc common.MapStr) multierror.Error { invalidTypeError := func() multierror.Error { return multierror.Error{fmt.Errorf("field %q's Go type, %T, does not match the expected field type: %s (field value: %v)", key, val, definition.Type, val)} } stringValue := func() (string, bool) { switch val := val.(type) { case string: return val, true case bool, float64: if v.defaultNumericConversion || slices.Contains(v.numericKeywordFields, key) { return fmt.Sprintf("%v", val), true } } return "", false } switch definition.Type { // Constant keywords can define a value in the definition, if they do, all // values stored in this field should be this one. // If a pattern is provided, it checks if the value matches. case "constant_keyword": valStr, valid := stringValue() if !valid { return invalidTypeError() } if err := ensureConstantKeywordValueMatches(key, valStr, definition.Value); err != nil { return multierror.Error{err} } if err := ensurePatternMatches(key, valStr, definition.Pattern); err != nil { return multierror.Error{err} } if err := ensureAllowedValues(key, valStr, definition); err != nil { return multierror.Error{err} } // Normal text fields should be of type string. // If a pattern is provided, it checks if the value matches. case "keyword", "text": valStr, valid := stringValue() if !valid { return invalidTypeError() } if err := ensurePatternMatches(key, valStr, definition.Pattern); err != nil { return multierror.Error{err} } if err := ensureAllowedValues(key, valStr, definition); err != nil { return multierror.Error{err} } // Dates are expected to be formatted as strings or as seconds or milliseconds // since epoch. // If it is a string and a pattern is provided, it checks if the value matches. case "date": switch val := val.(type) { case string: if err := ensurePatternMatches(key, val, definition.Pattern); err != nil { return multierror.Error{err} } case float64: // date as seconds or milliseconds since epoch if definition.Pattern != "" { return multierror.Error{fmt.Errorf("numeric date in field %q, but pattern defined", key)} } default: return invalidTypeError() } // IP values should be actual IPs, included in the ranges of IPs available // in the geoip test database. // If a pattern is provided, it checks if the value matches. case "ip": valStr, valid := val.(string) if !valid { return invalidTypeError() } if err := ensurePatternMatches(key, valStr, definition.Pattern); err != nil { return multierror.Error{err} } if v.enabledAllowedIPCheck && !v.isAllowedIPValue(valStr) { return multierror.Error{fmt.Errorf("the IP %q is not one of the allowed test IPs (see: https://github.com/elastic/elastic-package/blob/main/internal/fields/_static/allowed_geo_ips.txt)", valStr)} } // Groups should only contain nested fields, not single values. case "group", "nested", "object": switch val := val.(type) { case map[string]any: // This is probably an element from an array of objects, // even if not recommended, it should be validated. if v.specVersion.LessThan(semver3_0_1) { break } errs := v.validateMapElement(key, common.MapStr(val), doc) if len(errs) == 0 { return nil } return errs.Unique() case []any: // This can be an array of array of objects. Elasticsearh will probably // flatten this. So even if this is quite unexpected, let's try to handle it. if v.specVersion.LessThan(semver3_0_1) { break } return forEachElementValue(key, definition, val, doc, v.parseSingleElementValue) case nil: // The document contains a null, let's consider this like an empty array. return nil default: switch { case definition.Type == "object" && definition.ObjectType != "": // This is the leaf element of an object without wildcards in the name, adapt the definition and try again. definition.Name = definition.Name + ".*" definition.Type = definition.ObjectType definition.ObjectType = "" return v.parseSingleElementValue(key, definition, val, doc) case definition.Type == "object" && definition.ObjectType == "": // Legacy mapping, ambiguous definition not allowed by recent versions of the spec, ignore it. return nil } return multierror.Error{fmt.Errorf("field %q is a group of fields of type %s, it cannot store values", key, definition.Type)} } // Numbers should have been parsed as float64, otherwise they are not numbers. case "float", "long", "double": switch val := val.(type) { case float64: case json.Number: case string: if !slices.Contains(v.stringNumberFields, key) { return invalidTypeError() } if _, err := strconv.ParseFloat(val, 64); err != nil { return invalidTypeError() } default: return invalidTypeError() } // All other types are considered valid not blocking validation. default: return nil } return nil } // isDocumentation reports whether ip is a reserved address for documentation, // according to RFC 5737 (IPv4 Address Blocks Reserved for Documentation), RFC 6676, // RFC 3849 (IPv6 Address Prefix Reserved for Documentation) and RFC 9637. func isDocumentation(ip net.IP) bool { if ip4 := ip.To4(); ip4 != nil { // Following RFC 5737, Section 3. Documentation Address Blocks which says: // The blocks 192.0.2.0/24 (TEST-NET-1), 198.51.100.0/24 (TEST-NET-2), // and 203.0.113.0/24 (TEST-NET-3) are provided for use in // documentation. // Following RFC 6676, the IPV4 multicast addresses allocated for documentation // purposes are 233.252.0.0/24 return ((ip4[0] == 192 && ip4[1] == 0 && ip4[2] == 2) || (ip4[0] == 198 && ip4[1] == 51 && ip4[2] == 100) || (ip4[0] == 203 && ip4[1] == 0 && ip4[2] == 113) || (ip4[0] == 233 && ip4[1] == 252 && ip4[2] == 0)) } // Following RFC 3849, Section 2. Documentation IPv6 Address Prefix which // says: // The prefix allocated for documentation purposes is 2001:DB8::/32 // Following RFC 9637, a new address block 3fff::/20 is registered for documentation purposes return len(ip) == net.IPv6len && (ip[0] == 32 && ip[1] == 1 && ip[2] == 13 && ip[3] == 184) || (ip[0] == 63 && ip[1] == 255 && ip[2] <= 15) } // isAllowedIPValue checks if the provided IP is allowed for testing // The set of allowed IPs are: // - private IPs as described in RFC 1918 & RFC 4193 // - public IPs allowed by MaxMind for testing // - Reserved IPs for documentation RFC 5737, RFC 3849, RFC 6676 and RFC 9637 // - 0.0.0.0 and 255.255.255.255 for IPv4 // - 0:0:0:0:0:0:0:0 and ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff for IPv6 func (v *Validator) isAllowedIPValue(s string) bool { ip := net.ParseIP(s) if ip == nil { return false } for _, allowedCIDR := range v.allowedCIDRs { if allowedCIDR.Contains(ip) { return true } } if ip.IsUnspecified() || ip.IsPrivate() || isDocumentation(ip) || ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsMulticast() || ip.Equal(net.IPv4bcast) { return true } return false } // forEachElementValue visits a function for each element in the given value if // it is an array. If it is not an array, it calls the function with it. func forEachElementValue(key string, definition FieldDefinition, val any, doc common.MapStr, fn func(string, FieldDefinition, any, common.MapStr) multierror.Error) multierror.Error { arr, isArray := val.([]any) if !isArray { return fn(key, definition, val, doc) } var errs multierror.Error for _, element := range arr { err := fn(key, definition, element, doc) if err != nil { errs = append(errs, err...) } } if len(errs) > 0 { return errs.Unique() } return nil } // ensurePatternMatches validates the document's field value matches the field // definitions regular expression pattern. func ensurePatternMatches(key, value, pattern string) error { if pattern == "" { return nil } valid, err := regexp.MatchString(pattern, value) if err != nil { return fmt.Errorf("invalid pattern: %w", err) } if !valid { return fmt.Errorf("field %q's value, %s, does not match the expected pattern: %s", key, value, pattern) } return nil } // ensureConstantKeywordValueMatches validates the document's field value // matches the definition's constant_keyword value. func ensureConstantKeywordValueMatches(key, value, constantKeywordValue string) error { if constantKeywordValue == "" { return nil } if value != constantKeywordValue { return fmt.Errorf("field %q's value %q does not match the declared constant_keyword value %q", key, value, constantKeywordValue) } return nil } // ensureAllowedValues validates that the document's field value // is one of the allowed values. func ensureAllowedValues(key, value string, definition FieldDefinition) error { if !definition.AllowedValues.IsAllowed(value) { return fmt.Errorf("field %q's value %q is not one of the allowed values (%s)", key, value, strings.Join(definition.AllowedValues.Values(), ", ")) } if e := definition.ExpectedValues; len(e) > 0 && !slices.Contains(e, value) { return fmt.Errorf("field %q's value %q is not one of the expected values (%s)", key, value, strings.Join(e, ", ")) } return nil } // ensureExpectedEventType validates that the document's `event.type` field is one of the expected // one for the given value. func ensureExpectedEventType(key string, val any, definition FieldDefinition, doc common.MapStr) error { eventTypeVal, _ := doc.GetValue("event.type") eventTypes := valueToStringsSlice(eventTypeVal) values := valueToStringsSlice(val) var expected []string for _, value := range values { expectedForValue := definition.AllowedValues.ExpectedEventTypes(value) expected = common.StringSlicesUnion(expected, expectedForValue) } if len(expected) == 0 { // No restrictions defined for this value, all good to go. return nil } for _, eventType := range eventTypes { if !slices.Contains(expected, eventType) { return fmt.Errorf("field \"event.type\" value %q is not one of the expected values (%s) for any of the values of %q (%s)", eventType, strings.Join(expected, ", "), key, strings.Join(values, ", ")) } } return nil } func valueToStringsSlice(value any) []string { switch v := value.(type) { case nil: return nil case string: return []string{v} case []any: var values []string for _, e := range v { values = append(values, fmt.Sprintf("%v", e)) } return values default: return []string{fmt.Sprintf("%v", v)} } }