internal/fields/mappings.go (489 lines of code) (raw):
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.
package fields
import (
"context"
"encoding/json"
"fmt"
"slices"
"strings"
"github.com/google/go-cmp/cmp"
"github.com/elastic/elastic-package/internal/elasticsearch"
"github.com/elastic/elastic-package/internal/logger"
"github.com/elastic/elastic-package/internal/multierror"
)
// MappingValidator is responsible for mappings validation: it compares the
// mappings found in a data stream with the preview of an index template.
type MappingValidator struct {
// Schema contains definition records. They are looked up when a mapping
// has to be validated against its ECS field definition.
Schema []FieldDefinition
// esClient is the Elasticsearch client used to retrieve the mappings.
esClient *elasticsearch.Client
// indexTemplateName is the index template simulated to obtain the preview mappings.
indexTemplateName string
// dataStreamName is the data stream whose actual mappings are validated.
dataStreamName string
// exceptionFields lists the field paths whose validation is skipped.
exceptionFields []string
}
// MappingValidatorOption represents an optional flag that can be passed to CreateValidatorForMappings.
// Each option receives the validator being built and may modify it; returning a
// non-nil error aborts the creation of the validator.
type MappingValidatorOption func(*MappingValidator) error
// WithMappingValidatorElasticsearchClient returns an option that stores the
// given Elasticsearch client in the validator.
func WithMappingValidatorElasticsearchClient(esClient *elasticsearch.Client) MappingValidatorOption {
	setClient := func(validator *MappingValidator) error {
		validator.esClient = esClient
		return nil
	}
	return setClient
}
// WithMappingValidatorIndexTemplate returns an option that sets the name of the
// index template to be simulated in Elasticsearch.
func WithMappingValidatorIndexTemplate(indexTemplate string) MappingValidatorOption {
	setIndexTemplate := func(validator *MappingValidator) error {
		validator.indexTemplateName = indexTemplate
		return nil
	}
	return setIndexTemplate
}
// WithMappingValidatorDataStream returns an option that sets the name of the
// data stream whose mappings are queried in Elasticsearch.
func WithMappingValidatorDataStream(dataStream string) MappingValidatorOption {
	setDataStream := func(validator *MappingValidator) error {
		validator.dataStreamName = dataStream
		return nil
	}
	return setDataStream
}
// WithMappingValidatorFallbackSchema returns an option that sets the field
// definitions (Schema) of the validator.
func WithMappingValidatorFallbackSchema(schema []FieldDefinition) MappingValidatorOption {
	setSchema := func(validator *MappingValidator) error {
		validator.Schema = schema
		return nil
	}
	return setSchema
}
// WithMappingValidatorExceptionFields returns an option that sets the list of
// fields to be ignored during the validation process.
func WithMappingValidatorExceptionFields(fields []string) MappingValidatorOption {
	setExceptions := func(validator *MappingValidator) error {
		validator.exceptionFields = fields
		return nil
	}
	return setExceptions
}
// CreateValidatorForMappings function creates a validator for the mappings.
//
// The given options are applied in order, followed by the option that sets
// esClient, so the client passed here takes precedence over any client
// configured through opts. It returns the error of the first option that fails.
func CreateValidatorForMappings(esClient *elasticsearch.Client, opts ...MappingValidatorOption) (v *MappingValidator, err error) {
	// Build a private slice instead of appending to opts: appending to a
	// variadic slice that has spare capacity would write into the backing
	// array owned by the caller.
	allOpts := make([]MappingValidatorOption, 0, len(opts)+1)
	allOpts = append(allOpts, opts...)
	allOpts = append(allOpts, WithMappingValidatorElasticsearchClient(esClient))
	return createValidatorForMappingsAndPackageRoot(allOpts...)
}
// createValidatorForMappingsAndPackageRoot builds a new MappingValidator and
// applies every option to it in order. The first option returning an error
// stops the process and that error is returned with a nil validator.
func createValidatorForMappingsAndPackageRoot(opts ...MappingValidatorOption) (v *MappingValidator, err error) {
	validator := &MappingValidator{}
	for i := range opts {
		if optErr := opts[i](validator); optErr != nil {
			return nil, optErr
		}
	}
	return validator, nil
}
// ValidateIndexMappings checks that the mappings found in the data stream are
// consistent with the mappings obtained from simulating the index template.
//
// Dynamic templates are expected to be identical in both sources. When the
// properties differ, every mapping is validated through compareMappings.
// Failures to retrieve or decode the mappings are reported in the returned
// error list as well.
func (v *MappingValidator) ValidateIndexMappings(ctx context.Context) multierror.Error {
	var errs multierror.Error

	logger.Debugf("Get Mappings from data stream (%s)", v.dataStreamName)
	actualMappings, err := v.esClient.DataStreamMappings(ctx, v.dataStreamName)
	if err != nil {
		return append(errs, fmt.Errorf("failed to load mappings from ES (data stream %s): %w", v.dataStreamName, err))
	}

	logger.Debugf("Simulate Index Template (%s)", v.indexTemplateName)
	previewMappings, err := v.esClient.SimulateIndexTemplate(ctx, v.indexTemplateName)
	if err != nil {
		return append(errs, fmt.Errorf("failed to load mappings from index template preview (%s): %w", v.indexTemplateName, err))
	}

	// Code from comment posted in https://github.com/google/go-cmp/issues/224
	// Valid JSON documents are compared through their indented representation.
	bothValidJSON := func(x, y []byte) bool {
		return json.Valid(x) && json.Valid(y)
	}
	parseJSON := func(in []byte) string {
		var decoded any
		if err := json.Unmarshal(in, &decoded); err != nil {
			panic(err) // should never occur given previous filter to ensure valid JSON
		}
		indented, err := json.MarshalIndent(decoded, "", " ")
		if err != nil {
			panic(err)
		}
		return string(indented)
	}
	transformJSON := cmp.FilterValues(bothValidJSON, cmp.Transformer("ParseJSON", parseJSON))

	// Compare dynamic templates, this should always be the same in preview and after ingesting documents
	templatesDiff := cmp.Diff(previewMappings.DynamicTemplates, actualMappings.DynamicTemplates, transformJSON)
	if templatesDiff != "" {
		errs = append(errs, fmt.Errorf("dynamic templates are different (data stream %s):\n%s", v.dataStreamName, templatesDiff))
	}

	// Compare actual mappings:
	// - If they are the same exact mapping definitions as in preview, everything should be good
	// - If the same mapping exists in both, but they have different "type", there is some issue
	// - If there is a new mapping,
	//   - It could come from a ECS definition, compare that mapping with the ECS field definitions
	//   - Does this come from some dynamic template? ECS components template or dynamic templates defined in the package? This mapping is valid
	//     - conditions found in current dynamic templates: match, path_match, path_unmatch, match_mapping_type, unmatch_mapping_type
	//   - if it does not match, there should be some issue and it should be reported
	// - If the mapping is a constant_keyword type (e.g. data_stream.dataset), how to check the value?
	//   - if the constant_keyword is defined in the preview, it should be the same
	propertiesDiff := cmp.Diff(actualMappings.Properties, previewMappings.Properties, transformJSON)
	if propertiesDiff == "" {
		logger.Debug("No changes found in mappings")
		return errs.Unique()
	}

	var rawPreview map[string]any
	if err := json.Unmarshal(previewMappings.Properties, &rawPreview); err != nil {
		errs = append(errs, fmt.Errorf("failed to unmarshal preview mappings (index template %s): %w", v.indexTemplateName, err))
		return errs.Unique()
	}

	var rawActual map[string]any
	if err := json.Unmarshal(actualMappings.Properties, &rawActual); err != nil {
		errs = append(errs, fmt.Errorf("failed to unmarshal actual mappings (data stream %s): %w", v.dataStreamName, err))
		return errs.Unique()
	}

	var rawDynamicTemplates []map[string]any
	if err := json.Unmarshal(actualMappings.DynamicTemplates, &rawDynamicTemplates); err != nil {
		errs = append(errs, fmt.Errorf("failed to unmarshal actual dynamic templates (data stream %s): %w", v.dataStreamName, err))
		return errs.Unique()
	}

	errs = append(errs, v.compareMappings("", false, rawPreview, rawActual, rawDynamicTemplates)...)
	if len(errs) == 0 {
		return nil
	}
	return errs.Unique()
}
// currentMappingPath returns the dotted path of key under path. When path is
// empty, key is returned as is.
func currentMappingPath(path, key string) string {
	if path != "" {
		return fmt.Sprintf("%s.%s", path, key)
	}
	return key
}
// fieldNameFromPath returns the last element of a dotted path, or the path
// itself when it contains no dots.
func fieldNameFromPath(path string) string {
	if lastDot := strings.LastIndex(path, "."); lastDot >= 0 {
		return path[lastDot+1:]
	}
	return path
}
// mappingParameter returns the string value stored under field in the given
// definition. An empty string is returned when the field is missing or its
// value is not a string.
func mappingParameter(field string, definition map[string]any) string {
	if text, isString := definition[field].(string); isString {
		return text
	}
	return ""
}
// isEmptyObject reports whether the definition consists of a single parameter
// and that parameter is "type" with value "object".
//
// Example:
//
//	"_tmp": {
//	  "type": "object"
//	},
func isEmptyObject(definition map[string]any) bool {
	return len(definition) == 1 && mappingParameter("type", definition) == "object"
}
// isObject reports whether the definition has a "properties" entry holding
// other mapping definitions.
//
// Example:
//
//	"http": {
//	  "properties": {
//	    "request": {
//	      "properties": {
//	        "method": {
//	          "type": "keyword",
//	          "ignore_above": 1024
//	        }
//	      }
//	    }
//	  }
//	}
func isObject(definition map[string]any) bool {
	_, hasProperties := definition["properties"].(map[string]any)
	return hasProperties
}
// isObjectFullyDynamic reports whether the definition is of type "object" with
// the "dynamic" parameter set to "true" and without any property defined.
//
// Example:
//
//	"labels": {
//	  "type": "object",
//	  "dynamic": "true"
//	},
func isObjectFullyDynamic(definition map[string]any) bool {
	isDynamicObject := mappingParameter("type", definition) == "object" &&
		mappingParameter("dynamic", definition) == "true"
	if !isDynamicObject {
		return false
	}

	rawProperties, present := definition["properties"]
	if !present {
		return true
	}

	// It should not have properties
	// https://www.elastic.co/guide/en/elasticsearch/reference/8.16/dynamic.html
	properties, isMap := rawProperties.(map[string]any)
	return isMap && len(properties) == 0
}
// isMultiFields reports whether the definition sets a "type" and contains a
// "fields" entry holding other definitions.
//
// Example:
//
//	"path": {
//	  "type": "keyword",
//	  "fields": {
//	    "text": {
//	      "type": "match_only_text"
//	    }
//	  }
//	},
func isMultiFields(definition map[string]any) bool {
	if mappingParameter("type", definition) == "" {
		return false
	}
	_, hasFields := definition["fields"].(map[string]any)
	return hasFields
}
// isNumberTypeField reports whether both types belong to the set of number
// types ("float", "long", "double") that are accepted as interchangeable.
func isNumberTypeField(previewType, actualType string) bool {
	numberTypes := []string{"float", "long", "double"}
	return slices.Contains(numberTypes, previewType) && slices.Contains(numberTypes, actualType)
}
// validateMappingInECSSchema looks for currentPath in the validator Schema and,
// if the definition found is an external "ecs" one, compares it with the given
// mapping definition. It returns an error when no such definition exists.
func (v *MappingValidator) validateMappingInECSSchema(currentPath string, definition map[string]any) multierror.Error {
	ecsDefinition := FindElementDefinition(currentPath, v.Schema)
	if ecsDefinition == nil || ecsDefinition.External != "ecs" {
		return multierror.Error{fmt.Errorf("field definition not found")}
	}

	if errs := compareFieldDefinitionWithECS(currentPath, ecsDefinition, definition); len(errs) > 0 {
		return errs
	}

	// Currently, validation of multi-fields with ECS is skipped
	// Any multi-field found in the actual mapping must come from a definition from the preview or
	// a dynamic template, therefore there is a definition for it.
	// Example of this validation can be found at:
	// https://github.com/elastic/elastic-package/pull/2285/commits/51656120
	return nil
}
// compareFieldDefinitionWithECS compares the "type" and "time_series_metric"
// parameters of the actual mapping with the values of the ECS definition.
// Different types are tolerated when both of them are number types.
func compareFieldDefinitionWithECS(currentPath string, ecs *FieldDefinition, actual map[string]any) multierror.Error {
	var errs multierror.Error

	actualType := mappingParameter("type", actual)
	switch {
	case ecs.Type == actualType:
		// same type, nothing to report
	case isNumberTypeField(ecs.Type, actualType):
		// exceptions related to numbers
		logger.Debugf("Allowed number fields with different types (ECS %s - actual %s)", ecs.Type, actualType)
	default:
		errs = append(errs, fmt.Errorf("actual mapping type (%s) does not match with ECS definition type: %s", actualType, ecs.Type))
	}

	// Compare other parameters
	if metricType := mappingParameter("time_series_metric", actual); metricType != ecs.MetricType {
		errs = append(errs, fmt.Errorf("actual mapping \"time_series_metric\" (%s) does not match with ECS definition value: %s", metricType, ecs.MetricType))
	}

	if len(errs) == 0 {
		return nil
	}
	return errs
}
// flattenMappings returns the mapping definitions found under "path" as a flat
// map indexed by their full path. Definitions with multi-fields are kept as a
// single entry, without expanding their "fields".
func flattenMappings(path string, definition map[string]any) (map[string]any, error) {
	// Multi-fields are going to be validated directly with the dynamic templates
	// or with ECS fields, and any definition without properties is a leaf.
	if isMultiFields(definition) || !isObject(definition) {
		return map[string]any{path: definition}, nil
	}

	children, ok := definition["properties"].(map[string]any)
	if !ok {
		// it should not happen, it is already checked above
		return nil, fmt.Errorf("invalid type for properties in path: %s", path)
	}

	flattened := map[string]any{}
	for name, child := range children {
		childDefinition, isMapping := child.(map[string]any)
		if !isMapping {
			continue
		}
		// multi_fields are managed by the recursive call,
		// there is no need to manage that case here
		nested, err := flattenMappings(currentMappingPath(path, name), childDefinition)
		if err != nil {
			return nil, err
		}
		for nestedPath, nestedDefinition := range nested {
			flattened[nestedPath] = nestedDefinition
		}
	}
	return flattened, nil
}
// getMappingDefinitionsField returns the value stored under field in the given
// definition as a map of definitions. It returns an error if the field does not
// exist or if its value is not a map.
func getMappingDefinitionsField(field string, definition map[string]any) (map[string]any, error) {
	raw, found := definition[field]
	if !found {
		return nil, fmt.Errorf("field not found: %q", field)
	}
	if definitions, isMap := raw.(map[string]any); isMap {
		return definitions, nil
	}
	return nil, fmt.Errorf("unexpected type found for field %q: %T ", field, raw)
}
// validateConstantKeywordField checks mappings of type constant_keyword. The
// boolean result tells whether the actual mapping is a constant_keyword. In
// that case, the preview must be a constant_keyword too and, if the preview
// sets a value, the actual value must be the same; otherwise an error is
// returned.
func validateConstantKeywordField(path string, preview, actual map[string]any) (bool, error) {
	const constantKeyword = "constant_keyword"

	if mappingParameter("type", actual) != constantKeyword {
		return false, nil
	}
	if mappingParameter("type", preview) != constantKeyword {
		return true, fmt.Errorf("invalid type for %q: no constant_keyword type set in preview mapping", path)
	}

	expected := mappingParameter("value", preview)
	found := mappingParameter("value", actual)
	switch {
	case expected == "":
		// skip validating value if preview does not have that parameter defined
		return true, nil
	case expected != found:
		// This should also be detected by the failure storage (if available)
		// or no documents being ingested
		return true, fmt.Errorf("invalid value in field %q: constant_keyword value in preview %q does not match the actual mapping value %q", path, expected, found)
	}
	return true, nil
}
// compareMappings validates the actual mapping definition found at path against
// the definition found in the preview, recursing into nested objects and
// multi-fields. couldBeParametersDefinition tells whether "actual" is the
// definition of a field or object (so a "properties" key holds sub-fields)
// instead of a set of fields in which "properties" could be a field name.
// It returns nil if no errors are found, otherwise the unique list of errors.
func (v *MappingValidator) compareMappings(path string, couldBeParametersDefinition bool, preview, actual map[string]any, dynamicTemplates []map[string]any) multierror.Error {
var errs multierror.Error
// constant_keyword mappings are fully validated here, no other parameter is compared
isConstantKeywordType, err := validateConstantKeywordField(path, preview, actual)
if err != nil {
return multierror.Error{err}
}
if isConstantKeywordType {
return nil
}
if slices.Contains(v.exceptionFields, path) {
logger.Tracef("Found exception field, skip its validation: %q", path)
return nil
}
// Nothing else to compare if the actual mapping is still a dynamic object without properties
if isObjectFullyDynamic(actual) {
logger.Warnf("Dynamic object found but no fields ingested under path: \"%s.*\"", path)
return nil
}
// Ensure to validate properties from an object (subfields) in the right location of the mappings
// there could be "sub-fields" with name "properties" too
if couldBeParametersDefinition && isObject(actual) {
if isObjectFullyDynamic(preview) {
// The preview defines no properties for this object, so the actual ones
// are validated as mappings not present in the preview
dynamicErrors := v.validateMappingsNotInPreview(path, actual, dynamicTemplates)
errs = append(errs, dynamicErrors...)
if len(errs) > 0 {
return errs.Unique()
}
return nil
} else if !isObject(preview) {
errs = append(errs, fmt.Errorf("undefined field mappings found in path: %q", path))
return errs.Unique()
}
previewProperties, err := getMappingDefinitionsField("properties", preview)
if err != nil {
errs = append(errs, fmt.Errorf("found invalid properties type in preview mappings for path %q: %w", path, err))
}
actualProperties, err := getMappingDefinitionsField("properties", actual)
if err != nil {
errs = append(errs, fmt.Errorf("found invalid properties type in actual mappings for path %q: %w", path, err))
}
// The elements under "properties" are fields, "properties" is not a parameter there
compareErrors := v.compareMappings(path, false, previewProperties, actualProperties, dynamicTemplates)
errs = append(errs, compareErrors...)
if len(errs) == 0 {
return nil
}
return errs.Unique()
}
containsMultifield := isMultiFields(actual)
if containsMultifield {
if !isMultiFields(preview) {
errs = append(errs, fmt.Errorf("field %q is undefined: not found multi_fields definitions in preview mapping", path))
return errs.Unique()
}
previewFields, err := getMappingDefinitionsField("fields", preview)
if err != nil {
errs = append(errs, fmt.Errorf("found invalid multi_fields type in preview mappings for path %q: %w", path, err))
}
actualFields, err := getMappingDefinitionsField("fields", actual)
if err != nil {
errs = append(errs, fmt.Errorf("found invalid multi_fields type in actual mappings for path %q: %w", path, err))
}
compareErrors := v.compareMappings(path, false, previewFields, actualFields, dynamicTemplates)
errs = append(errs, compareErrors...)
// not returning here to keep validating the other fields of this object if any
}
// Compare and validate the elements under "properties": objects or fields and its parameters
propertiesErrs := v.validateObjectProperties(path, false, containsMultifield, preview, actual, dynamicTemplates)
errs = append(errs, propertiesErrs...)
if len(errs) == 0 {
return nil
}
return errs.Unique()
}
// validateObjectProperties validates every element of the actual mapping found
// at path. Elements that also exist in the preview are compared with them; the
// other ones are validated as mappings not present in the preview. The "fields"
// element is ignored when containsMultifield is set, since multi-fields are
// validated by the caller.
func (v *MappingValidator) validateObjectProperties(path string, couldBeParametersDefinition, containsMultifield bool, preview, actual map[string]any, dynamicTemplates []map[string]any) multierror.Error {
	var errs multierror.Error
	for key, actualValue := range actual {
		if key == "fields" && containsMultifield {
			// already checked
			continue
		}

		currentPath := currentMappingPath(path, key)
		if skipValidationForField(currentPath) {
			continue
		}

		if previewValue, inPreview := preview[key]; inPreview {
			errs = append(errs, v.validateObjectMappingAndParameters(previewValue, actualValue, currentPath, dynamicTemplates, true)...)
			continue
		}

		// This key (object) does not exist in the preview mapping
		childField, isMapping := actualValue.(map[string]any)
		switch {
		case !isMapping:
			// Field or Parameter not defined
			errs = append(errs, fmt.Errorf("field %q is undefined", currentPath))
		case isEmptyObject(childField):
			// TODO: Should this be raised as an error instead?
			logger.Debugf("field %q skipped: empty object without definition in the preview", currentPath)
		default:
			errs = append(errs, v.validateMappingsNotInPreview(currentPath, childField, dynamicTemplates)...)
		}
	}

	if len(errs) > 0 {
		return errs.Unique()
	}
	return nil
}
// validateMappingsNotInPreview validates the object and the nested objects in the current path with other resources
// like ECS schema, dynamic templates or local fields defined in the package (type array).
//
// The mappings under currentPath are flattened and each resulting field is
// validated on its own: exception fields and empty objects are skipped, fields
// matching a dynamic template are accepted, and the remaining ones must be
// defined in the ECS schema. It returns the unique list of errors found.
func (v *MappingValidator) validateMappingsNotInPreview(currentPath string, childField map[string]any, rawDynamicTemplates []map[string]any) multierror.Error {
	var errs multierror.Error
	flattenFields, err := flattenMappings(currentPath, childField)
	if err != nil {
		errs = append(errs, err)
		return errs
	}

	dynamicTemplates, err := parseDynamicTemplates(rawDynamicTemplates)
	if err != nil {
		return multierror.Error{fmt.Errorf("failed to parse dynamic templates: %w", err)}
	}

	for fieldPath, object := range flattenFields {
		if slices.Contains(v.exceptionFields, fieldPath) {
			logger.Tracef("Found exception field, skip its validation (not present in preview): %q", fieldPath)
			// Skip just this field. Returning here would drop the errors already
			// collected and leave the remaining fields unchecked, and as the
			// iteration order of a map is not specified the outcome would change
			// between executions.
			continue
		}

		def, ok := object.(map[string]any)
		if !ok {
			errs = append(errs, fmt.Errorf("invalid field definition/mapping for path: %q", fieldPath))
			continue
		}

		if isEmptyObject(def) {
			logger.Tracef("Skip field which value is an empty object: %q", fieldPath)
			continue
		}

		// validate whether or not the field has a corresponding dynamic template
		if len(rawDynamicTemplates) > 0 {
			err := v.matchingWithDynamicTemplates(fieldPath, def, dynamicTemplates)
			if err == nil {
				continue
			}
		}

		// validate whether or not all fields under this key are defined in ECS
		for _, e := range v.validateMappingInECSSchema(fieldPath, def) {
			errs = append(errs, fmt.Errorf("field %q is undefined: %w", fieldPath, e))
		}
	}
	return errs.Unique()
}
// matchingWithDynamicTemplates looks for a dynamic template that matches the
// given definition (currentPath) and whose mapping has the same parameters.
// It returns nil for the first template fulfilling both conditions, and an
// error if a template cannot be evaluated or if none of them is suitable.
// The dynamic templates parameters are based on https://www.elastic.co/guide/en/elasticsearch/reference/8.17/dynamic-templates.html
func (v *MappingValidator) matchingWithDynamicTemplates(currentPath string, definition map[string]any, dynamicTemplates []dynamicTemplate) error {
	for _, candidate := range dynamicTemplates {
		matched, err := candidate.Matches(currentPath, definition)
		if err != nil {
			return fmt.Errorf("failed to validate %q with dynamic template %q: %w", currentPath, candidate.name, err)
		}

		// Check that all parameters match (setting no dynamic templates to avoid recursion).
		// If the template does not match or any parameter differs, look for another dynamic template.
		if matched && v.validateObjectMappingAndParameters(candidate.mapping, definition, currentPath, []map[string]any{}, true) == nil {
			return nil
		}
	}
	return fmt.Errorf("no template matching for path: %q", currentPath)
}
// validateObjectMappingAndParameters validates the current object or field parameter (currentPath) comparing the values
// in the actual mapping with the values in the preview mapping.
//
// If the actual value is a map, it is compared recursively with the preview
// through compareMappings. Any other non-nil value is treated as a mapping
// parameter: both values must have the same JSON representation, except for
// the number types accepted by isNumberTypeField. Nothing is validated when
// the actual value is nil. It returns nil if no errors are found.
func (v *MappingValidator) validateObjectMappingAndParameters(previewValue, actualValue any, currentPath string, dynamicTemplates []map[string]any, couldBeParametersDefinition bool) multierror.Error {
	var errs multierror.Error
	switch actualField := actualValue.(type) {
	case map[string]any:
		// there could be other objects nested under this key/path
		previewField, ok := previewValue.(map[string]any)
		if !ok {
			errs = append(errs, fmt.Errorf("unexpected type in preview mappings for path: %q", currentPath))
		}
		errs = append(errs, v.compareMappings(currentPath, couldBeParametersDefinition, previewField, actualField, dynamicTemplates)...)
	case any:
		// Validate each setting/parameter of the mapping
		// If a mapping exist in both preview and actual, they should be the same. But forcing to compare each parameter just in case

		// Get the string representation of the values via JSON Marshalling
		previewData, err := json.Marshal(previewValue)
		if err != nil {
			errs = append(errs, fmt.Errorf("error marshalling preview value %v (path: %s): %w", previewValue, currentPath, err))
			return errs
		}
		actualData, err := json.Marshal(actualValue)
		if err != nil {
			errs = append(errs, fmt.Errorf("error marshalling actual value %v (path: %s): %w", actualValue, currentPath, err))
			return errs
		}

		// The values are compared through their JSON representation instead of
		// using the == operator: comparing two interfaces holding values of the
		// same uncomparable type (e.g. parameters defined as arrays) panics.
		if string(previewData) == string(actualData) {
			return nil
		}

		// Strings from `json.Marshal` include double quotes, so they need to be removed (e.g. "\"float\"")
		previewDataString := strings.ReplaceAll(string(previewData), "\"", "")
		actualDataString := strings.ReplaceAll(string(actualData), "\"", "")

		// exceptions related to numbers
		// https://github.com/elastic/elastic-package/blob/8cc126ae5015dd336b22901c365e8c98db4e7c15/internal/fields/validate.go#L1234-L1247
		if isNumberTypeField(previewDataString, actualDataString) {
			logger.Debugf("Allowed number fields with different types (preview %s - actual %s)", previewDataString, actualDataString)
			return nil
		}

		errs = append(errs, fmt.Errorf("unexpected value found in mapping for field %q: preview mappings value (%s) different from the actual mappings value (%s)", currentPath, string(previewData), string(actualData)))
	}
	return errs
}