validator/validators/basic/basic_validator.go (226 lines of code) (raw):

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: MIT package basic import ( "fmt" "log" "strings" "time" cwtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" cwltypes "github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs/types" "github.com/aws/aws-sdk-go/aws" "go.uber.org/multierr" AppSignalMetrics "github.com/aws/amazon-cloudwatch-agent-test/test/metric" "github.com/aws/amazon-cloudwatch-agent-test/util/awsservice" "github.com/aws/amazon-cloudwatch-agent-test/util/common" "github.com/aws/amazon-cloudwatch-agent-test/util/common/traces" "github.com/aws/amazon-cloudwatch-agent-test/validator/models" "github.com/aws/amazon-cloudwatch-agent-test/validator/validators/util" ) const metricErrorBound = 0.1 const AppSignalNamespace = "AppSignals" type BasicValidator struct { vConfig models.ValidateConfig } var _ models.ValidatorFactory = (*BasicValidator)(nil) func NewBasicValidator(vConfig models.ValidateConfig) models.ValidatorFactory { return &BasicValidator{ vConfig: vConfig, } } func (s *BasicValidator) GenerateLoad() error { var ( metricSendingInterval = time.Minute logGroup = awsservice.GetInstanceId() metricNamespace = s.vConfig.GetMetricNamespace() dataRate = s.vConfig.GetDataRate() dataType = s.vConfig.GetDataType() agentCollectionPeriod = s.vConfig.GetAgentCollectionPeriod() agentConfigFilePath = s.vConfig.GetCloudWatchAgentConfigPath() receiver = s.vConfig.GetPluginsConfig()[0] ) switch dataType { case "logs": return common.StartLogWrite(agentConfigFilePath, agentCollectionPeriod, metricSendingInterval, dataRate) case "traces": return traces.StartTraceGeneration(receiver, agentConfigFilePath, agentCollectionPeriod, metricSendingInterval) default: // Sending metrics based on the receivers; however, for scraping plugin (e.g prometheus), we would need to scrape it instead of sending return common.StartSendingMetrics(receiver, agentCollectionPeriod, metricSendingInterval, dataRate, logGroup, metricNamespace) } } func (s *BasicValidator) CheckData(startTime, endTime time.Time) error { var ( multiErr error ec2InstanceId = awsservice.GetInstanceId() metricNamespace = s.vConfig.GetMetricNamespace() validationMetric = s.vConfig.GetMetricValidation() logValidations = s.vConfig.GetLogValidation() ) for _, metric := range validationMetric { metricDimensions := []cwtypes.Dimension{} //App Signal Metrics don't have instanceid dimension if !isAppSignalMetric(metric) { metricDimensions = []cwtypes.Dimension{ { Name: aws.String("InstanceId"), Value: aws.String(ec2InstanceId), }, } } for _, dimension := range metric.MetricDimension { metricDimensions = append(metricDimensions, cwtypes.Dimension{ Name: aws.String(dimension.Name), Value: aws.String(dimension.Value), }) } //App Signals metric testing (This is because we want to use a different checking method (same that was done for linux test)) if metric.MetricName == "Latency" || metric.MetricName == "Fault" || metric.MetricName == "Error" { err := s.ValidateAppSignalMetrics(metric, metricDimensions) if err != nil { multiErr = multierr.Append(multiErr, err) } else { fmt.Println("App Signal Metrics are correct!") } } else { err := s.ValidateMetric(metric.MetricName, metricNamespace, metricDimensions, metric.MetricValue, metric.MetricSampleCount, startTime, endTime) if err != nil { return err } } } err := s.ValidateTracesMetrics() if err != nil { multiErr = multierr.Append(multiErr, err) } else { fmt.Println("Traces Metrics are correct!") } for _, logValidation := range logValidations { err := s.ValidateLogs(logValidation.LogStream, logValidation.LogValue, logValidation.LogLevel, logValidation.LogSource, logValidation.LogLines, startTime, endTime) if err != nil { multiErr = multierr.Append(multiErr, err) } } return multiErr } func (s *BasicValidator) Cleanup() error { var ( dataType = s.vConfig.GetDataType() ec2InstanceId = awsservice.GetInstanceId() ) switch dataType { case "logs": awsservice.DeleteLogGroup(ec2InstanceId) } return nil } func isAppSignalMetric(metric models.MetricValidation) bool { if metric.MetricName == "Latency" || metric.MetricName == "Fault" || metric.MetricName == "Error" { return true } return false } func (s *BasicValidator) ValidateAppSignalMetrics(metric models.MetricValidation, metricDimensions []cwtypes.Dimension) error { if metric.MetricName == "Latency" || metric.MetricName == "Fault" || metric.MetricName == "Error" { fetcher := AppSignalMetrics.MetricValueFetcher{} values, err := fetcher.Fetch(AppSignalNamespace, metric.MetricName, metricDimensions, "Sum", 60) if err != nil { return err } if !AppSignalMetrics.IsAllValuesGreaterThanOrEqualToExpectedValue(metric.MetricName, values, 0) { fmt.Printf("Error values are not the epected values%v", err) return err } } return nil } func (s *BasicValidator) ValidateTracesMetrics() error { lookbackDuration := time.Duration(-5) * time.Minute serviceName := "service-name" serviceType := "AWS::EC2::Instance" //filtering traces filterExpression := fmt.Sprintf("(service(id(name: \"%s\", type: \"%s\")))", serviceName, serviceType) timeNow := time.Now() traceIds, err := awsservice.GetTraceIDs(timeNow.Add(lookbackDuration), timeNow, filterExpression) if err != nil { fmt.Printf("error getting trace ids: %v", err) return err } else { fmt.Printf("Trace IDs: %v\n", traceIds) if len(traceIds) > 0 { fmt.Println("Trace IDs look good") } else { return err } } return nil } func (s *BasicValidator) ValidateLogs(logStream, logLine, logLevel, logSource string, expectedMinimumEventCount int, startTime, endTime time.Time) error { logGroup := awsservice.GetInstanceId() log.Printf("Start to validate that substring '%s' has at least %d log event(s) within log group %s, log stream %s, between %v and %v", logLine, expectedMinimumEventCount, logGroup, logStream, startTime, endTime) return awsservice.ValidateLogs( logGroup, logStream, &startTime, &endTime, awsservice.AssertLogsNotEmpty(), awsservice.AssertNoDuplicateLogs(), func(events []cwltypes.OutputLogEvent) error { var actualEventCount int for _, event := range events { message := *event.Message switch logSource { case "WindowsEvents": if logLevel != "" && strings.Contains(message, logLine) && strings.Contains(message, logLevel) { actualEventCount += 1 } default: if strings.Contains(message, logLine) { actualEventCount += 1 } } } if actualEventCount < expectedMinimumEventCount { return fmt.Errorf("log event count for %q in %s/%s between %v and %v is %d which is less than the expected %d", logLine, logGroup, logStream, startTime, endTime, actualEventCount, expectedMinimumEventCount) } return nil }, ) } func (s *BasicValidator) ValidateMetric(metricName, metricNamespace string, metricDimensions []cwtypes.Dimension, metricValue float64, metricSampleCount int, startTime, endTime time.Time) error { var ( boundAndPeriod = s.vConfig.GetAgentCollectionPeriod().Seconds() ) metricQueries := s.buildMetricQueries(metricName, metricNamespace, metricDimensions) log.Printf("Start to collect and validate metric %s with the namespace %s, start time %v and end time %v \n", metricName, metricNamespace, startTime, endTime) metrics, err := awsservice.GetMetricData(metricQueries, startTime, endTime) if err != nil { return err } if len(metrics.MetricDataResults) == 0 || len(metrics.MetricDataResults[0].Values) == 0 { return fmt.Errorf("\n getting metric %s failed with the namespace %s and dimension %v", metricName, metricNamespace, util.LogCloudWatchDimension(metricDimensions)) } // Validate if the metrics are not dropping any metrics and able to backfill within the same minute (e.g if the memory_rss metric is having collection_interval 1 // , it will need to have 60 sample counts - 1 datapoint / second) if ok := awsservice.ValidateSampleCount(metricName, metricNamespace, metricDimensions, startTime, endTime, metricSampleCount, metricSampleCount, int32(boundAndPeriod)); !ok { return fmt.Errorf("\n metric %s is not within sample count bound [ %d, %d]", metricName, metricSampleCount, metricSampleCount) } // Validate if the corresponding metrics are within the acceptable range [acceptable value +- 10%] actualMetricValue := metrics.MetricDataResults[0].Values[0] upperBoundValue := metricValue * (1 + metricErrorBound) lowerBoundValue := metricValue * (1 - metricErrorBound) if metricValue != 0.0 && (actualMetricValue < lowerBoundValue || actualMetricValue > upperBoundValue) { return fmt.Errorf("\n metric %s value %f is different from the actual value %f", metricName, metricValue, metrics.MetricDataResults[0].Values[0]) } return nil } func (s *BasicValidator) buildMetricQueries(metricName, metricNamespace string, metricDimensions []cwtypes.Dimension) []cwtypes.MetricDataQuery { var metricQueryPeriod = int32(s.vConfig.GetAgentCollectionPeriod().Seconds()) metricInformation := cwtypes.Metric{ Namespace: aws.String(metricNamespace), MetricName: aws.String(metricName), Dimensions: metricDimensions, } metricDataQueries := []cwtypes.MetricDataQuery{ { MetricStat: &cwtypes.MetricStat{ Metric: &metricInformation, Period: &metricQueryPeriod, Stat: aws.String(string(models.AVERAGE)), }, Id: aws.String(strings.ToLower(metricName)), }, } return metricDataQueries }