internal/vulnerability/events_creator.go (384 lines of code) (raw):
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package vulnerability
import (
"context"
"encoding/json"
"fmt"
"maps"
"slices"
"strings"
"time"
dbTypes "github.com/aquasecurity/trivy-db/pkg/types"
trivyVul "github.com/aquasecurity/trivy-db/pkg/vulnsrc/vulnerability"
trivyTypes "github.com/aquasecurity/trivy/pkg/types"
"github.com/elastic/beats/v7/libbeat/beat"
libevents "github.com/elastic/beats/v7/libbeat/beat/events"
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/cloudbeat/internal/config"
"github.com/elastic/cloudbeat/internal/dataprovider"
"github.com/elastic/cloudbeat/internal/infra/clog"
"github.com/elastic/cloudbeat/internal/resources/fetching"
"github.com/elastic/cloudbeat/internal/resources/providers/awslib/ec2"
"github.com/elastic/cloudbeat/internal/transformer"
)
// Enricher is the minimal enrichment contract EventsCreator needs for its
// commonDataProvider field.
type Enricher interface {
// EnrichEvent receives a pointer to the event and reports an error when the
// enrichment fails. Presumably the event is modified in place — confirm
// against the implementation returned by dataprovider.NewEnricher.
EnrichEvent(event *beat.Event) error
}
// Vulnerability is the value stored under the "vulnerability" key of every
// event produced by generateEvent. It describes a single Trivy finding.
type Vulnerability struct {
// Cvss holds the per-source CVSS vectors and scores (see getCVSS).
Cvss VendorCVSS `json:"cvss,omitempty"`
// DataSource is nil when Trivy reported no data source (see getDataSource).
DataSource *DataSource `json:"data_source,omitempty"`
Scanner Scanner `json:"scanner,omitempty"`
// Score is the single score/version pair selected by getCVSSScore and
// getCVSSVersion.
Score Score `json:"score,omitempty"`
// Package duplicates the root-level "package" object; generateEvent keeps it
// for backward compatibility.
Package Package `json:"package,omitempty"`
Cwe []string `json:"cwe,omitempty"`
ID string `json:"id,omitempty"`
Title string `json:"title,omitempty"`
// Enumeration is the part of ID before the first "-" (see getIdentifierType).
Enumeration string `json:"enumeration,omitempty"`
Reference string `json:"reference,omitempty"`
Description string `json:"description,omitempty"`
Severity string `json:"severity,omitempty"`
// Classification is always set to vulScoreSystemClass by generateEvent.
Classification string `json:"classification,omitempty"`
PublishedDate *time.Time `json:"published_date,omitempty"`
// ReportId is the Unix-seconds value of the scan sequence timestamp.
ReportId int64 `json:"report_id,omitempty"`
// Deprecated: Class was renamed to Category. generateEvent fills both with
// the same value.
Class trivyTypes.ResultClass `json:"class,omitempty"`
Category trivyTypes.ResultClass `json:"category,omitempty"`
}
// CloudSection is the "cloud" object of a vulnerability event.
//
// We aren't using the cloud processor here
// because we want to assign information regarding
// the scanned resource and not the scanner.
//
// generateEvent converts this struct with convertStructToMapStr, i.e. through
// encoding/json, so the JSON tags below define the emitted keys. Note that
// encoding/json never treats a struct value as empty, so "omitempty" on the
// struct-typed fields (Service, Machine, Instance, Security) has no effect.
// NOTE(review): the capitalized keys ("Tags", "Security") look intentional,
// presumably to mark non-ECS fields — confirm before normalizing.
type CloudSection struct {
Service Service `json:"service,omitempty"`
Machine Machine `json:"machine,omitempty"`
Tags map[string]string `json:"Tags,omitempty"`
// AvailabilityZone is nil when the instance has no placement information
// (see getAvailabilityZone).
AvailabilityZone *string `json:"availability_zone,omitempty"`
Region string `json:"region,omitempty"`
Instance Instance `json:"instance,omitempty"`
Security Security `json:"Security,omitempty"`
}
// NetworkSection is the "network" object of a vulnerability event. It carries
// the addresses of the scanned instance and is converted with
// convertStructToMapStr, so nil pointers and empty slices are dropped.
// NOTE(review): the capitalized keys look intentional, presumably to mark
// non-ECS fields — confirm before normalizing.
type NetworkSection struct {
PrivateIp *string `json:"Private_ip,omitempty"`
PublicIp *string `json:"Public_ip,omitempty"`
MacAddresses []string `json:"Mac_addresses,omitempty"`
}
// Security groups the security-related attributes of the scanned instance
// inside CloudSection; currently only its security groups.
type Security struct {
SecurityGroups []ec2.SecurityGroupInfo `json:"security_groups,omitempty"`
}
// HostSection is the "host" object of a vulnerability event.
//
// We aren't using the cloud processor here
// because we want to assign information regarding
// the scanned resource and not the scanner.
// NOTE(review): this comment is shared verbatim with CloudSection; for the
// host section a host-metadata processor is probably meant — confirm.
type HostSection struct {
Architecture string `json:"architecture,omitempty"`
Os Os `json:"os,omitempty"`
// Name is filled with the instance resource name by generateEvent.
Name string `json:"name,omitempty"`
}
// Os describes the operating system of the scanned host.
type Os struct {
// Platform is filled from the instance's PlatformDetails by generateEvent
// and is omitted when that pointer is nil.
Platform *string `json:"platform,omitempty"`
}
// Service names the cloud service the scanned resource belongs to.
// generateEvent currently always sets Name to "AWS EC2".
type Service struct {
Name string `json:"name,omitempty"`
}
// Instance identifies the scanned instance inside CloudSection; generateEvent
// fills it from the instance's resource id and resource name.
type Instance struct {
Id string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
}
// Machine holds machine-level attributes of the scanned instance inside
// CloudSection. Pointer fields are omitted from the output when nil.
// NOTE(review): the capitalized keys look intentional, presumably to mark
// non-ECS fields — confirm before normalizing.
type Machine struct {
// Type is the instance type converted to a string.
Type string `json:"type,omitempty"`
Authentication AuthInfo `json:"Authentication,omitempty"`
LaunchTime *time.Time `json:"Launch_time,omitempty"`
// Image is filled from the instance's ImageId.
Image *string `json:"Image,omitempty"`
}
// AuthInfo carries authentication details of the scanned machine.
type AuthInfo struct {
// Key is filled from the instance's KeyName and omitted when nil.
Key *string `json:"key,omitempty"`
}
// CVSS is the event-side copy of one source's CVSS data. getCVSS fills it
// field-by-field from the Trivy CVSS entry of the same source; empty vectors
// and zero scores are omitted.
type CVSS struct {
V2Vector string `json:"V2Vector,omitempty"`
V3Vector string `json:"V3Vector,omitempty"`
V2Score float64 `json:"V2Score,omitempty"`
V3Score float64 `json:"V3Score,omitempty"`
}
// VendorCVSS maps a Trivy source ID to the CVSS data that source reported.
type VendorCVSS map[dbTypes.SourceID]CVSS
// DataSource is the event-side copy of the Trivy data source of a finding
// (see getDataSource). The tags carry no name, so under encoding/json the
// keys are the Go field names ("ID", "Name", "URL").
type DataSource struct {
ID dbTypes.SourceID `json:",omitempty"`
Name string `json:",omitempty"`
URL string `json:",omitempty"`
}
// Package describes the vulnerable package. generateEvent emits it twice: as
// the root "package" object (with Path and Type taken from the Trivy result)
// and, without Path and Type, inside "vulnerability" for backward
// compatibility.
type Package struct {
// Version is the installed version of the package.
Version string `json:"version,omitempty"`
Name string `json:"name,omitempty"`
Type string `json:"type,omitempty"`
// Path is the Trivy result target the package was found in.
Path string `json:"path,omitempty"`
FixedVersion string `json:"fixed_version,omitempty"`
}
// Scanner identifies the tool that produced the finding. generateEvent fills
// it from the vulScannerVersion and vulScannerVendor constants.
type Scanner struct {
Version string `json:"version,omitempty"`
Vendor string `json:"vendor,omitempty"`
}
// Score is the single CVSS score selected for a finding.
type Score struct {
// Base is the value returned by getCVSSScore (v3 preferred, v2 as fallback).
Base float64 `json:"base,omitempty"`
// Version is the value returned by getCVSSVersion, e.g. the text following
// "CVSS:" in the vector, or "2.0" when the vector has no such header.
Version string `json:"version,omitempty"`
}
// Resource is the legacy "resource" object of a vulnerability event.
//
// Deprecated: the information moved to the Cloud and Host sections;
// generateEvent still emits it alongside them.
type Resource struct {
// ID has no omitempty and is therefore always emitted.
ID string `json:"id"`
Name string `json:"name,omitempty"`
}
// Fixed values stamped on generated vulnerability events or used to parse
// CVSS vectors.
const (
// vulScannerVersion is reported as vulnerability.scanner.version.
// NOTE(review): hard-coded; presumably meant to match the bundled Trivy
// version — confirm it is still accurate.
vulScannerVersion = "v0.35.0"
// vulScannerVendor is reported as vulnerability.scanner.vendor.
vulScannerVendor = "Trivy"
// vulScoreSystemClass is reported as vulnerability.classification.
vulScoreSystemClass = "CVSS"
// vectorHeader is the prefix getCVSSVersion looks for in a vector segment to
// extract the CVSS version.
vectorHeader = "CVSS:"
// vulEcsCategory is the single category passed to transformer.BuildECSEvent.
vulEcsCategory = "vulnerability"
)
// EventsCreator turns batches of scan results into batches of beat events and
// publishes them on its own channel (see CreateEvents and GetChan).
type EventsCreator struct {
log *clog.Logger
// cloudDataProvider is the provider passed to NewEventsCreator as bdp; it
// enriches each event together with the instance region.
cloudDataProvider dataprovider.CommonDataProvider
// commonDataProvider is the Enricher built from the cdp argument of
// NewEventsCreator; it is applied after cloudDataProvider.
commonDataProvider Enricher
// ch is the output channel; CreateEvents closes it when it returns.
ch chan []beat.Event
// index is written to the event metadata under libevents.FieldMetaIndex.
index string
}
// NewEventsCreator builds an EventsCreator.
//
// bdp is stored as the cloud data provider, cdp is wrapped with
// dataprovider.NewEnricher and stored as the common data provider, and the
// target index is taken from cfg.Datastream(). The output channel is
// unbuffered.
func NewEventsCreator(log *clog.Logger, cfg *config.Config, bdp dataprovider.CommonDataProvider, cdp dataprovider.ElasticCommonDataProvider) EventsCreator {
	enricher := dataprovider.NewEnricher(cdp)
	out := make(chan []beat.Event)

	return EventsCreator{
		log:                log,
		cloudDataProvider:  bdp,
		commonDataProvider: enricher,
		ch:                 out,
		index:              cfg.Datastream(),
	}
}
// CreateEvents reads batches of scan results from scanResults, converts every
// result into a beat event and forwards each converted batch on the creator's
// output channel.
//
// It blocks until ctx is canceled or scanResults is closed, and closes the
// output channel when it returns. A batch that was already converted is
// dropped if ctx is canceled before a receiver takes it.
//
// TODO: Replace sequence with more generic approach
func (e EventsCreator) CreateEvents(ctx context.Context, scanResults chan []Result) {
	defer close(e.ch)

	for {
		// Wait for the next batch, or stop.
		var batch []Result
		select {
		case <-ctx.Done():
			e.log.Info("EventsCreator.CreateEvents context canceled")
			return
		case received, open := <-scanResults:
			if !open {
				e.log.Info("EventsCreator.CreateEvents channel is closed")
				return
			}
			batch = received
		}

		converted := make([]beat.Event, len(batch))
		for i, r := range batch {
			converted[i] = e.generateEvent(r.reportResult, r.vulnerability, r.snapshot.Instance, r.seq)
		}

		// The output channel is unbuffered, so the send must also honor
		// cancellation to avoid blocking forever on a gone receiver.
		select {
		case <-ctx.Done():
			e.log.Info("EventsCreator.CreateEvents context canceled")
			return
		case e.ch <- converted:
		}
	}
}
// GetChan returns the channel on which CreateEvents publishes event batches.
// The channel is closed by CreateEvents when it returns.
func (e EventsCreator) GetChan() chan []beat.Event {
return e.ch
}
// generateEvent assembles the beat event for one detected vulnerability on a
// scanned EC2 instance.
//
// reportResult is the Trivy result the finding belongs to (its Target, Type
// and Class are copied), vul is the finding itself, instance is the scanned
// machine, and seq identifies the scan cycle: its Unix seconds are used both
// as the event sequence and as vulnerability.report_id.
//
// Failures while converting the cloud/host/network sections or while
// enriching are logged and otherwise ignored; the event is always returned,
// possibly with a nil section.
func (e EventsCreator) generateEvent(reportResult trivyTypes.Result, vul trivyTypes.DetectedVulnerability, instance ec2.Ec2Instance, seq time.Time) beat.Event {
	now := time.Now().UTC()
	reportID := seq.Unix()

	cloud := CloudSection{
		Instance: Instance{
			Id:   instance.GetResourceId(),
			Name: instance.GetResourceName(),
		},
		Service: Service{
			// TODO: Support more services
			Name: "AWS EC2",
		},
		Machine: Machine{
			Type:           string(instance.InstanceType),
			Authentication: AuthInfo{Key: instance.KeyName},
			LaunchTime:     instance.LaunchTime,
			Image:          instance.ImageId,
		},
		AvailabilityZone: getAvailabilityZone(instance),
		Region:           instance.Region,
		Tags:             instance.GetResourceTags(),
		Security: Security{
			SecurityGroups: instance.GetResourceSecurityGroups(),
		},
	}
	cloudSec, err := convertStructToMapStr(cloud)
	// TODO: Should we fail the event if we can't enrich the cloud section?
	if err != nil {
		e.log.Errorf("failed to enrich cloud section: %v", err)
	}

	host := HostSection{
		Architecture: string(instance.Architecture),
		Os: Os{
			// TODO: Investigate how to get the full os name
			// Property "Platform PlatformValues" shows
			// the value Windows for Windows instances; otherwise blank
			// this only gives us information if the platform is windows or not
			// picked "PlatformDetails" as it gives us more information
			Platform: instance.PlatformDetails,
		},
		Name: instance.GetResourceName(),
	}
	hostSec, err := convertStructToMapStr(host)
	// TODO: Should we fail the event if we can't enrich the host section?
	if err != nil {
		e.log.Errorf("failed to enrich host section: %v", err)
	}

	network := NetworkSection{
		PrivateIp:    instance.PrivateIpAddress,
		PublicIp:     instance.PublicIpAddress,
		MacAddresses: instance.GetResourceMacAddresses(),
	}
	networkSec, err := convertStructToMapStr(network)
	// TODO: Should we fail the event if we can't enrich the network section?
	if err != nil {
		e.log.Errorf("failed to enrich network section: %v", err)
	}

	// TODO: Replace sequence with more generic approach
	ecsEvent := transformer.BuildECSEvent(reportID, now, []string{vulEcsCategory}, transformer.EcsOutcomeSuccess)

	// Deprecated replaced by cloud and host fields
	legacyResource := Resource{
		ID:   instance.GetResourceId(),
		Name: instance.GetResourceName(),
	}

	rootPackage := Package{
		Path:         reportResult.Target,
		Type:         string(reportResult.Type),
		Name:         vul.PkgName,
		Version:      vul.InstalledVersion,
		FixedVersion: vul.FixedVersion,
	}

	finding := Vulnerability{
		// TODO: Replace sequence with more generic approach
		// TODO: Do we need to add the ReportID duplication if we already have the sequence in event?
		ReportId: reportID,
		// Deprecated field Class renamed to Category
		Class:      reportResult.Class,
		Category:   reportResult.Class,
		Cvss:       getCVSS(vul),
		DataSource: getDataSource(vul),
		Scanner: Scanner{
			// TODO: Populate with what?
			Version: vulScannerVersion,
			Vendor:  vulScannerVendor,
		},
		Score: Score{
			Base:    e.getCVSSScore(vul),
			Version: e.getCVSSVersion(vul),
		},
		Package: Package{ // kept for backward compatibility, new obj is under root
			Name:         vul.PkgName,
			Version:      vul.InstalledVersion,
			FixedVersion: vul.FixedVersion,
		},
		Cwe:            vul.CweIDs,
		ID:             vul.VulnerabilityID,
		Title:          vul.Title,
		Enumeration:    getIdentifierType(vul.VulnerabilityID),
		Reference:      getReference(vul),
		Description:    vul.Description,
		Severity:       vul.Severity,
		Classification: vulScoreSystemClass,
		PublishedDate:  vul.PublishedDate,
	}

	event := beat.Event{
		// TODO: Maybe configure or get from somewhere else?
		Meta:      mapstr.M{libevents.FieldMetaIndex: e.index},
		Timestamp: now,
		Fields: mapstr.M{
			"event":         ecsEvent,
			"resource":      legacyResource,
			"package":       rootPackage,
			"vulnerability": finding,
			// TODO: These sections might be overridden by the enricher of proccessor
			"cloud":   cloudSec,
			"host":    hostSec,
			"network": networkSec,
		},
	}

	if err := e.cloudDataProvider.EnrichEvent(&event, fetching.ResourceMetadata{Region: instance.Region}); err != nil {
		e.log.Errorf("failed to enrich event with benchmark data provider: %v", err)
	}
	if err := e.commonDataProvider.EnrichEvent(&event); err != nil {
		e.log.Errorf("failed to enrich event with global data provider: %v", err)
	}

	return event
}
// getIdentifierType returns the enumeration prefix of a vulnerability id,
// i.e. everything before the first "-" ("CVE" for "CVE-2023-1234").
// An id without "-" is returned unchanged; an empty id yields "".
func getIdentifierType(id string) string {
	// strings.Cut returns the whole input as the first value when the
	// separator is absent, so no length check is needed and no slice is built.
	prefix, _, _ := strings.Cut(id, "-")
	return prefix
}
// getCVSSVersion derives the CVSS version of a finding from its selected
// vector (see getCVSSVector).
//
// It returns "" (and logs a warning) when no vector is available, the text
// following "CVSS:" in the first "/"-separated segment that carries that
// header, or "2.0" (with a warning) when no segment carries it.
func (e EventsCreator) getCVSSVersion(vul trivyTypes.DetectedVulnerability) string {
	vector := e.getCVSSVector(vul)
	if vector == "" {
		e.log.Warnf("No CVSS vector found for vulnerability: %s", vul.VulnerabilityID)
		return ""
	}

	for _, segment := range strings.Split(vector, "/") {
		// Extract the number after "CVSS:"
		if version, hasHeader := strings.CutPrefix(segment, vectorHeader); hasHeader {
			return version
		}
	}

	e.log.Warnf("no CVSS version found in vector: %s fallback to v2", vector)
	return "2.0"
}
// getCVSSVector returns the CVSS vector selected for a finding: the v3 vector
// chosen by getCVSSValue when it is non-empty, otherwise the v2 vector chosen
// by getCVSSValue (which may itself be empty).
func (e EventsCreator) getCVSSVector(vul trivyTypes.DetectedVulnerability) string {
	var none string

	v3 := getCVSSValue(vul, func(c dbTypes.CVSS) string { return c.V3Vector }, none)
	if v3 != none {
		return v3
	}

	e.log.Debugf("No CVSS v3 vector found for vulnerability: %s, fallback to v2", vul.VulnerabilityID)
	v2 := getCVSSValue(vul, func(c dbTypes.CVSS) string { return c.V2Vector }, none)
	return v2
}
// getCVSSScore returns the CVSS base score selected for a finding: the v3
// score chosen by getCVSSValue when it is non-zero, otherwise the v2 score
// chosen by getCVSSValue (which may itself be zero).
func (e EventsCreator) getCVSSScore(vul trivyTypes.DetectedVulnerability) float64 {
	var none float64

	v3 := getCVSSValue(vul, func(c dbTypes.CVSS) float64 { return c.V3Score }, none)
	if v3 != none {
		return v3
	}

	e.log.Debugf("No CVSS v3 score found for vulnerability: %s, fallback to v2", vul.VulnerabilityID)
	v2 := getCVSSValue(vul, func(c dbTypes.CVSS) float64 { return c.V2Score }, none)
	return v2
}
// NVDVulnDetailBaseURL is the prefix getReference prepends to a vulnerability
// id to build the reference link for findings that have NVD CVSS data.
const NVDVulnDetailBaseURL = "https://nvd.nist.gov/vuln/detail/"
// getReference returns the reference URL of a finding: the NVD detail page
// for its id when the finding has a CVSS entry from the NVD source, otherwise
// the primary URL reported by Trivy.
func getReference(vul trivyTypes.DetectedVulnerability) string {
	if _, hasNVD := vul.CVSS[trivyVul.NVD]; hasNVD {
		return fmt.Sprintf("%s%s", NVDVulnDetailBaseURL, vul.VulnerabilityID)
	}
	return vul.PrimaryURL
}
// getCVSSValue extracts one value (a score or a vector) from the CVSS data of
// a finding, choosing the source in this order:
//
//  1. the finding's own data source (the empty source id when it has none),
//  2. the NVD source,
//  3. the remaining source with the smallest id.
//
// value projects the wanted field out of the chosen CVSS entry; its result is
// returned as-is, even when it is the zero value. zeroVal is returned only
// when the finding carries no CVSS data at all.
//
// Step 3 is deliberately deterministic. Picking "whatever key the map yields
// first" would follow Go's randomized map iteration order, so separate calls
// for the same finding (score vs. vector, or two scan cycles) could silently
// use different sources.
func getCVSSValue[T comparable](vul trivyTypes.DetectedVulnerability, value func(cvss dbTypes.CVSS) T, zeroVal T) T {
	if len(vul.CVSS) == 0 {
		return zeroVal
	}

	// Detect the data source
	var source dbTypes.SourceID
	if vul.DataSource != nil {
		source = vul.DataSource.ID
	}

	// Attempt to pull detected data source
	if cvss, ok := vul.CVSS[source]; ok {
		return value(cvss)
	}

	// Try NVD as a fallback if it exists
	if cvss, ok := vul.CVSS[trivyVul.NVD]; ok {
		return value(cvss)
	}

	// Fall back to any other data source. The map is non-empty here, so the
	// sorted key slice always has a first element.
	sources := slices.Sorted(maps.Keys(vul.CVSS))
	return value(vul.CVSS[sources[0]])
}
// getCVSS copies the per-source CVSS data of a finding into the event-side
// VendorCVSS type. The result is never nil; it is empty when the finding has
// no CVSS data.
//
// https://github.com/elastic/cloudbeat/pull/848#discussion_r1165239774
func getCVSS(vul trivyTypes.DetectedVulnerability) VendorCVSS {
	bySource := make(map[dbTypes.SourceID]CVSS, len(vul.CVSS))
	for source, entry := range vul.CVSS {
		bySource[source] = CVSS{
			V2Vector: entry.V2Vector,
			V3Vector: entry.V3Vector,
			V2Score:  entry.V2Score,
			V3Score:  entry.V3Score,
		}
	}
	return bySource
}
// getDataSource copies the Trivy data source of a finding into the event-side
// DataSource type, or returns nil when the finding has none.
//
// https://github.com/elastic/cloudbeat/pull/848#discussion_r1165239774
func getDataSource(vul trivyTypes.DetectedVulnerability) *DataSource {
	src := vul.DataSource
	if src == nil {
		return nil
	}

	copied := DataSource{
		ID:   src.ID,
		Name: src.Name,
		URL:  src.URL,
	}
	return &copied
}
// getAvailabilityZone returns the availability zone from the instance's
// placement, or nil when the instance has no placement information.
func getAvailabilityZone(ins ec2.Ec2Instance) *string {
	if placement := ins.Placement; placement != nil {
		return placement.AvailabilityZone
	}
	return nil
}
// convertStructToMapStr turns a struct into a mapstr.M by round-tripping it
// through JSON, so the struct's JSON tags decide the resulting keys.
//
// This is a workaround because of mapstr.M implementation (toMapStr):
// we cannot use structs as values in mapstr.M.
// It returns a nil map and a wrapped error when either the marshalling or the
// unmarshalling step fails.
// TODO: We need to find a better solution for this, it wastes resources
func convertStructToMapStr[S any](input S) (mapstr.M, error) {
	// Encode and decode as JSON to normalize the types.
	encoded, err := json.Marshal(input)
	if err != nil {
		return nil, fmt.Errorf("convertStructToMapStr error marshalling to JSON: %w", err)
	}

	result := make(mapstr.M)
	if err := json.Unmarshal(encoded, &result); err != nil {
		return nil, fmt.Errorf("convertStructToMapStr error unmarshalling from JSON: %w", err)
	}
	return result, nil
}