cvefeed/cvecache.go (214 lines of code) (raw):
// Copyright (c) Facebook, Inc. and its affiliates.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cvefeed
import (
"sort"
"strings"
"sync"
"sync/atomic"
"unsafe"
"github.com/facebookincubator/flog"
"github.com/facebookincubator/nvdtools/wfn"
)
const cacheEvictPercentage = 0.1 // every eviction cycle invalidates this part of cache size at once
// Index maps the CPEs to the entries in the NVD feed they mentioned in
type Index map[string][]Vuln
// NewIndex creates new Index from a slice of CVE entries
func NewIndex(d Dictionary) Index {
idx := Index{}
for _, entry := range d {
set := map[string]bool{}
for _, cpe := range entry.Config() {
// Can happen, for instance, when the feed contains illegal binding of CPE name. Unfortunately, it happens to NVD,
// e.g. embedded ? in cpe:2.3:a:disney:where\\'s_my_perry?_free:1.5.1:*:*:*:*:android:*:* of CVE-2014-5606
if cpe == nil {
continue
}
product := cpe.Product
if wfn.HasWildcard(product) {
product = wfn.Any
}
if !set[product] {
set[product] = true
idx[product] = append(idx[product], entry)
}
}
}
return idx
}
// MatchResult stores CVE and a slice of CPEs that matched it
type MatchResult struct {
CVE Vuln
CPEs []*wfn.Attributes
}
// cachedCVEs stores cached CVEs, a channel to signal if the value is ready
type cachedCVEs struct {
res []MatchResult
ready chan struct{}
size int64
evictionIndex int // position in eviction queue
}
// updateResSize calculates the size of cached MatchResult and assigns it to cves.size
func (cves *cachedCVEs) updateResSize(key string) {
cves.size = int64(int(unsafe.Sizeof(key)) + len(key))
if cves == nil {
return
}
cves.size += int64(unsafe.Sizeof(cves.res))
for i := range cves.res {
cves.size += int64(unsafe.Sizeof(cves.res[i].CVE))
for _, attr := range cves.res[i].CPEs {
cves.size += int64(len(attr.Part)) + int64(unsafe.Sizeof(attr.Part))
cves.size += int64(len(attr.Vendor)) + int64(unsafe.Sizeof(attr.Vendor))
cves.size += int64(len(attr.Product)) + int64(unsafe.Sizeof(attr.Product))
cves.size += int64(len(attr.Version)) + int64(unsafe.Sizeof(attr.Version))
cves.size += int64(len(attr.Update)) + int64(unsafe.Sizeof(attr.Update))
cves.size += int64(len(attr.Edition)) + int64(unsafe.Sizeof(attr.Edition))
cves.size += int64(len(attr.SWEdition)) + int64(unsafe.Sizeof(attr.SWEdition))
cves.size += int64(len(attr.TargetHW)) + int64(unsafe.Sizeof(attr.TargetHW))
cves.size += int64(len(attr.Other)) + int64(unsafe.Sizeof(attr.Other))
cves.size += int64(len(attr.Language)) + int64(unsafe.Sizeof(attr.Language))
}
}
}
// Cache caches CVEs for known CPEs
type Cache struct {
// Used to compute the hit ratio
numLookups int64
numHits int64
// Actual cache data
data map[string]*cachedCVEs
evictionQ *evictionQueue
mu sync.Mutex
Dict Dictionary
Idx Index
MaxSize int64 // maximum size of the cache, 0 -- unlimited, -1 -- no caching
size int64 // current size of the cache
RequireVersion bool // ignore matching specifications that have Version == ANY
}
// NewCache creates new Cache instance with dictionary dict.
func NewCache(dict Dictionary) *Cache {
return &Cache{Dict: dict, evictionQ: new(evictionQueue)}
}
// SetRequireVersion sets if the instance of cache fails matching the dictionary
// records without Version attribute of CPE name.
// Returns a pointer to the instance of Cache, for easy chaining.
func (c *Cache) SetRequireVersion(requireVersion bool) *Cache {
c.RequireVersion = requireVersion
return c
}
// SetMaxSize sets maximum size of the cache to some pre-defined value,
// size of 0 disables eviction (makes the cache grow indefinitely),
// negative size disables caching.
// Returns a pointer to the instance of Cache, for easy chaining.
func (c *Cache) SetMaxSize(size int64) *Cache {
c.MaxSize = size
return c
}
// Get returns slice of CVEs for CPE names from cpes parameter;
// if CVEs aren't cached (and the feature is enabled) it finds them in cveDict and caches the results
func (c *Cache) Get(cpes []*wfn.Attributes) []MatchResult {
atomic.AddInt64(&c.numLookups, 1)
// negative max size of the cache disables caching
if c.MaxSize < 0 {
return c.match(cpes)
}
// otherwise, let's get to the business
key := cacheKey(cpes)
c.mu.Lock()
if c.data == nil {
c.data = make(map[string]*cachedCVEs)
}
cves := c.data[key]
if cves != nil {
atomic.AddInt64(&c.numHits, 1)
// value is being computed, wait till ready
c.mu.Unlock()
<-cves.ready
c.mu.Lock() // TODO: XXX: ugly, consider using atomic.Value instead
cves.evictionIndex = c.evictionQ.touch(cves.evictionIndex)
c.mu.Unlock()
return cves.res
}
// first request; the goroutine that sent it computes the value
cves = &cachedCVEs{ready: make(chan struct{})}
c.data[key] = cves
c.mu.Unlock()
// now other requests for same key wait on the channel, and the requests for the different keys aren't blocked
cves.res = c.match(cpes)
cves.updateResSize(key)
c.mu.Lock()
if c.MaxSize != 0 && c.size+cves.size > c.MaxSize {
c.evict(int64(cacheEvictPercentage*float64(c.MaxSize)) + cves.size)
}
c.size += cves.size
cves.evictionIndex = c.evictionQ.push(key)
c.mu.Unlock()
close(cves.ready)
return cves.res
}
// match will return all match results based on the given cpes
func (c *Cache) match(cpes []*wfn.Attributes) []MatchResult {
d := c.Dict
if c.Idx != nil {
d = c.dictFromIndex(cpes)
}
return c.matchDict(cpes, d)
}
// dictFromIndex creates CVE dictionary from entries indexed by CPE names
func (c *Cache) dictFromIndex(cpes []*wfn.Attributes) Dictionary {
d := Dictionary{}
if c.Idx == nil {
return d
}
knownEntries := map[Vuln]bool{}
addVulns := func(product string) {
for _, vuln := range c.Idx[product] {
if !knownEntries[vuln] {
knownEntries[vuln] = true
d[vuln.ID()] = vuln
}
}
}
for _, cpe := range cpes {
if cpe == nil { // should never happen
flog.Warning("nil CPE in list")
continue
}
// any of the CPEs having product=ANY would mean we need to match against the entire dictionary
if cpe.Product == wfn.Any {
return c.Dict
}
addVulns(cpe.Product)
}
addVulns(wfn.Any)
return d
}
// match matches the CPE names against internal vulnerability dictionary and returns a slice of matching resutls
func (c *Cache) matchDict(cpes []*wfn.Attributes, dict Dictionary) (results []MatchResult) {
for _, v := range dict {
if matches := v.Match(cpes, c.RequireVersion); len(matches) > 0 {
results = append(results, MatchResult{v, matches})
}
}
return results
}
// evict the least recently used records untile nbytes of capacity is achieved or no more records left.
// It is not concurrency-safe, c.mu should be locked before calling it.
func (c *Cache) evict(nbytes int64) {
for c.size > 0 && c.size+nbytes > c.MaxSize {
key := c.evictionQ.pop()
cd, ok := c.data[key]
if !ok { // should not happen
panic("attempted to evict non-existent record")
}
c.size -= cd.size
delete(c.data, key)
}
}
func cacheKey(cpes []*wfn.Attributes) string {
parts := make([]string, 0, len(cpes))
for _, cpe := range cpes {
if cpe == nil {
continue
}
var out strings.Builder
out.WriteString(cpe.Part)
out.WriteByte('^')
out.WriteString(cpe.Vendor)
out.WriteByte('^')
out.WriteString(cpe.Product)
out.WriteByte('^')
out.WriteString(cpe.Version)
out.WriteByte('^')
out.WriteString(cpe.Update)
out.WriteByte('^')
out.WriteString(cpe.Edition)
out.WriteByte('^')
out.WriteString(cpe.SWEdition)
out.WriteByte('^')
out.WriteString(cpe.TargetSW)
out.WriteByte('^')
out.WriteString(cpe.TargetHW)
out.WriteByte('^')
out.WriteString(cpe.Other)
out.WriteByte('^')
out.WriteString(cpe.Language)
parts = append(parts, out.String())
}
sort.Strings(parts)
return strings.Join(parts, "#")
}
// HitRatio returns the cache hit ratio, the number of cache hits to the number
// of lookups, as a percentage.
func (c *Cache) HitRatio() float64 {
if c.numLookups == 0 {
return 0
}
return float64(c.numHits) / float64(c.numLookups) * 100
}