tool/preprocess/match.go (420 lines of code) (raw):
// Copyright (c) 2024 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package preprocess
import (
"bufio"
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/config"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/data"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/errc"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/resource"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/util"
"github.com/dave/dst"
"golang.org/x/mod/module"
"golang.org/x/mod/semver"
)
// ruleMatcher holds the instrumentation rules that are candidates for matching
// against compile commands.
type ruleMatcher struct {
	// availableRules groups the loaded rules by the import path they report
	// via GetImportPath, so a compile command can look up its candidates
	// directly.
	availableRules map[string][]resource.InstRule
	// moduleVersions is only populated for vendor builds. When it is non-nil,
	// match consults it to resolve the module version of an import path.
	moduleVersions []*vendorModule
}
// newRuleMatcher creates a matcher from every rule returned by
// findAvailableRules, indexed by the import path each rule targets. The
// resulting index is logged when verbose mode is enabled.
func newRuleMatcher() *ruleMatcher {
	byImportPath := make(map[string][]resource.InstRule)
	for _, r := range findAvailableRules() {
		key := r.GetImportPath()
		byImportPath[key] = append(byImportPath[key], r)
	}
	if config.GetConf().Verbose {
		util.Log("Available rules: %v", byImportPath)
	}
	matcher := &ruleMatcher{}
	matcher.availableRules = byImportPath
	return matcher
}
// ruleHolder is the JSON decoding target for a single rule entry. It embeds
// the base rule together with every concrete rule kind; loadRuleRaw then
// inspects which of StructType, Function or FileName is set to decide which
// concrete rule the entry describes.
type ruleHolder struct {
	resource.InstBaseRule
	resource.InstFileRule
	resource.InstStructRule
	resource.InstFuncRule
}
// loadRuleFile reads the rule file at path and decodes it with loadRuleRaw.
// When the file cannot be read, the returned error is annotated with the
// current working directory to make relative-path mistakes easier to spot.
func loadRuleFile(path string) ([]resource.InstRule, error) {
	raw, readErr := util.ReadFile(path)
	if readErr == nil {
		return loadRuleRaw(raw)
	}
	// The working directory is diagnostic only, so a failure to obtain it is
	// deliberately ignored.
	pwd, _ := os.Getwd()
	readErr = errc.Adhere(readErr, "pwd", pwd)
	return nil, readErr
}
// loadRuleRaw decodes a JSON array of rule entries into concrete rules.
// Each entry is classified by the first non-empty field among StructType,
// Function and FileName (checked in that order), and the shared base rule is
// copied into the selected concrete rule. An entry with none of these fields
// set is reported through util.ShouldNotReachHereT.
func loadRuleRaw(content string) ([]resource.InstRule, error) {
	var holders []*ruleHolder
	if err := json.Unmarshal([]byte(content), &holders); err != nil {
		return nil, errc.New(errc.ErrInvalidJSON, err.Error())
	}

	rules := make([]resource.InstRule, 0, len(holders))
	for _, holder := range holders {
		switch {
		case holder.StructType != "":
			structRule := &holder.InstStructRule
			structRule.InstBaseRule = holder.InstBaseRule
			rules = append(rules, structRule)
		case holder.Function != "":
			funcRule := &holder.InstFuncRule
			funcRule.InstBaseRule = holder.InstBaseRule
			rules = append(rules, funcRule)
		case holder.FileName != "":
			fileRule := &holder.InstFileRule
			fileRule.InstBaseRule = holder.InstBaseRule
			rules = append(rules, fileRule)
		default:
			util.ShouldNotReachHereT("invalid rule type")
		}
	}
	return rules, nil
}
// loadDefaultRules decodes the rule set returned by data.UseDefaultRuleJson.
// A decoding failure is logged and results in a nil slice.
func loadDefaultRules() []resource.InstRule {
	defaults, err := loadRuleRaw(data.UseDefaultRuleJson())
	if err == nil {
		return defaults
	}
	util.Log("Failed to load default rules: %v", err)
	return nil
}
// findAvailableRules collects every instrumentation rule that should be
// considered for the current build.
//
// It returns nil in restore mode. Otherwise the default rules are loaded
// unless they are explicitly disabled, followed by the rules of every file
// listed in the comma separated RuleJsonFiles setting. Whitespace around each
// file name is ignored and empty entries are skipped. A file that cannot be
// loaded is logged and skipped, so a single bad file never discards the rules
// that were loaded successfully, regardless of how many files are listed.
func findAvailableRules() []resource.InstRule {
	util.GuaranteeInPreprocess()
	conf := config.GetConf()
	// Disable all instrumentation rules and rebuild the whole project to
	// restore all instrumentation actions, this also reverts the modification
	// on Golang runtime package.
	if conf.Restore {
		return nil
	}

	rules := make([]resource.InstRule, 0)
	// Load default rules unless explicitly disabled
	if !conf.IsDisableDefault() {
		rules = append(rules, loadDefaultRules()...)
	}
	if conf.RuleJsonFiles == "" {
		return rules
	}

	// strings.Split yields a single element when there is no comma, so one
	// loop handles both the single-file and the multi-file case.
	for _, ruleFile := range strings.Split(conf.RuleJsonFiles, ",") {
		ruleFile = strings.TrimSpace(ruleFile)
		if ruleFile == "" {
			continue
		}
		loaded, err := loadRuleFile(ruleFile)
		if err != nil {
			util.Log("Failed to load rules: %v", err)
			continue
		}
		rules = append(rules, loaded...)
	}
	return rules
}
// versionRegexp matches the "@vX.Y.Z/" path segment of a module cache
// directory, optionally followed by a pre-release ("-...") and/or build
// metadata ("+...", e.g. "+incompatible") suffix before the slash.
var versionRegexp = regexp.MustCompile(`@v\d+\.\d+\.\d+([-+].*?)?/`)

// extractVersion returns the module version embedded in a source file path,
// e.g. "v1.9.1" for ".../gin@v1.9.1/gin.go". Pre-release and build metadata
// suffixes are kept, so "+incompatible" modules are recognized as well. It
// returns an empty string when the path carries no version segment.
func extractVersion(path string) string {
	// Unify the path to Unix style
	path = filepath.ToSlash(path)
	version := versionRegexp.FindString(path)
	if version == "" {
		return ""
	}
	// Strip the leading "@" and the trailing "/" of the matched segment
	return version[1 : len(version)-1]
}
// splitVersionRange breaks a range such as "[1.2.0,1.5.0)" into its lower
// and upper bound and prefixes each bound with "v". A bound that is omitted in
// the range is therefore returned as the bare string "v". The range must
// contain ",", "[" and ")"; this is enforced through util.Assert.
func splitVersionRange(vr string) (string, string) {
	for _, token := range []string{",", "[", ")"} {
		util.Assert(strings.Contains(vr, token), "invalid version range format")
	}
	comma := strings.Index(vr, ",")
	// Skip the opening bracket on the left and the closing one on the right
	lower := vr[1:comma]
	upper := vr[comma+1 : len(vr)-1]
	return "v" + lower, "v" + upper
}
// matchVersion checks if the version string matches the version range in the
// rule. The version range is in format [start, end), where start is inclusive
// and end is exclusive. Either bound may be omitted to leave that side of the
// range open. If the rule version string is empty, it always matches.
//
// An error is returned when version does not look like a version, when
// ruleVersion is not in the expected range format, or when both bounds of the
// range are omitted.
func matchVersion(version string, ruleVersion string) (bool, error) {
	// Fast path, always match if the rule version is not specified
	if ruleVersion == "" {
		return true, nil
	}
	// Check that both the package version and the rule version are sane
	if !strings.Contains(version, "v") {
		return false, errc.New(errc.ErrMatchRule,
			fmt.Sprintf("invalid version %v", version))
	}
	if !strings.Contains(ruleVersion, "[") ||
		!strings.Contains(ruleVersion, ")") ||
		!strings.Contains(ruleVersion, ",") ||
		strings.Contains(ruleVersion, "v") {
		return false, errc.New(errc.ErrMatchRule,
			fmt.Sprintf("invalid rule version %v", ruleVersion))
	}
	// Remove extra whitespace from the rule version string
	ruleVersion = strings.ReplaceAll(ruleVersion, " ", "")

	// splitVersionRange reports an omitted bound as the bare prefix "v"
	ruleVersionStart, ruleVersionEnd := splitVersionRange(ruleVersion)
	hasStart := ruleVersionStart != "v"
	hasEnd := ruleVersionEnd != "v"
	switch {
	case !hasStart && !hasEnd:
		// Neither bound is given, e.g. "[,)". This must be tested first,
		// otherwise it is swallowed by the single-bound cases below.
		return false, errc.New(errc.ErrMatchRule,
			fmt.Sprintf("invalid rule version range %v", ruleVersion))
	case !hasStart:
		// Only end is specified
		return semver.Compare(version, ruleVersionEnd) < 0, nil
	case !hasEnd:
		// Only start is specified
		return semver.Compare(version, ruleVersionStart) >= 0, nil
	default:
		// Full version range
		return semver.Compare(version, ruleVersionStart) >= 0 &&
			semver.Compare(version, ruleVersionEnd) < 0, nil
	}
}
// match takes the arguments of one compile command and finds all rules that
// apply to the package being compiled.
//
// The import path and the Go version are read from the command flags. Every
// Go source file among the arguments is then checked against the rules
// registered for that import path: a rule applies when the module version and
// (if the rule requests one) the Go version fall into the rule's ranges and,
// for struct and func rules, when the file declares the targeted struct or
// function. A rule that has been attached to the bundle is removed from the
// candidate list, so it is attached at most once.
//
// It returns nil when no rule is registered for the import path, otherwise
// the bundle of matched rules (which may be empty).
func (rm *ruleMatcher) match(cmdArgs []string) *resource.RuleBundle {
	importPath := findFlagValue(cmdArgs, util.BuildPattern)
	util.Assert(importPath != "", "sanity check")
	if config.GetConf().Verbose {
		util.Log("RunMatch: %v (%v)", importPath, cmdArgs)
	}

	// Check whether any rule is registered for this package at all, to
	// avoid futile effort
	registered := rm.availableRules[importPath]
	if len(registered) == 0 {
		return nil // fast fail
	}
	// Work on a private copy because matched rules are removed from it
	availables := make([]resource.InstRule, len(registered))
	copy(availables, registered)

	parsedAst := make(map[string]*dst.File)
	bundle := resource.NewRuleBundle(importPath)

	goVersion := findFlagValue(cmdArgs, util.BuildGoVer)
	util.Assert(goVersion != "", "sanity check")
	util.Assert(strings.HasPrefix(goVersion, "go"), "sanity check")
	goVersion = strings.Replace(goVersion, "go", "v", 1)

	// If it's a vendor build, the module version comes from
	// vendor/modules.txt. It only depends on the import path, so resolve it
	// once instead of once per source file.
	vendorVersion := ""
	if rm.moduleVersions != nil {
		vendorVersion = findVendorModuleVersion(rm.moduleVersions, importPath)
	}

	for _, file := range cmdArgs {
		// It's not a go file, ignore silently
		if !util.IsGoFile(file) {
			continue
		}
		// Without a recorded vendor version, fall back to the version that
		// is embedded in the source file path
		version := vendorVersion
		if version == "" {
			version = extractVersion(file)
		}

		// Iterate backwards so that removing element i does not disturb the
		// indices that are still to be visited
		for i := len(availables) - 1; i >= 0; i-- {
			rule := availables[i]

			// Check if the version is supported
			matched, err := matchVersion(version, rule.GetVersion())
			if err != nil {
				util.Log("Bad match: file %s, rule %s, version %s: %v",
					file, rule, version, err)
				continue
			}
			if !matched {
				continue
			}

			// Check if the rule requires a specific Go version(range)
			if rule.GetGoVersion() != "" {
				matched, err = matchVersion(goVersion, rule.GetGoVersion())
				if err != nil {
					util.Log("Bad match: file %s, rule %s, go version %s: %v",
						file, rule, goVersion, err)
					continue
				}
				if !matched {
					continue
				}
			}

			// Check if it matches with file rule early as we try to avoid
			// parsing the file content, which is time consuming
			if fileRule, ok := rule.(*resource.InstFileRule); ok {
				ast, err := util.ParseAstFromFileOnlyPackage(file)
				if ast == nil || err != nil {
					util.Log("Failed to parse %s: %v", file, err)
					continue
				}
				util.Log("Match file rule %s", rule)
				bundle.AddFileRule(fileRule)
				bundle.SetPackageName(ast.Name.Name)
				availables = append(availables[:i], availables[i+1:]...)
				continue
			}

			// Fair enough, parse the file content; successfully parsed
			// files are cached so each one is parsed at most once
			tree, ok := parsedAst[file]
			if !ok {
				fileAst, err := util.ParseAstFromFileFast(file)
				if fileAst == nil || err != nil {
					// Tolerant failure, log only and try the next rule
					util.Log("failed to parse file %s: %v", file, err)
					continue
				}
				util.Assert(fileAst.Name.Name != "", "empty package name")
				bundle.SetPackageName(fileAst.Name.Name)
				parsedAst[file] = fileAst
				tree = fileAst
			}

			// Let's match with the rule precisely
			valid := false
			switch rl := rule.(type) {
			case *resource.InstStructRule:
				for _, decl := range tree.Decls {
					genDecl, ok := decl.(*dst.GenDecl)
					if !ok || !util.MatchStructDecl(genDecl, rl.StructType) {
						continue
					}
					util.Log("Match struct rule %s with %v", rule, cmdArgs)
					if err := bundle.AddFile2StructRule(file, rl); err != nil {
						util.Log("Failed to add struct rule: %v", err)
						continue
					}
					valid = true
					break
				}
			case *resource.InstFuncRule:
				for _, decl := range tree.Decls {
					funcDecl, ok := decl.(*dst.FuncDecl)
					if !ok || !util.MatchFuncDecl(funcDecl, rl.Function, rl.ReceiverType) {
						continue
					}
					util.Log("Match func rule %s with %v", rule, cmdArgs)
					if err := bundle.AddFile2FuncRule(file, rl); err != nil {
						util.Log("Failed to add func rule: %v", err)
						continue
					}
					valid = true
					break
				}
			}
			if valid {
				// Remove the rule from the available rules
				availables = append(availables[:i], availables[i+1:]...)
			}
		}
	}
	return bundle
}
// findFlagValue returns the argument that immediately follows the first
// occurrence of flag in cmd. It returns an empty string when the flag is
// absent or when it is the last argument and thus has no value.
func findFlagValue(cmd []string, flag string) string {
	for i, v := range cmd {
		// Guard the lookahead so a trailing flag cannot index past the end
		if v == flag && i+1 < len(cmd) {
			return cmd[i+1]
		}
	}
	return ""
}
// vendorModule represents one module entry of the vendor/modules.txt file.
// It records the module path, the module version and the packages that the
// module provides. An excerpt of the file looks like
//
//	# golang.org/x/text v0.21.0
//	## explicit; go 1.18
//	golang.org/x/text/secure/bidirule
//	golang.org/x/text/transform
//	golang.org/x/text/unicode/bidi
//	golang.org/x/text/unicode/norm
//	# golang.org/x/time v0.5.0
//	## explicit; go 1.18
//	golang.org/x/time/rate
//
// On a line starting with "# ", the second field is the module path and the
// third field is the module version. Lines starting with "## " are
// annotations. The remaining lines up to the next "# " line each name one
// package provided by that module.
type vendorModule struct {
	// path is the module path, e.g. golang.org/x/text.
	path string
	// version is the module version, e.g. v0.21.0. parseVendorModules leaves
	// it empty for a "# path => ..." line that carries no version.
	version string
	// submodules lists the import paths of the packages recorded under the
	// module line.
	submodules []string
}
// findVendorModuleVersion returns the version of the first module in modules
// that either is importPath itself or lists importPath among its packages.
// It returns an empty string when no module provides importPath.
func findVendorModuleVersion(modules []*vendorModule, importPath string) string {
	for idx := range modules {
		vm := modules[idx]
		owns := vm.path == importPath
		// Only scan the package list when the module path itself differs
		for j := 0; !owns && j < len(vm.submodules); j++ {
			owns = vm.submodules[j] == importPath
		}
		if owns {
			return vm.version
		}
	}
	return ""
}
// cutPrefix returns s without the leading prefix and reports whether the
// prefix was present. When s does not start with prefix, s is returned
// unchanged together with false. It is a local stand-in for
// strings.CutPrefix, kept for compatibility with go1.18, which is the version
// used internally.
func cutPrefix(s, prefix string) (after string, found bool) {
	trimmed := strings.TrimPrefix(s, prefix)
	return trimmed, strings.HasPrefix(s, prefix)
}
// parseVendorModules reads vendor/modules.txt below projDir and returns the
// modules that provide at least one package, together with those packages.
//
// The parsing follows src/cmd/go/internal/modload/vendor.go: a line starting
// with "# " opens a new module, lines starting with "## " are annotations and
// every other line that is a single valid import path names a package of the
// current module. Lines that appear before the first module line, or below a
// module line that could not be interpreted, are ignored.
//
// An error is returned when the file does not exist, cannot be opened or
// cannot be scanned completely.
func parseVendorModules(projDir string) ([]*vendorModule, error) {
	vendorFile := filepath.Join(projDir, "vendor", "modules.txt")
	if util.PathNotExists(vendorFile) {
		return nil, errc.New(errc.ErrNotExist, "vendor/modules.txt not found")
	}
	file, err := os.Open(vendorFile)
	if err != nil {
		return nil, errc.New(errc.ErrOpenFile, err.Error())
	}
	defer func() {
		if err := file.Close(); err != nil {
			util.Log("Failed to close %s: %v", vendorFile, err)
		}
	}()

	scanner := bufio.NewScanner(file)
	// 10MB should be enough to accommodate most long lines. Start with a
	// small buffer and let the scanner grow it on demand up to that limit.
	scanner.Buffer(make([]byte, 0, 64*1024), 10*1024*1024)

	vms := make([]*vendorModule, 0)
	// mod is the module the current line belongs to; it stays nil until the
	// first module line has been seen.
	var mod *vendorModule
	vendorVersion := make(map[string]string)
	// From src/cmd/go/internal/modload/vendor.go
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "# ") {
			f := strings.Fields(line)
			if len(f) < 3 {
				continue
			}
			switch {
			case semver.IsValid(f[2]):
				// A module, but we don't yet know whether it is in the build
				// list or only included to indicate a replacement.
				mod = &vendorModule{path: f[1], version: f[2]}
			case f[2] == "=>":
				// A wildcard replacement found in the main module's go.mod
				// file.
				mod = &vendorModule{path: f[1]}
			default:
				// Not a version or a wildcard replacement. We don't know how
				// to interpret this module line, so ignore it and everything
				// listed below it.
				mod = &vendorModule{}
			}
			// Replacement targets ("=> ...") on the module line are not
			// needed for version matching and are skipped.
			continue
		}
		// Not a module line. Must be a package within a module or a metadata
		// directive, either of which requires a preceding module line.
		if mod == nil || mod.path == "" {
			continue
		}
		if _, ok := cutPrefix(line, "## "); ok {
			// Skip annotation lines
			continue
		}
		if f := strings.Fields(line); len(f) == 1 && module.CheckImportPath(f[0]) == nil {
			// A package within the current module.
			mod.submodules = append(mod.submodules, f[0])
			// Since this module provides a package for the build, we know
			// that it is in the build list and is the selected version of its
			// path. If this information is new, record it.
			if v, ok := vendorVersion[mod.path]; !ok || semver.Compare(v, mod.version) < 0 {
				vms = append(vms, mod)
				vendorVersion[mod.path] = mod.version
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, errc.New(errc.ErrParseCode,
			"cannot parse vendor/modules.txt: "+err.Error())
	}
	return vms, nil
}
// runMatch splits cmd into its arguments, matches them against the rules of
// matcher and sends the resulting bundle (which may be nil) on ch.
func runMatch(matcher *ruleMatcher, cmd string, ch chan *resource.RuleBundle) {
	args := util.SplitCmds(cmd)
	ch <- matcher.match(args)
}
// matchRules runs a dry build to obtain the compile commands of the project,
// matches every command against the available instrumentation rules and
// stores each bundle that reports itself as valid in dp.bundles.
//
// It returns an error when the dry build fails (annotated with the content of
// the dry run log) or, in vendor mode, when vendor/modules.txt cannot be
// parsed.
func (dp *DepProcessor) matchRules() error {
	defer util.PhaseTimer("Match")()

	// The dry build yields the compile commands for all dependencies of the
	// project without actually compiling them
	compileCmds, err := runDryBuild(dp.goBuildCmd)
	if err != nil {
		// Tell us more about what happened in the dry run
		dryRunOutput, _ := util.ReadFile(util.GetLogPath(DryRunLog))
		err = errc.Adhere(err, "reason", dryRunOutput)
		return err
	}

	matcher := newRuleMatcher()
	// In vendor mode the module versions are taken from vendor/modules.txt,
	// so parse it up front for the matching below
	if dp.vendorMode {
		vendored, err := parseVendorModules(dp.getGoModDir())
		if err != nil {
			return err
		}
		if config.GetConf().Verbose {
			util.Log("Vendor modules: %v", vendored)
		}
		matcher.moduleVersions = vendored
	}

	// Match every compile command in its own goroutine and gather exactly
	// one result per command
	results := make(chan *resource.RuleBundle)
	for _, compileCmd := range compileCmds {
		go runMatch(matcher, compileCmd, results)
	}
	for range compileCmds {
		if bundle := <-results; bundle.IsValid() {
			dp.bundles = append(dp.bundles, bundle)
		}
	}
	return nil
}