tool/preprocess/preprocess.go (565 lines of code) (raw):
// Copyright (c) 2024 Alibaba Group Holding Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package preprocess
import (
"bufio"
_ "embed"
"fmt"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
"strings"
"syscall"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/config"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/errc"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/resource"
"github.com/alibaba/opentelemetry-go-auto-instrumentation/tool/util"
"golang.org/x/mod/modfile"
"golang.org/x/tools/go/packages"
)
// -----------------------------------------------------------------------------
// Preprocess
//
// The preprocess package is used to preprocess the source code before the actual
// instrumentation. Instrumentation rules may introduce additional dependencies
// that are not present in the original source code. The preprocess phase is
// responsible for preparing these dependencies in advance.
const (
// Name, relative to the go.mod directory, of the generated otel package
// directory; postProcess removes it and saveDebugFiles copies it.
OtelPkgDir = "otel_pkg"
// Name of the generated importer source file placed next to go.mod.
OtelImporter = "otel_importer.go"
// Subdirectory of the temporary build directory that receives copies of the
// backed-up user files for debugging.
OtelUser = "otel_user"
// Not referenced in this file; presumably names the rule cache location.
// TODO confirm against its users.
OtelRuleCache = "rule_cache"
// Directory, resolved through util.GetLogPath, that holds backup copies.
OtelBackups = "backups"
// Suffix appended to the base name of a file to name its backup copy.
OtelBackupSuffix = ".bk"
// Name of the log file capturing the stderr of the dry-run build.
DryRunLog = "dry_run.log"
// Subcommand appended to -toolexec to tell the tool it runs in toolexec mode.
CompileRemix = "remix"
// Directory name checked next to go.mod to detect vendored builds.
VendorDir = "vendor"
)
// DepProcessor carries the state shared by the preprocess steps in this file:
// the build command being wrapped, the module being compiled, the matched rule
// bundles and the files backed up so they can be restored afterwards.
type DepProcessor struct {
bundles []*resource.RuleBundle // All dependent rule bundles
// Maps the path of each backed-up file to the path of its backup copy
backups map[string]string
moduleName string // Module name from go.mod
modulePath string // Where go.mod is located
// Arguments following the tool itself, i.e. including "go build"
goBuildCmd []string
// True when the build is expected to use the vendor directory
vendorMode bool
pkgLocalCache string // Local module cache path of alibaba-otel pkg module
}
// newDepProcessor returns a DepProcessor whose bundle list and backup map are
// empty but non-nil; every other field starts at its zero value.
func newDepProcessor() *DepProcessor {
	return &DepProcessor{
		bundles: make([]*resource.RuleBundle, 0),
		backups: make(map[string]string),
	}
}
// getGoModPath returns the path of the go.mod file recorded in modulePath and
// asserts that it has already been set.
func (dp *DepProcessor) getGoModPath() string {
	gomod := dp.modulePath
	util.Assert(gomod != "", "modulePath is empty")
	return gomod
}
// getGoModDir returns the directory that contains the go.mod file.
func (dp *DepProcessor) getGoModDir() string {
	gomod := dp.getGoModPath()
	return filepath.Dir(gomod)
}
// generatedOf returns the location of a generated file or directory named dir,
// placed directly inside the directory that contains go.mod.
func (dp *DepProcessor) generatedOf(dir string) string {
	root := dp.getGoModDir()
	return filepath.Join(root, dir)
}
// runCmdCombinedOutput runs the command given by args (args[0] is the
// executable, the remaining elements are its arguments) and returns its
// combined standard output and standard error. dir specifies the working
// directory of the command. If dir is the empty string, the command runs in
// the calling process's current directory.
//
// On failure the returned string is empty and the error carries the underlying
// cause, the captured output and the full command line.
func runCmdCombinedOutput(dir string, args ...string) (string, error) {
	if len(args) == 0 {
		// Nothing to execute; report it instead of panicking on args[0]
		return "", errc.New(errc.ErrRunCmd, "empty command")
	}
	cmd := exec.Command(args[0], args[1:]...)
	cmd.Dir = dir
	out, err := cmd.CombinedOutput()
	if err != nil {
		// Keep the underlying error: when the command cannot even be started
		// there is no output, and the output alone would be an empty message.
		msg := fmt.Sprintf("%v: %s", err, out)
		return "", errc.New(errc.ErrRunCmd, msg).
			With("command", fmt.Sprintf("%v", args))
	}
	return string(out), nil
}
// findGoMod looks for a go.mod file in dir and then in each of its ancestors,
// returning the path of the first one that exists. The walk stops once the
// parent of a directory is the directory itself; an error is returned when no
// go.mod was found or when dir is empty.
func findGoMod(dir string) (string, error) {
	for cur, prev := dir, ""; cur != "" && cur != prev; prev, cur = cur, filepath.Dir(cur) {
		candidate := filepath.Join(cur, util.GoModFile)
		if util.PathExists(candidate) {
			return candidate, nil
		}
	}
	return "", errc.New(errc.ErrPreprocess, "cannot find go.mod")
}
// parseGoMod reads the file at gomod and parses it as a go.mod file. Read
// errors are returned unchanged; parse errors are reported as ErrParseCode.
func parseGoMod(gomod string) (*modfile.File, error) {
	content, readErr := util.ReadFile(gomod)
	if readErr != nil {
		return nil, readErr
	}
	parsed, parseErr := modfile.Parse(util.GoModFile, []byte(content), nil)
	if parseErr != nil {
		return nil, errc.New(errc.ErrParseCode, parseErr.Error())
	}
	return parsed, nil
}
// initCmd records the wrapped build command. Everything after the tool's own
// name is kept, so the saved slice still starts with the "go build"
// subcommand; the slice is a copy and does not alias os.Args.
func (dp *DepProcessor) initCmd() {
	dp.goBuildCmd = append([]string{}, os.Args[1:]...)
	util.AssertGoBuild(dp.goBuildCmd)
	util.Log("Go build command: %v", dp.goBuildCmd)
}
// initMod determines which module is being compiled and where its go.mod
// lives, storing the result in moduleName and modulePath, and then resolves
// the local module cache directory of the alibaba-otel pkg module.
//
// It returns an error when the packages cannot be loaded, when no module could
// be identified, or when the module cache directory cannot be found.
func (dp *DepProcessor) initMod() (err error) {
	// Find compiling module and package information from the build command
	pkgs, err := findModule(dp.goBuildCmd)
	if err != nil {
		return err
	}
	util.Log("Find Go packages %v", util.Jsonify(pkgs))
	for _, pkg := range pkgs {
		util.Log("Find Go package %v", util.Jsonify(pkg))
		if pkg.GoFiles == nil {
			continue
		}
		if mod := pkg.Module; mod != nil {
			// Best case: the loaded package already carries module information
			util.Log("Find Go module %v", util.Jsonify(mod))
			util.Assert(mod.Path != "", "pkg.Module.Path is empty")
			util.Assert(mod.GoMod != "", "pkg.Module.GoMod is empty")
			dp.moduleName = mod.Path
			dp.modulePath = mod.GoMod
			continue
		}
		// No module information: fall back to searching for go.mod starting
		// from the directory of the first source file, for main packages only.
		util.Assert(pkg.Name != "", "pkg.Name is empty")
		if pkg.Name != "main" {
			continue
		}
		gomod, err := findGoMod(filepath.Dir(pkg.GoFiles[0]))
		if err != nil {
			return err
		}
		util.Assert(gomod != "", "gomod is empty")
		util.Assert(util.PathExists(gomod), "gomod does not exist")
		dp.modulePath = gomod
		// The module name comes from the module directive of that go.mod
		parsed, err := parseGoMod(gomod)
		if err != nil {
			return err
		}
		dp.moduleName = parsed.Module.Mod.Path
		// The generated otel_importer.go sits next to go.mod; it has to be
		// listed in the build command so that it is compiled together with
		// the original source files. Add it once only.
		listed := false
		for _, arg := range dp.goBuildCmd {
			if listed = strings.Contains(arg, OtelImporter); listed {
				break
			}
		}
		if !listed {
			dp.goBuildCmd = append(dp.goBuildCmd, OtelImporter)
		}
	}
	if dp.moduleName == "" || dp.modulePath == "" {
		return errc.New(errc.ErrPreprocess, "cannot find compiled module")
	}
	util.Log("Found module %v in %v", dp.moduleName, dp.modulePath)

	// Generated code imports the alibaba-otel/pkg module, which is not
	// published yet. A replace directive in go.mod will point the go tool at
	// the local module cache instead, so that cache directory is located here.
	// TODO: Once the alibaba-otel/pkg module is published, remove this code
	// along with the replace directive in the go.mod file.
	pkgUrl := "github.com/alibaba/opentelemetry-go-auto-instrumentation/pkg@f55e1e8"
	dp.pkgLocalCache, err = dp.findModCacheDir(pkgUrl)
	if err != nil {
		return err
	}
	if dp.pkgLocalCache == "" {
		return errc.New(errc.ErrPreprocess, "cannot find rule cache dir")
	}
	util.Log("Local module cache: %s", dp.pkgLocalCache)
	return nil
}
// initBuildMode decides whether the build uses vendored dependencies and
// stores the answer in vendorMode.
//
// An argument starting with -mod=mod or -mod=readonly switches vendor mode off
// unconditionally. Otherwise vendor mode is on exactly when a "vendor"
// directory exists next to go.mod.
// FIXME: the vendor directory name can be anything, "vendor" is assumed.
//
// In vendor mode no additional dependencies should be pulled online, so users
// are expected to have added the required dependencies to the vendor
// directory themselves.
func (dp *DepProcessor) initBuildMode() {
	vendorIgnored := false
	for _, arg := range dp.goBuildCmd {
		if strings.HasPrefix(arg, "-mod=mod") || strings.HasPrefix(arg, "-mod=readonly") {
			vendorIgnored = true
			break
		}
	}
	if vendorIgnored {
		dp.vendorMode = false
	} else {
		vendorPath := filepath.Join(dp.getGoModDir(), VendorDir)
		dp.vendorMode = util.PathExists(vendorPath)
	}
	util.Log("Vendor mode: %v", dp.vendorMode)
}
// initSignalHandler subscribes to SIGINT and SIGTERM and starts a goroutine
// that waits for the first such signal and logs that instrumentation was
// interrupted.
//
// NOTE(review): the goroutine only logs; it performs no cleanup itself, does
// not exit the process, and returns after the first signal. Confirm that
// cleanup is meant to happen elsewhere.
func (dp *DepProcessor) initSignalHandler() {
	interrupts := make(chan os.Signal, 1)
	signal.Notify(interrupts, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		received := <-interrupts
		if received == syscall.SIGTERM || received == syscall.SIGINT {
			util.Log("Interrupted instrumentation, cleaning up")
		}
	}()
}
// init prepares the processor in order: build command, module information,
// build mode and signal handler. Only the module step can fail, and its error
// is returned before the later steps run.
func (dp *DepProcessor) init() error {
	dp.initCmd()
	if err := dp.initMod(); err != nil {
		return err
	}
	dp.initBuildMode()
	dp.initSignalHandler()
	return nil
}
// postProcess removes the generated importer file and otel package directory
// and restores every backed-up file. All of this is best effort: failures are
// ignored. With the debug option set nothing is touched, so the changes stay
// available for inspection.
func (dp *DepProcessor) postProcess() {
	util.GuaranteeInPreprocess()
	if !config.GetConf().Debug {
		for _, generated := range []string{OtelImporter, OtelPkgDir} {
			_ = os.RemoveAll(dp.generatedOf(generated))
		}
		// Restore everything we have modified during instrumentation
		_ = dp.restoreBackupFiles()
	}
}
// backupFile copies origin into the backups directory under the log path,
// named after origin's base name plus the backup suffix, and remembers the
// pair so the file can be restored later. A file that was already backed up
// by this processor is not copied again.
func (dp *DepProcessor) backupFile(origin string) error {
	util.GuaranteeInPreprocess()
	name := filepath.Base(origin) + OtelBackupSuffix
	backup := util.GetLogPath(filepath.Join(OtelBackups, name))
	if err := os.MkdirAll(filepath.Dir(backup), 0777); err != nil {
		return errc.New(errc.ErrMkdirAll, err.Error())
	}
	if _, done := dp.backups[origin]; done {
		if config.GetConf().Verbose {
			util.Log("Backup %v already exists", origin)
		}
		return nil
	}
	if err := util.CopyFile(origin, backup); err != nil {
		return err
	}
	dp.backups[origin] = backup
	util.Log("Backup %v", origin)
	return nil
}
// restoreBackupFiles copies every recorded backup back over its original
// file. It stops at the first copy that fails and returns that error, leaving
// any remaining files unrestored.
func (dp *DepProcessor) restoreBackupFiles() error {
	util.GuaranteeInPreprocess()
	for target, saved := range dp.backups {
		if err := util.CopyFile(saved, target); err != nil {
			return err
		}
		util.Log("Restore %v", target)
	}
	return nil
}
// getCompileCommands scans the dry run log line by line and returns, with
// surrounding spaces trimmed, every line that util.IsCompileCommand accepts.
// The result is an empty, non-nil slice when no line matches.
func getCompileCommands() ([]string, error) {
	logFile, err := os.Open(util.GetLogPath(DryRunLog))
	if err != nil {
		return nil, errc.New(errc.ErrOpenFile, err.Error())
	}
	defer func() {
		if closeErr := logFile.Close(); closeErr != nil {
			util.Log("Failed to close dry run log file: %v", closeErr)
		}
	}()

	// 10MB should be enough to accommodate most long line
	const maxLineSize = 10 * 1024 * 1024
	scanner := bufio.NewScanner(logFile)
	scanner.Buffer(make([]byte, 0, maxLineSize), maxLineSize)

	commands := []string{}
	for scanner.Scan() {
		if text := scanner.Text(); util.IsCompileCommand(text) {
			commands = append(commands, strings.Trim(text, " "))
		}
	}
	if scanner.Err() != nil {
		return nil, errc.New(errc.ErrParseCode, "cannot parse dry run log")
	}
	return commands, nil
}
// $ go help packages
// Many commands apply to a set of packages:
//
// go <action> [packages]
//
// Usually, [packages] is a list of import paths.
//
// An import path that is a rooted path or that begins with
// a . or .. element is interpreted as a file system path and
// denotes the package in that directory.
//
// Otherwise, the import path P denotes the package found in
// the directory DIR/src/P for some DIR listed in the GOPATH
// environment variable (For more details see: 'go help gopath').
//
// If no import paths are given, the action applies to the
// package in the current directory.
//
// There are four reserved names for paths that should not be used
// for packages to be built with the go tool:
//
// - "main" denotes the top-level package in a stand-alone executable.
//
// - "all" expands to all packages found in all the GOPATH
// trees. For example, 'go list all' lists all the packages on the local
// system. When using modules, "all" expands to all packages in
// the main module and their dependencies, including dependencies
// needed by tests of any of those.
//
// - "std" is like all but expands to just the packages in the standard
// Go library.
//
// - "cmd" expands to the Go repository's commands and their
// internal libraries.
//
// Import paths beginning with "cmd/" only match source code in
// the Go repository.
//
// An import path is a pattern if it includes one or more "..." wildcards,
// each of which can match any string, including the empty string and
// strings containing slashes. Such a pattern expands to all package
// directories found in the GOPATH trees with names matching the
// patterns.
//
// To make common patterns more convenient, there are two special cases.
// First, /... at the end of the pattern can match an empty string,
// so that net/... matches both net and packages in its subdirectories, like net/http.
// Second, any slash-separated pattern element containing a wildcard never
// participates in a match of the "vendor" element in the path of a vendored
// package, so that ./... does not match packages in subdirectories of
// ./vendor or ./mycode/vendor, but ./vendor/... and ./mycode/vendor/... do.
// Note, however, that a directory named vendor that itself contains code
// is not a vendored package: cmd/vendor would be a command named vendor,
// and the pattern cmd/... matches it.
// See golang.org/s/go15vendor for more about vendoring.
//
// An import path can also name a package to be downloaded from
// a remote repository. Run 'go help importpath' for details.
//
// Every package in a program must have a unique import path.
// By convention, this is arranged by starting each path with a
// unique prefix that belongs to you. For example, paths used
// internally at Google all begin with 'google', and paths
// denoting remote repositories begin with the path to the code,
// such as 'github.com/user/repo'.
//
// Packages in a program need not have unique package names,
// but there are two reserved package names with special meaning.
// The name main indicates a command, not a library.
// Commands are built into binaries and cannot be imported.
// The name documentation indicates documentation for
// a non-Go program in the directory. Files in package documentation
// are ignored by the go command.
//
// As a special case, if the package list is a list of .go files from a
// single directory, the command is applied to a single synthesized
// package made up of exactly those files, ignoring any build constraints
// in those files and ignoring any other files in the directory.
//
// Directory and file names that begin with "." or "_" are ignored
// by the go tool, as are directories named "testdata".
//
// tryLoadPackage loads the packages matching the pattern path, requesting
// module, file and name information. initMod reads exactly those three fields
// from the results, so all three load modes must stay requested. A load
// failure is reported as ErrPreprocess.
func tryLoadPackage(path string) ([]*packages.Package, error) {
	loadMode := packages.NeedModule | packages.NeedFiles | packages.NeedName
	loaded, err := packages.Load(&packages.Config{Mode: loadMode}, path)
	if err != nil {
		return nil, errc.New(errc.ErrPreprocess, err.Error())
	}
	return loaded, nil
}
// findModule returns the packages the build command is going to compile.
//
// buildCmd is the saved build command, starting with "go build". It is
// scanned from its last argument backwards, because package arguments come
// last; the scan stops at the first build flag (an argument starting with
// "-") or at the "build" subcommand itself. When no package is found among
// the arguments, the package in the current directory is used, which mirrors
// the go command's behavior when no import paths are given.
//
// Packages that loaded with errors are skipped. An error is returned when
// loading "." fails or when no usable package remains.
func findModule(buildCmd []string) ([]*packages.Package, error) {
	candidates := make([]*packages.Package, 0)
	found := false
	// Find from build arguments e.g. go build test.go or go build cmd/app
	for i := len(buildCmd) - 1; i >= 0; i-- {
		buildArg := buildCmd[i]
		// Stop scanning when we see a build flag or the "build" command.
		// Note the argument order: the flag prefix "-" is searched for in
		// buildArg, not the other way round.
		if strings.HasPrefix(buildArg, "-") || buildArg == "build" {
			break
		}

		// Special case. If the file named with test_ prefix, we create a fake
		// package for it. This is a workaround for the case that the test file
		// is compiled with other normal files.
		if strings.HasSuffix(buildArg, ".go") &&
			strings.HasPrefix(buildArg, "test_") {
			artificialPkg := &packages.Package{
				GoFiles: []string{buildArg},
				Name:    "main",
			}
			candidates = append(candidates, artificialPkg)
			found = true
			continue
		}

		// Trying to load package from the build argument, error is tolerated
		// because we dont know what the build argument is. One exception is
		// when we already found packages, in this case, we expect subsequent
		// build arguments are packages, so we should not tolerate any error.
		pkgs, err := tryLoadPackage(buildArg)
		if err != nil {
			if found {
				// Packages were already found, so stop at the first argument
				// that cannot be loaded instead of skipping over it
				break
			}
			util.Log("Cannot load package from %v", buildArg)
			continue
		}
		for _, pkg := range pkgs {
			if pkg.Errors != nil {
				continue
			}
			found = true
			candidates = append(candidates, pkg)
		}
	}

	// If no import paths are given, the action applies to the package in the
	// current directory.
	if !found {
		pkgs, err := tryLoadPackage(".")
		if err != nil {
			return nil, err
		}
		for _, pkg := range pkgs {
			if pkg.Errors != nil {
				continue
			}
			candidates = append(candidates, pkg)
		}
	}
	if len(candidates) == 0 {
		return nil, errc.New(errc.ErrPreprocess, "no package found")
	}
	return candidates, nil
}
// storeRuleBundles hands the collected rule bundles to
// resource.StoreRuleBundles and, on success, drops the processor's reference
// to them: the bundles must not be used through dp afterwards.
func (dp *DepProcessor) storeRuleBundles() error {
	if err := resource.StoreRuleBundles(dp.bundles); err != nil {
		return err
	}
	// No longer valid from now on
	dp.bundles = nil
	return nil
}
// runDryBuild runs "go build -a -x -n" with the user's build arguments and
// returns the compile commands found in its output.
//
// goBuildCmd is the saved build command starting with "go build"; everything
// after those two elements is forwarded to the dry build. The stderr of the
// dry build is written to the dry run log, which is then scanned by
// getCompileCommands.
func runDryBuild(goBuildCmd []string) ([]string, error) {
	// Validate the command before slicing off its "go build" prefix below
	util.AssertGoBuild(goBuildCmd)

	dryRunLog, err := os.Create(util.GetLogPath(DryRunLog))
	if err != nil {
		return nil, errc.New(errc.ErrCreateFile, err.Error())
	}

	// The full build command is: "go build -a -x -n {...}"
	args := []string{"go", "build", "-a", "-x", "-n"}
	args = append(args, goBuildCmd[2:]...)
	util.AssertGoBuild(args)

	// Run the dry build
	util.Log("Run dry build %v", args)
	cmd := exec.Command(args[0], args[1:]...)
	// The log that is scanned for compile commands is fed from stderr, not
	// stdout: that is the stream the dry build output is captured from.
	cmd.Stdout = os.Stdout
	cmd.Stderr = dryRunLog
	// @@Note that dir should not be set, as the dry build should be run in the
	// same directory as the original build command
	cmd.Dir = ""
	runErr := cmd.Run()

	// Close the log on every path before it is reopened for reading, so the
	// file handle is not leaked.
	if closeErr := dryRunLog.Close(); closeErr != nil {
		util.Log("Failed to close dry run log file: %v", closeErr)
	}
	if runErr != nil {
		return nil, errc.New(errc.ErrRunCmd, runErr.Error()).
			With("command", fmt.Sprintf("%v", args))
	}

	// Find compile commands from dry run log
	compileCmds, err := getCompileCommands()
	if err != nil {
		return nil, err
	}
	return compileCmds, nil
}
// runModTidy runs "go mod tidy" in the directory that contains go.mod, logs
// whatever output was returned and passes the command's error through.
func (dp *DepProcessor) runModTidy() error {
	dir := dp.getGoModDir()
	output, err := runCmdCombinedOutput(dir, "go", "mod", "tidy")
	util.Log("Run go mod tidy: %v", output)
	return err
}
// nullDevice returns the name of the null device for the operating system the
// tool runs on: "NUL" on Windows and "/dev/null" everywhere else.
func nullDevice() string {
	switch runtime.GOOS {
	case "windows":
		return "NUL"
	default:
		return "/dev/null"
	}
}
// runBuildWithToolexec re-runs the user's build through "go build" with this
// executable installed as the -toolexec program, which is how instrumentation
// is started. goBuildCmd is the saved build command starting with "go build";
// only the arguments after those two elements are forwarded.
//
// The build is forced to rebuild everything (-a). In debug mode compiler
// optimizations are disabled, and with the restore option the output is sent
// to the null device so that no binary is produced.
//
// NOTE(review): the "-o <null device>" pair is appended after the user's
// arguments; confirm the go tool still treats it as a flag when package
// arguments precede it.
func runBuildWithToolexec(goBuildCmd []string) error {
	self, err := os.Executable()
	if err != nil {
		return errc.New(errc.ErrGetExecutable, err.Error())
	}
	args := []string{
		"go",
		"build",
		// The remix subcommand tells the tool it is running in toolexec mode
		"-toolexec=" + self + " " + CompileRemix,
		// Leave the temporary compilation directory
		util.BuildWork,
		// Force rebuilding
		"-a",
	}
	if config.GetConf().Debug {
		// Disable compiler optimizations for debugging mode
		args = append(args, "-gcflags=all=-N -l")
	}
	// Append additional build arguments provided by the user
	args = append(args, goBuildCmd[2:]...)
	if config.GetConf().Restore {
		// Dont generate any compiled binary when using -restore
		args = append(args, "-o", nullDevice())
	}
	if config.GetConf().Verbose {
		util.Log("Run go build with args %v in toolexec mode", args)
	}
	util.AssertGoBuild(args)
	// @@ Note that the working directory is deliberately left unset: the build
	// with toolexec should run in the same directory as the original build
	// command
	output, err := runCmdCombinedOutput("", args...)
	util.Log("Run go build with toolexec: %v", output)
	return err
}
// precheck validates the environment and the command line before any work is
// done.
//
// It returns an error when GO111MODULE is "off". It prints the version and
// exits with status 0 when fewer than two arguments follow the tool name or
// when the first of them does not contain "go". When the subcommand is not
// "build", the original command is executed as is and the process exits with
// status 1 on failure and 0 on success. Only a "go build" invocation makes
// precheck return nil.
func precheck() error {
	// Check if the project is modularized
	if os.Getenv("GO111MODULE") == "off" {
		return errc.New(errc.ErrNotModularized, "GO111MODULE is off")
	}
	// Check if the build arguments is sane
	if len(os.Args) < 3 || !strings.Contains(os.Args[1], "go") {
		config.PrintVersion()
		os.Exit(0)
	}
	if subcommand := os.Args[2]; subcommand != "build" {
		// exec original go command
		status := 0
		if err := util.RunCmd(os.Args[1:]...); err != nil {
			status = 1
		}
		os.Exit(status)
	}
	return nil
}
// rectifyMod backs up the module files found next to go.mod (go.mod, go.sum
// and go.work.sum, whichever exist) and then makes sure go.mod contains a
// replace directive that maps pkgPrefix to the local module cache directory.
//
// The replace directive is a workaround for the alibaba-otel pkg module not
// being published yet. go.mod is rewritten only when no replace for pkgPrefix
// is present.
func (dp *DepProcessor) rectifyMod() error {
	// Backup go.mod and go.sum files
	root := dp.getGoModDir()
	for _, name := range []string{util.GoModFile, util.GoSumFile, util.GoWorkSumFile} {
		file := filepath.Join(root, name)
		if !util.PathExists(file) {
			continue
		}
		if err := dp.backupFile(file); err != nil {
			return err
		}
	}

	gomod := dp.getGoModPath()
	mf, err := parseGoMod(gomod)
	if err != nil {
		return err
	}
	for _, rep := range mf.Replace {
		if rep.Old.Path == pkgPrefix {
			// Already replaced, go.mod needs no change
			return nil
		}
	}
	if err = mf.AddReplace(pkgPrefix, "", dp.pkgLocalCache, ""); err != nil {
		return err
	}
	formatted, err := mf.Format()
	if err != nil {
		return err
	}
	if _, err = util.WriteFile(gomod, string(formatted)); err != nil {
		return err
	}
	return nil
}
// saveDebugFiles copies the generated otel package directory and the current
// contents of every backed-up file into the temporary build directory. It is
// best effort: a destination directory that cannot be created is skipped and
// copy results are not checked.
func (dp *DepProcessor) saveDebugFiles() {
	pkgDst := filepath.Join(util.GetTempBuildDir(), OtelPkgDir)
	if os.MkdirAll(pkgDst, os.ModePerm) == nil {
		util.CopyDir(dp.generatedOf(OtelPkgDir), pkgDst)
	}

	userDst := filepath.Join(util.GetTempBuildDir(), OtelUser)
	if os.MkdirAll(userDst, os.ModePerm) == nil {
		for origin := range dp.backups {
			util.CopyFile(origin, filepath.Join(userDst, filepath.Base(origin)))
		}
	}
}
//go:embed template.go
var importerTemplate string
func (dp *DepProcessor) newRuleImporter() {
importerTemplate = strings.ReplaceAll(importerTemplate, util.GoBuildIgnoreComment, "")
util.WriteFile(dp.generatedOf(OtelImporter), importerTemplate)
}
// addRuleImporter appends to the generated importer file:
//
//   - a blank import for every non-empty path reported by the function rules
//     of the matched bundles, so that those packages become dependencies of
//     the project;
//   - for every bundle, a pair of go:linkname variables that bind
//     OtelGetStackImpl and OtelPrintStackImpl of the bundle's import path to
//     debug.Stack and to a log.Printf based printer.
//
// It returns an error when the importer file cannot be read or written.
func (dp *DepProcessor) addRuleImporter() error {
	// Collect the distinct import paths required by the rules
	paths := map[string]bool{}
	for _, bundle := range dp.bundles {
		for _, funcRules := range bundle.File2FuncRules {
			for _, rules := range funcRules {
				for _, rule := range rules {
					if path := rule.GetPath(); path != "" {
						paths[path] = true
					}
				}
			}
		}
	}

	importer := dp.generatedOf(OtelImporter)
	content, err := util.ReadFile(importer)
	if err != nil {
		return err
	}

	var sb strings.Builder
	sb.WriteString(content)
	for path := range paths {
		fmt.Fprintf(&sb, "import _ %q\n", path)
	}
	// The loop index keeps the generated variable names unique per bundle
	for i, bundle := range dp.bundles {
		fmt.Fprintf(&sb, "//go:linkname getstatck%d %s.OtelGetStackImpl\n", i, bundle.ImportPath)
		fmt.Fprintf(&sb, "var getstatck%d = debug.Stack\n", i)
		fmt.Fprintf(&sb, "//go:linkname printstack%d %s.OtelPrintStackImpl\n", i, bundle.ImportPath)
		// The generated printer passes the stack as an argument, never as the
		// format string, so a '%' in the stack text is printed verbatim.
		fmt.Fprintf(&sb, "var printstack%d = func (bt []byte){ log.Printf(\"%%s\", bt) }\n", i)
	}

	// A failed write would leave the importer without the rule dependencies,
	// so report it instead of dropping it.
	if _, err = util.WriteFile(importer, sb.String()); err != nil {
		return err
	}
	return nil
}
// Preprocess is the entry point of the preprocess phase. It validates the
// invocation, prepares the user's module so that the dependencies required by
// the instrumentation rules are available, and finally runs the user's build
// in toolexec mode to perform the instrumentation.
//
// Generated files are removed and modified files are restored by postProcess
// when Preprocess returns, whether it succeeded or not. The first error
// encountered is returned.
func Preprocess() error {
	// Make sure the project is modularized otherwise we cannot proceed
	if err := precheck(); err != nil {
		return err
	}
	dp := newDepProcessor()
	if err := dp.init(); err != nil {
		return err
	}
	defer dp.postProcess()

	// go mod tidy is kept best effort as before, but a failure is logged so
	// that it does not go completely unnoticed.
	tidy := func() {
		if err := dp.runModTidy(); err != nil {
			util.Log("go mod tidy failed: %v", err)
		}
	}

	// Each phase runs in its own function so that its deferred timer stops
	// when the phase ends, not when Preprocess returns.
	preprocess := func() error {
		defer util.PhaseTimer("Preprocess")()
		// Backup go.mod and add additional replace directives for the
		// alibaba-otel pkg module
		if err := dp.rectifyMod(); err != nil {
			return err
		}
		// Add otel dependencies as part of the project dependencies
		dp.newRuleImporter()
		tidy()
		// Match rules based on the source files plus added otel imports
		if err := dp.matchRules(); err != nil {
			return err
		}
		// Add hook rule dependency as part of the project dependencies
		if err := dp.addRuleImporter(); err != nil {
			return err
		}
		// Update go.mod with the all additional dependencies
		tidy()
		// Rectify file rules to make sure we can find them locally
		if err := dp.rectifyRule(); err != nil {
			return err
		}
		// From this point on, we no longer modify the rules
		if err := dp.storeRuleBundles(); err != nil {
			return err
		}
		// Retain otel rules and modified user files for debugging
		dp.saveDebugFiles()
		return nil
	}
	if err := preprocess(); err != nil {
		return err
	}

	instrument := func() error {
		defer util.PhaseTimer("Instrument")()
		// Run go build with toolexec to start instrumentation
		return runBuildWithToolexec(dp.goBuildCmd)
	}
	if err := instrument(); err != nil {
		return err
	}
	util.Log("Build completed successfully")
	return nil
}