toutoumomoma.go (234 lines of code) (raw):

// Copyright ©2022 Elastic N.V. All rights reserved. // Copyright ©2021 Dan Kortschak. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package toutoumomoma import ( "bytes" "debug/gosym" "errors" "io" "math" "os" "strings" ) var ( // ErrUnknownFormat is returned for files that are not recognized. ErrUnknownFormat = errors.New("unknown format") // ErrNotGoExecutable indicates a file was not a Go executable. ErrNotGoExecutable = errors.New("not a Go executable") ) // File holds an executable object file. type File struct { file } type file interface { isGoExecutable() (ok bool, err error) hasBuildID() (ok bool, err error) hasRealFiles() (ok bool, err error) importedSymbols() ([]string, error) goSymbols(stdlib bool) ([]string, error) sectionStats() ([]Section, error) io.Closer } // Open opens the file at at the provided path. // // If the file at path is not an ELF, Mach-O or PE format // executable, Open will return ErrUnknownFormat. Files without // execute permissions may be opened. func Open(path string) (*File, error) { f, err := os.Open(path) if err != nil { return nil, err } file, err := NewFile(f) if err != nil { f.Close() return nil, err } return file, nil } // NewFile creates a new File for accessing a binary object in an underlying // reader. The binary is expected to start at position 0 in the ReaderAt. // // If the in the reader is not an ELF, Mach-O or PE format // executable, NewFile will return ErrUnknownFormat. 
func NewFile(r io.ReaderAt) (*File, error) { var magic [4]byte _, err := r.ReadAt(magic[:], 0) if err != nil { if err == io.EOF { err = ErrUnknownFormat } return nil, err } switch { case bytes.Equal(magic[:], []byte("\x7FELF")): exe, err := openELF(r) if err != nil { return nil, err } return &File{exe}, nil case bytes.Equal(magic[:3], []byte("\xfe\xed\xfa")), bytes.Equal(magic[1:], []byte("\xfa\xed\xfe")): exe, err := openMachO(r) if err != nil { return nil, err } return &File{exe}, nil case bytes.Equal(magic[:2], []byte("MZ")): exe, err := openPE(r) if err != nil { return nil, err } return &File{exe}, nil default: return nil, ErrUnknownFormat } } // Type returns the type of the executable object file. It will be one of, // "elf", "mach-o" or "pe". func (f *File) Type() string { switch f.file.(type) { case *elfFile: return "elf" case *machoFile: return "mach-o" case *peFile: return "pe" default: panic("unreachable") } } // Close closes the file. Close must be called if the File was created using // Open. If NewFile was used to create the File, Close will close the underlying // io.ReaderAt if it implements the io.Closer interface. func (f *File) Close() error { return f.file.Close() } // Stripped examines the file and returns whether it is likely to be a Go // executable that has had its symbols stripped. func (f *File) Stripped() (sneaky bool, err error) { isGo, err := f.isGoExecutable() if err != nil { return false, err } if !isGo { return false, nil } hasBuildID, err := f.hasBuildID() if err != nil { return false, err } if !hasBuildID { return true, nil } hasRealFiles, err := f.hasRealFiles() if err != nil { return false, err } return !hasRealFiles, nil } // ImportHash returns the import hash of an executable and the list of dynamic imports // in the executable examined to generate the hash. 
// For Windows PE format, the hash is calculated according to the algorithm
// described in the FireEye blog post
// https://www.fireeye.com/blog/threat-research/2014/01/tracking-malware-import-hashing.html.
// For Linux, a similar construction is used with each imported symbol represented
// as library.symbol without trimming the extension from the library part, while
// Darwin imports are the list of symbols without a library prefix and are equivalent
// to the Anomali SymHash https://www.anomali.com/blog/symhash.
//
// The algorithm obtains the list of imported function names and converts them to all
// lowercase. Any file extension is removed and then the MD5 hash of the ordered list of
// symbols, separated by commas, is calculated.
//
// Darwin:
//
//	___error
//	__exit
//	_clock_gettime
//
// Linux:
//
//	libc.so.6.free
//	.agwrite
//	libc.so.6.puts
//
// Windows:
//
//	kernel32.writefile
//	kernel32.writeconsolew
//	kernel32.waitformultipleobjects
func (f *File) ImportHash() (hash []byte, imports []string, err error) {
	return f.importHash()
}

// Imports returns the list of dynamic imports in the executable examined.
// The list is whatever the format-specific importedSymbols method reports.
func (f *File) Imports() (imports []string, err error) {
	return f.importedSymbols()
}

// GoSymbolHash returns the symbol hash of a Go executable and the list of symbols
// in the executable examined to generate the hash. If stdlib is true, symbols
// from the Go standard library are included, otherwise only third-party symbols
// are considered.
//
// The algorithm is analogous to the algorithm described for ImportHash with the exception
// that Go's static symbols are used in place of the dynamic import symbols used by the
// ImportHash. The list of symbols referenced by the executable is obtained and the MD5 hash
// of the ordered list of symbols, separated by commas, is calculated.
// The order of the
// symbols is as exists in the executable and returned by the standard library debug packages.
// The fully qualified import path of each symbol is included and while symbols used by
// ImportHash are canonicalised to lowercase, GoSymbolHash retains the case of the original
// symbol.
//
// If the file is an executable, but not a gc-compiled Go executable, ErrNotGoExecutable
// will be returned.
func (f *File) GoSymbolHash(stdlib bool) (hash []byte, imports []string, err error) {
	return f.goSymbolHash(stdlib)
}

// GoSymbols returns the list of Go symbols reported by the format-specific
// goSymbols implementation for the executable. The stdlib flag is passed
// through unchanged; see GoSymbolHash for its documented meaning.
func (f *File) GoSymbols(stdlib bool) (imports []string, err error) {
	return f.goSymbols(stdlib)
}

// Sections returns the names and sizes of object file sections in the order
// that they appear in file.
func (f *File) Sections() ([]Section, error) {
	return f.sectionStats()
}

// Section holds basic executable section information.
type Section struct {
	Name       string  // Name is the platform-specific name of the section.
	Size       uint64  // Size of the uncompressed data of the section.
	FileSize   uint64  // Size of the section data in the file.
	Entropy    float64 // Entropy is the Shannon entropy of the section data in bits.
	VarEntropy float64 // VarEntropy is an estimate of the variance of the section entropy.
	Flags      uint32  // Flags holds platform-specific section flags.
}

// Stripped is a convenience wrapper around File.Stripped. It opens the file
// at path, runs the check, and closes the file before returning.
func Stripped(path string) (sneaky bool, err error) {
	f, err := Open(path)
	if err != nil {
		return false, err
	}
	// The error from Close is discarded.
	defer f.Close()
	return f.Stripped()
}

// ImportHash is a convenience wrapper around File.ImportHash. It opens the
// file at path, computes the hash, and closes the file before returning.
func ImportHash(path string) (hash []byte, imports []string, err error) {
	f, err := Open(path)
	if err != nil {
		return nil, nil, err
	}
	// The error from Close is discarded.
	defer f.Close()
	return f.ImportHash()
}

// GoSymbolHash is a convenience wrapper around File.GoSymbolHash.
func GoSymbolHash(path string, stdlib bool) (hash []byte, imports []string, err error) { f, err := Open(path) if err != nil { return nil, nil, err } defer f.Close() return f.GoSymbolHash(stdlib) } // Sections is a convenience wrapper around File.Sections. func Sections(path string) ([]Section, error) { f, err := Open(path) if err != nil { return nil, err } defer f.Close() return f.Sections() } func isStdlib(s string, addr uint64, tab *gosym.Table) bool { if tab != nil { file, _, _ := tab.PCToLine(addr) if file == "??" { return false } } slash := strings.IndexByte(s, '/') if slash < 0 { return true } dot := strings.IndexByte(s[:slash], '.') return dot < 0 } // NameEntropy returns the entropy and entropy variance for the given import // symbols names as a set. func NameEntropy(symbols []string) (entropy, variance float64) { // Tally classes. var ( counts [256]float64 n int ) for _, data := range symbols { n += len(data) for _, b := range []byte(data) { counts[b]++ } } return entropyVariance(&counts, n) } // streamEntropy returns the entropy and entropy variance for bytes in the // provided io.Reader. func streamEntropy(r io.Reader) (entropy, variance float64, err error) { // Tally classes. var ( counts [256]float64 n int buf [4096]byte ) for { _n, err := r.Read(buf[:]) n += _n for _, b := range buf[:_n] { counts[b]++ } if err != nil { if err != io.EOF { return 0, 0, err } break } } entropy, variance = entropyVariance(&counts, n) return entropy, variance, nil } // entropyVariance returns the entropy and entropy variance for counts in // counts for a sequence that is n long. See https://arxiv.org/pdf/1807.02603.pdf // for details of the variance calculation. func entropyVariance(counts *[256]float64, n int) (entropy, variance float64) { if n == 0 { return 0, 0 } // H = -∑i=1..k((p_i)*log(p_i)) // F² = ∑i=1..k((p_i)*log²(p_i)) - H² // // Variance in H is F²/N // // Calculated using the weighted incremental algorithm for // mean and variance estimates. 
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm var pSum, pSum2 float64 for _, cnt := range counts { if cnt == 0 { // Ignore zero counts. continue } p := cnt / float64(n) l2p := math.Log2(p) pSum += p pSum2 += p * p tmp := entropy entropy = tmp + (p/pSum)*(l2p-tmp) variance += p * (l2p - tmp) * (l2p - entropy) } variance /= float64(n) if entropy == 0 { // Don't negate zero. return 0, variance } return -entropy, variance }