bolt/lib/Rewrite/BinaryPassManager.cpp (330 lines of code) (raw):

//===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "bolt/Rewrite/BinaryPassManager.h" #include "bolt/Passes/ADRRelaxationPass.h" #include "bolt/Passes/Aligner.h" #include "bolt/Passes/AllocCombiner.h" #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/FrameOptimizer.h" #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" #include "bolt/Passes/Instrumentation.h" #include "bolt/Passes/JTFootprintReduction.h" #include "bolt/Passes/LongJmp.h" #include "bolt/Passes/LoopInversionPass.h" #include "bolt/Passes/PLTCall.h" #include "bolt/Passes/PatchEntries.h" #include "bolt/Passes/RegReAssign.h" #include "bolt/Passes/ReorderData.h" #include "bolt/Passes/ReorderFunctions.h" #include "bolt/Passes/RetpolineInsertion.h" #include "bolt/Passes/SplitFunctions.h" #include "bolt/Passes/StokeInfo.h" #include "bolt/Passes/TailDuplication.h" #include "bolt/Passes/ThreeWayBranch.h" #include "bolt/Passes/ValidateInternalCalls.h" #include "bolt/Passes/VeneerElimination.h" #include "bolt/Utils/CommandLineOpts.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include <memory> #include <numeric> using namespace llvm; namespace opts { extern cl::opt<bool> PrintAll; extern cl::opt<bool> PrintDynoStats; extern cl::opt<bool> DumpDotAll; extern cl::opt<std::string> AsmDump; extern cl::opt<bolt::PLTCall::OptType> PLT; static cl::opt<bool> DynoStatsAll("dyno-stats-all", cl::desc("print dyno stats after each stage"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); static cl::opt<bool> EliminateUnreachable("eliminate-unreachable", cl::desc("eliminate unreachable code"), cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> JTFootprintReductionFlag("jt-footprint-reduction", cl::desc("make jump tables size smaller at the cost of using more " "instructions at jump sites"), cl::ZeroOrMore, cl::cat(BoltOptCategory)); cl::opt<bool> NeverPrint("never-print", cl::desc("never print"), cl::init(false), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); cl::opt<bool> PrintAfterBranchFixup("print-after-branch-fixup", cl::desc("print function after fixing local branches"), cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintAfterLowering("print-after-lowering", cl::desc("print function after instruction lowering"), cl::Hidden, cl::cat(BoltOptCategory)); cl::opt<bool> PrintFinalized("print-finalized", cl::desc("print function after CFG is finalized"), cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintFOP("print-fop", cl::desc("print functions after frame optimizer pass"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintICF("print-icf", cl::desc("print functions after ICF optimization"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintICP("print-icp", cl::desc("print functions after indirect call promotion"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintInline("print-inline", cl::desc("print functions after inlining optimization"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintJTFootprintReduction("print-after-jt-footprint-reduction", cl::desc("print function after jt-footprint-reduction pass"), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintLongJmp("print-longjmp", cl::desc("print functions after longjmp pass"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); cl::opt<bool> PrintNormalized("print-normalized", cl::desc("print functions after CFG is normalized"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory)); static cl::opt<bool> PrintOptimizeBodyless("print-optimize-bodyless", cl::desc("print functions after bodyless optimization"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintPeepholes("print-peepholes", cl::desc("print functions after peephole optimization"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintPLT("print-plt", cl::desc("print functions after PLT optimization"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintProfileStats("print-profile-stats", cl::desc("print profile quality/bias analysis"), cl::ZeroOrMore, cl::init(false), cl::cat(BoltCategory)); static cl::opt<bool> PrintRegReAssign("print-regreassign", cl::desc("print functions after regreassign pass"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); cl::opt<bool> PrintReordered("print-reordered", cl::desc("print functions after layout optimization"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintReorderedFunctions("print-reordered-functions", cl::desc("print functions after clustering"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintRetpolineInsertion("print-retpoline-insertion", cl::desc("print functions after retpoline insertion pass"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltCategory)); static cl::opt<bool> PrintSCTC("print-sctc", cl::desc("print functions after conditional tail call simplification"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintSimplifyROLoads("print-simplify-rodata-loads", cl::desc("print functions after simplification of RO data loads"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintSplit("print-split", cl::desc("print functions after code splitting"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintVeneerElimination("print-veneer-elimination", cl::desc("print functions after veneer elimination pass"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> PrintUCE("print-uce", cl::desc("print functions after unreachable code elimination"), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); static cl::opt<bool> RegReAssign("reg-reassign", cl::desc("reassign registers so as to avoid using REX prefixes in hot code"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> SimplifyConditionalTailCalls("simplify-conditional-tail-calls", cl::desc("simplify conditional tail calls by removing unnecessary jumps"), cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> SimplifyRODataLoads("simplify-rodata-loads", cl::desc("simplify loads from read-only sections by replacing the memory " "operand with the constant found in the corresponding section"), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::list<std::string> SpecializeMemcpy1("memcpy1-spec", cl::desc("list of functions with call sites for which to specialize memcpy() " "for size 1"), cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> StringOps("inline-memcpy", cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"), cl::init(false), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> StripRepRet("strip-rep-ret", cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"), cl::init(true), cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> VerifyCFG("verify-cfg", cl::desc("verify the CFG after every pass"), cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::cat(BoltOptCategory)); static cl::opt<bool> TailDuplicationFlag("tail-duplication", cl::desc("duplicate unconditional branches that cross a cache line"), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); static cl::opt<bool> ThreeWayBranchFlag("three-way-branch", cl::desc("reorder three way branches"), cl::ZeroOrMore, cl::ReallyHidden, cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { namespace bolt { using namespace opts; const char BinaryFunctionPassManager::TimerGroupName[] = "passman"; const char BinaryFunctionPassManager::TimerGroupDesc[] = "Binary Function Pass Manager"; void BinaryFunctionPassManager::runPasses() { auto &BFs = BC.getBinaryFunctions(); for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) { const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>> &OptPassPair = Passes[PassIdx]; if (!OptPassPair.first) continue; const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second; std::string PassIdName = formatv("{0:2}_{1}", PassIdx, Pass->getName()).str(); if (opts::Verbosity > 0) outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n"; NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName, TimerGroupDesc, TimeOpts); callWithDynoStats([this, &Pass] { Pass->runOnFunctions(BC); }, BFs, Pass->getName(), opts::DynoStatsAll); if (opts::VerifyCFG && !std::accumulate( BFs.begin(), BFs.end(), true, [](const bool Valid, const std::pair<const uint64_t, BinaryFunction> &It) { return Valid && It.second.validateCFG(); })) { errs() << "BOLT-ERROR: Invalid CFG detected after pass " << Pass->getName() << "\n"; exit(1); } if (opts::Verbosity > 0) outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n"; if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass()) continue; const std::string Message = std::string("after ") + Pass->getName(); for (auto &It : BFs) { BinaryFunction &Function = It.second; if (!Pass->shouldPrint(Function)) continue; Function.print(outs(), Message, true); if (opts::DumpDotAll) Function.dumpGraphForPass(PassIdName); } } } void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { BinaryFunctionPassManager Manager(BC); const DynoStats InitialDynoStats = getDynoStats(BC.getBinaryFunctions()); Manager.registerPass(std::make_unique<AsmDumpPass>(), opts::AsmDump.getNumOccurrences()); if (opts::Instrument) Manager.registerPass(std::make_unique<Instrumentation>(NeverPrint)); // Here we manage dependencies/order manually, since passes are run in the // order they're registered. // Run this pass first to use stats for the original functions. Manager.registerPass(std::make_unique<PrintProgramStats>(NeverPrint)); if (opts::PrintProfileStats) Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint)); Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint)); Manager.registerPass(std::make_unique<ShortenInstructions>(NeverPrint)); Manager.registerPass(std::make_unique<RemoveNops>(NeverPrint)); Manager.registerPass(std::make_unique<NormalizeCFG>(PrintNormalized)); Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint), opts::StripRepRet); Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), opts::ICF); if (BC.isAArch64()) Manager.registerPass( std::make_unique<VeneerElimination>(PrintVeneerElimination)); Manager.registerPass( std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1), !opts::SpecializeMemcpy1.empty()); Manager.registerPass(std::make_unique<InlineMemcpy>(NeverPrint), opts::StringOps); Manager.registerPass(std::make_unique<IndirectCallPromotion>(PrintICP)); Manager.registerPass( std::make_unique<JTFootprintReduction>(PrintJTFootprintReduction), opts::JTFootprintReductionFlag); Manager.registerPass( std::make_unique<SimplifyRODataLoads>(PrintSimplifyROLoads), opts::SimplifyRODataLoads); Manager.registerPass(std::make_unique<RegReAssign>(PrintRegReAssign), opts::RegReAssign); Manager.registerPass(std::make_unique<Inliner>(PrintInline)); Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF), opts::ICF); Manager.registerPass(std::make_unique<PLTCall>(PrintPLT)); Manager.registerPass(std::make_unique<ThreeWayBranch>(), opts::ThreeWayBranchFlag); Manager.registerPass(std::make_unique<ReorderBasicBlocks>(PrintReordered)); Manager.registerPass(std::make_unique<EliminateUnreachableBlocks>(PrintUCE), opts::EliminateUnreachable); Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit)); Manager.registerPass(std::make_unique<LoopInversionPass>()); Manager.registerPass(std::make_unique<TailDuplication>(), opts::TailDuplicationFlag); // This pass syncs local branches with CFG. If any of the following // passes breaks the sync - they either need to re-run the pass or // fix branches consistency internally. Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup)); // This pass should come close to last since it uses the estimated hot // size of a function to determine the order. It should definitely // also happen after any changes to the call graph are made, e.g. inlining. Manager.registerPass( std::make_unique<ReorderFunctions>(PrintReorderedFunctions)); // Print final dyno stats right while CFG and instruction analysis are intact. Manager.registerPass( std::make_unique<DynoStatsPrintPass>( InitialDynoStats, "after all optimizations before SCTC and FOP"), opts::PrintDynoStats | opts::DynoStatsAll); // Add the StokeInfo pass, which extract functions for stoke optimization and // get the liveness information for them Manager.registerPass(std::make_unique<StokeInfo>(PrintStoke), opts::Stoke); // This pass introduces conditional jumps into external functions. // Between extending CFG to support this and isolating this pass we chose // the latter. Thus this pass will do double jump removal and unreachable // code elimination if necessary and won't rely on peepholes/UCE for these // optimizations. // More generally this pass should be the last optimization pass that // modifies branches/control flow. This pass is run after function // reordering so that it can tell whether calls are forward/backward // accurately. Manager.registerPass( std::make_unique<SimplifyConditionalTailCalls>(PrintSCTC), opts::SimplifyConditionalTailCalls); Manager.registerPass(std::make_unique<Peepholes>(PrintPeepholes)); Manager.registerPass(std::make_unique<AlignerPass>()); // Perform reordering on data contained in one or more sections using // memory profiling data. Manager.registerPass(std::make_unique<ReorderData>()); if (BC.isAArch64()) { Manager.registerPass(std::make_unique<ADRRelaxationPass>()); // Tighten branches according to offset differences between branch and // targets. No extra instructions after this pass, otherwise we may have // relocations out of range and crash during linking. Manager.registerPass(std::make_unique<LongJmpPass>(PrintLongJmp)); } // This pass should always run last.* Manager.registerPass(std::make_unique<FinalizeFunctions>(PrintFinalized)); // FrameOptimizer has an implicit dependency on FinalizeFunctions. // FrameOptimizer move values around and needs to update CFIs. To do this, it // must read CFI, interpret it and rewrite it, so CFIs need to be correctly // placed according to the final layout. Manager.registerPass(std::make_unique<FrameOptimizerPass>(PrintFOP)); Manager.registerPass(std::make_unique<AllocCombinerPass>(PrintFOP)); Manager.registerPass( std::make_unique<RetpolineInsertion>(PrintRetpolineInsertion)); // Assign each function an output section. Manager.registerPass(std::make_unique<AssignSections>()); // Patch original function entries if (BC.HasRelocations) Manager.registerPass(std::make_unique<PatchEntries>()); // This pass turns tail calls into jumps which makes them invisible to // function reordering. It's unsafe to use any CFG or instruction analysis // after this point. Manager.registerPass( std::make_unique<InstructionLowering>(PrintAfterLowering)); // In non-relocation mode, mark functions that do not fit into their original // space as non-simple if we have to (e.g. for correct debug info update). // NOTE: this pass depends on finalized code. if (!BC.HasRelocations) Manager.registerPass(std::make_unique<CheckLargeFunctions>(NeverPrint)); Manager.registerPass(std::make_unique<LowerAnnotations>(NeverPrint)); Manager.runPasses(); } } // namespace bolt } // namespace llvm