bolt/lib/Profile/YAMLProfileReader.cpp (332 lines of code) (raw):

//===- bolt/Profile/YAMLProfileReader.cpp - YAML profile de-serializer ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "bolt/Profile/YAMLProfileReader.h" #include "bolt/Core/BinaryBasicBlock.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Passes/MCF.h" #include "bolt/Profile/ProfileYAMLMapping.h" #include "bolt/Utils/Utils.h" #include "llvm/Support/CommandLine.h" using namespace llvm; namespace opts { extern cl::opt<unsigned> Verbosity; extern cl::OptionCategory BoltOptCategory; static llvm::cl::opt<bool> IgnoreHash("profile-ignore-hash", cl::desc("ignore hash while reading function profile"), cl::init(false), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); } namespace llvm { namespace bolt { bool YAMLProfileReader::isYAML(const StringRef Filename) { ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = MB.getError()) report_error(Filename, EC); StringRef Buffer = MB.get()->getBuffer(); if (Buffer.startswith("---\n")) return true; return false; } void YAMLProfileReader::buildNameMaps( std::map<uint64_t, BinaryFunction> &Functions) { for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { StringRef Name = YamlBF.Name; const size_t Pos = Name.find("(*"); if (Pos != StringRef::npos) Name = Name.substr(0, Pos); ProfileNameToProfile[Name] = &YamlBF; if (const Optional<StringRef> CommonName = getLTOCommonName(Name)) LTOCommonNameMap[*CommonName].push_back(&YamlBF); } for (auto &BFI : Functions) { const BinaryFunction &Function = BFI.second; for (StringRef Name : Function.getNames()) if (const Optional<StringRef> CommonName = getLTOCommonName(Name)) LTOCommonNameFunctionMap[*CommonName].insert(&Function); } } bool YAMLProfileReader::hasLocalsWithFileName() const { for (const StringMapEntry<yaml::bolt::BinaryFunctionProfile *> &KV : ProfileNameToProfile) { const StringRef &FuncName = KV.getKey(); if (FuncName.count('/') == 2 && FuncName[0] != '/') return true; } return false; } bool YAMLProfileReader::parseFunctionProfile( BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) { BinaryContext &BC = BF.getBinaryContext(); bool ProfileMatched = true; uint64_t MismatchedBlocks = 0; uint64_t MismatchedCalls = 0; uint64_t MismatchedEdges = 0; uint64_t FunctionExecutionCount = 0; BF.setExecutionCount(YamlBF.ExecCount); if (!opts::IgnoreHash && YamlBF.Hash != BF.computeHash(/*UseDFS=*/true)) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: function hash mismatch\n"; ProfileMatched = false; } if (YamlBF.NumBasicBlocks != BF.size()) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: number of basic blocks mismatch\n"; ProfileMatched = false; } BinaryFunction::BasicBlockOrderType DFSOrder = BF.dfs(); for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) { if (YamlBB.Index >= DFSOrder.size()) { if (opts::Verbosity >= 2) errs() << "BOLT-WARNING: index " << YamlBB.Index << " is out of bounds\n"; ++MismatchedBlocks; continue; } BinaryBasicBlock &BB = *DFSOrder[YamlBB.Index]; // Basic samples profile (without LBR) does not have branches information // and needs a special processing. if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) { if (!YamlBB.EventCount) { BB.setExecutionCount(0); continue; } uint64_t NumSamples = YamlBB.EventCount * 1000; if (NormalizeByInsnCount && BB.getNumNonPseudos()) NumSamples /= BB.getNumNonPseudos(); else if (NormalizeByCalls) NumSamples /= BB.getNumCalls() + 1; BB.setExecutionCount(NumSamples); if (BB.isEntryPoint()) FunctionExecutionCount += NumSamples; continue; } BB.setExecutionCount(YamlBB.ExecCount); for (const yaml::bolt::CallSiteInfo &YamlCSI : YamlBB.CallSites) { BinaryFunction *Callee = YamlCSI.DestId < YamlProfileToFunction.size() ? YamlProfileToFunction[YamlCSI.DestId] : nullptr; bool IsFunction = Callee ? true : false; MCSymbol *CalleeSymbol = nullptr; if (IsFunction) CalleeSymbol = Callee->getSymbolForEntryID(YamlCSI.EntryDiscriminator); BF.getAllCallSites().emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds, YamlCSI.Offset); if (YamlCSI.Offset >= BB.getOriginalSize()) { if (opts::Verbosity >= 2) errs() << "BOLT-WARNING: offset " << YamlCSI.Offset << " out of bounds in block " << BB.getName() << '\n'; ++MismatchedCalls; continue; } MCInst *Instr = BF.getInstructionAtOffset(BB.getInputOffset() + YamlCSI.Offset); if (!Instr) { if (opts::Verbosity >= 2) errs() << "BOLT-WARNING: no instruction at offset " << YamlCSI.Offset << " in block " << BB.getName() << '\n'; ++MismatchedCalls; continue; } if (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr)) { if (opts::Verbosity >= 2) errs() << "BOLT-WARNING: expected call at offset " << YamlCSI.Offset << " in block " << BB.getName() << '\n'; ++MismatchedCalls; continue; } auto setAnnotation = [&](StringRef Name, uint64_t Count) { if (BC.MIB->hasAnnotation(*Instr, Name)) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: ignoring duplicate " << Name << " info for offset 0x" << Twine::utohexstr(YamlCSI.Offset) << " in function " << BF << '\n'; return; } BC.MIB->addAnnotation(*Instr, Name, Count); }; if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) { auto &CSP = BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>( *Instr, "CallProfile"); CSP.emplace_back(CalleeSymbol, YamlCSI.Count, YamlCSI.Mispreds); } else if (BC.MIB->getConditionalTailCall(*Instr)) { setAnnotation("CTCTakenCount", YamlCSI.Count); setAnnotation("CTCMispredCount", YamlCSI.Mispreds); } else { setAnnotation("Count", YamlCSI.Count); } } for (const yaml::bolt::SuccessorInfo &YamlSI : YamlBB.Successors) { if (YamlSI.Index >= DFSOrder.size()) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: index out of bounds for profiled block\n"; ++MismatchedEdges; continue; } BinaryBasicBlock &SuccessorBB = *DFSOrder[YamlSI.Index]; if (!BB.getSuccessor(SuccessorBB.getLabel())) { if (opts::Verbosity >= 1) errs() << "BOLT-WARNING: no successor for block " << BB.getName() << " that matches index " << YamlSI.Index << " or block " << SuccessorBB.getName() << '\n'; ++MismatchedEdges; continue; } BinaryBasicBlock::BinaryBranchInfo &BI = BB.getBranchInfo(SuccessorBB); BI.Count += YamlSI.Count; BI.MispredictedCount += YamlSI.Mispreds; } } // If basic block profile wasn't read it should be 0. for (BinaryBasicBlock &BB : BF) if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE) BB.setExecutionCount(0); if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) { BF.setExecutionCount(FunctionExecutionCount); estimateEdgeCounts(BF); } ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges; if (ProfileMatched) BF.markProfiled(YamlBP.Header.Flags); if (!ProfileMatched && opts::Verbosity >= 1) errs() << "BOLT-WARNING: " << MismatchedBlocks << " blocks, " << MismatchedCalls << " calls, and " << MismatchedEdges << " edges in profile did not match function " << BF << '\n'; return ProfileMatched; } Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = MB.getError()) { errs() << "ERROR: cannot open " << Filename << ": " << EC.message() << "\n"; return errorCodeToError(EC); } yaml::Input YamlInput(MB.get()->getBuffer()); // Consume YAML file. YamlInput >> YamlBP; if (YamlInput.error()) { errs() << "BOLT-ERROR: syntax error parsing profile in " << Filename << " : " << YamlInput.error().message() << '\n'; return errorCodeToError(YamlInput.error()); } // Sanity check. if (YamlBP.Header.Version != 1) return make_error<StringError>( Twine("cannot read profile : unsupported version"), inconvertibleErrorCode()); if (YamlBP.Header.EventNames.find(',') != StringRef::npos) return make_error<StringError>( Twine("multiple events in profile are not supported"), inconvertibleErrorCode()); // Match profile to function based on a function name. buildNameMaps(BC.getBinaryFunctions()); // Preliminary assign function execution count. for (auto &KV : BC.getBinaryFunctions()) { BinaryFunction &BF = KV.second; for (StringRef Name : BF.getNames()) { auto PI = ProfileNameToProfile.find(Name); if (PI != ProfileNameToProfile.end()) { yaml::bolt::BinaryFunctionProfile &YamlBF = *PI->getValue(); BF.setExecutionCount(YamlBF.ExecCount); break; } } } return Error::success(); } bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) { for (StringRef Name : BF.getNames()) { if (ProfileNameToProfile.find(Name) != ProfileNameToProfile.end()) return true; if (const Optional<StringRef> CommonName = getLTOCommonName(Name)) { if (LTOCommonNameMap.find(*CommonName) != LTOCommonNameMap.end()) return true; } } return false; } Error YAMLProfileReader::readProfile(BinaryContext &BC) { YamlProfileToFunction.resize(YamlBP.Functions.size() + 1); auto profileMatches = [](const yaml::bolt::BinaryFunctionProfile &Profile, BinaryFunction &BF) { if (opts::IgnoreHash && Profile.NumBasicBlocks == BF.size()) return true; if (!opts::IgnoreHash && Profile.Hash == static_cast<uint64_t>(BF.getHash())) return true; return false; }; // We have to do 2 passes since LTO introduces an ambiguity in function // names. The first pass assigns profiles that match 100% by name and // by hash. The second pass allows name ambiguity for LTO private functions. for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &Function = BFI.second; // Clear function call count that may have been set while pre-processing // the profile. Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE); // Recompute hash once per function. if (!opts::IgnoreHash) Function.computeHash(/*UseDFS=*/true); for (StringRef FunctionName : Function.getNames()) { auto PI = ProfileNameToProfile.find(FunctionName); if (PI == ProfileNameToProfile.end()) continue; yaml::bolt::BinaryFunctionProfile &YamlBF = *PI->getValue(); if (profileMatches(YamlBF, Function)) matchProfileToFunction(YamlBF, Function); } } for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &Function = BFI.second; if (ProfiledFunctions.count(&Function)) continue; for (StringRef FunctionName : Function.getNames()) { const Optional<StringRef> CommonName = getLTOCommonName(FunctionName); if (CommonName) { auto I = LTOCommonNameMap.find(*CommonName); if (I == LTOCommonNameMap.end()) continue; bool ProfileMatched = false; std::vector<yaml::bolt::BinaryFunctionProfile *> &LTOProfiles = I->getValue(); for (yaml::bolt::BinaryFunctionProfile *YamlBF : LTOProfiles) { if (YamlBF->Used) continue; if ((ProfileMatched = profileMatches(*YamlBF, Function))) { matchProfileToFunction(*YamlBF, Function); break; } } if (ProfileMatched) break; // If there's only one function with a given name, try to // match it partially. if (LTOProfiles.size() == 1 && LTOCommonNameFunctionMap[*CommonName].size() == 1 && !LTOProfiles.front()->Used) { matchProfileToFunction(*LTOProfiles.front(), Function); break; } } else { auto PI = ProfileNameToProfile.find(FunctionName); if (PI == ProfileNameToProfile.end()) continue; yaml::bolt::BinaryFunctionProfile &YamlBF = *PI->getValue(); if (!YamlBF.Used) { matchProfileToFunction(YamlBF, Function); break; } } } } for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) if (!YamlBF.Used && opts::Verbosity >= 1) errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name << '\n'; // Set for parseFunctionProfile(). NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); NormalizeByCalls = usesEvent("branches"); uint64_t NumUnused = 0; for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) { if (YamlBF.Id >= YamlProfileToFunction.size()) { // Such profile was ignored. ++NumUnused; continue; } if (BinaryFunction *BF = YamlProfileToFunction[YamlBF.Id]) parseFunctionProfile(*BF, YamlBF); else ++NumUnused; } BC.setNumUnusedProfiledObjects(NumUnused); return Error::success(); } bool YAMLProfileReader::usesEvent(StringRef Name) const { return YamlBP.Header.EventNames.find(std::string(Name)) != StringRef::npos; } } // end namespace bolt } // end namespace llvm