hphp/tools/debug-parser/debug-parser-dwarf.cpp (2,186 lines of code) (raw):
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#if defined(__linux__) || defined(__FreeBSD__)
#include <folly/Demangle.h>
#include <folly/Format.h>
#include <folly/Memory.h>
#include <folly/ScopeGuard.h>
#include <folly/String.h>
#include <folly/container/F14Map.h>
#include <folly/container/F14Set.h>
#include <folly/portability/Unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dwarf.h>
#include "hphp/util/assertions.h"
#include "hphp/util/functional.h"
#include "hphp/util/job-queue.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"
#include "hphp/tools/debug-parser/debug-parser.h"
#include "hphp/tools/debug-parser/dwarfstate.h"
/*
* Debug parser for DWARF (using dwarfstate)
*
* DWARF is structured as a forest of DIEs (Debug Information Entry). Each DIE
* has a tag, which describes what kind of DIE it is, and a list of
* attributes. Each attribute has a type, which identifies what it is, and a
* value (the type of the value is implied by the attribute type). Furthermore,
* a DIE can have other DIEs as children. The top-level DIEs correspond to
* compilation-units, and all the children of these top-level DIEs correspond to
* the information in that compilation-unit.
*
* The meaning and interpretation of the DIEs is deliberately left vague by the
* standard, so different compilers can encode things in different ways (and no
* implementation is bug free).
*/
namespace debug_parser { namespace {
TRACE_SET_MOD(trans);
////////////////////////////////////////////////////////////////////////////////
// Adapters so that a pair of iterators (as returned by equal_range) can be
// iterated with a foreach-style loop: the pair's first element is the start of
// the range and its second element is one-past-the-end.
template <class Iter>
Iter begin(std::pair<Iter, Iter> range) {
  return range.first;
}

template <class Iter>
Iter end(std::pair<Iter, Iter> range) {
  return range.second;
}
/*
* Fully qualified names aren't represented explicitly in DWARF. Instead the
* structure of the DIEs mimics the nesting structure in the source (IE, a
* nested class within a class nested within a namespace). So, in order to
* infer the fully qualified name for any given class, the current scope is
* tracked as the DIEs are walked.
*
* Likewise, DWARF has no concept of linkage, but the linkage is needed to know
* which types are actually equivalent. Luckily, a type's linkage is closely
* related to its scope (except for templates, see below), so it can be inferred
* the same way.
*
* The scope is tracked as a stack of contexts, pushing and popping off contexts
* when a namespace or type is entered or exited.
*/
struct Scope {
explicit Scope(GlobalOff cu_offset)
: m_cu_offset{cu_offset}
{
m_scope.emplace_back(
ObjectTypeName{std::string{}, ObjectTypeName::Linkage::external},
true
);
}
GlobalOff cuOffset() const { return m_cu_offset; }
ObjectTypeName name() const;
// Fix the name of a type to match where it is in the namespace/type
// hierarchy.
void fixName(ObjectTypeName newName);
ObjectTypeName::Linkage linkage() const {
return m_scope.back().name.linkage;
}
std::size_t unnamedTypeCount() const {
return m_scope.back().unnamed_count;
}
bool isInNamespaceScope() const {
return m_scope.back().in_namespace_scope;
}
void incUnnamedTypeCount() { ++m_scope.back().unnamed_count; }
HPHP::Optional<GlobalOff> typeOffset() const {
return m_scope.back().offset;
}
void pushType(std::string name, GlobalOff offset) {
m_scope.emplace_back(
ObjectTypeName{std::move(name), linkage()},
false
);
m_scope.back().offset = offset;
}
void pushUnnamedType(std::string name, GlobalOff offset) {
m_scope.emplace_back(
ObjectTypeName{
std::move(name),
ObjectTypeName::Linkage::none
},
false
);
m_scope.back().offset = offset;
}
void pushNamespace(std::string ns) {
m_scope.emplace_back(
ObjectTypeName{std::move(ns), linkage()},
true
);
}
void pushUnnamedNamespace() {
m_scope.emplace_back(
ObjectTypeName{
"(unnamed namespace)",
ObjectTypeName::Linkage::internal
},
true
);
}
void pop() { m_scope.pop_back(); }
private:
struct Context {
Context(ObjectTypeName name, bool in_namespace_scope)
: name(std::move(name))
, in_namespace_scope{in_namespace_scope} {}
ObjectTypeName name;
bool in_namespace_scope;
std::size_t unnamed_count = 0;
HPHP::Optional<GlobalOff> offset;
};
std::vector<Context> m_scope;
GlobalOff m_cu_offset;
public:
static const std::string s_pseudo_type_name;
};
/*
* Actual implementation of TypeParser for DWARF.
*/
struct TypeParserImpl : TypeParser {
  explicit TypeParserImpl(const std::string& filename, int num_threads);

  Object getObject(ObjectTypeKey key) override;
  size_t getObjectBlockCount() const override;

protected:
  const std::vector<ObjectType>& getObjectBlock(size_t index) const override;

private:
  struct StateBlock;

  // For a given type (the key this is stored under), the offsets of the types
  // whose linkage depends on it.
  struct LinkageDependents {
    // Types which use the keyed type as a template parameter.
    folly::F14FastSet<GlobalOff> template_uses;
    // Types nested directly inside the keyed type.
    folly::F14FastSet<GlobalOff> children;
  };

  // Information gathered by parseSpecification() about a definition.
  struct StaticSpec {
    // Sentinel meaning "no address has been recorded".
    static auto constexpr kNoAddress = std::numeric_limits<uint64_t>::max();
    std::string linkage_name;
    uint64_t address{kNoAddress};
    bool is_member{false};
  };

  // Private working state of one worker thread while names are generated.
  struct Env {
    // Stays null until the worker processes its first job. The workers test
    // this pointer to tell whether they were ever used, so it must start out
    // null no matter how the enclosing worker object gets constructed.
    const DwarfState* dwarf{nullptr};
    std::unique_ptr<StateBlock> state;
    // Maps the offset of a declaration (or of one of its members) to the
    // offset of the corresponding definition.
    folly::F14FastMap<GlobalOff, GlobalOff> local_mappings;
    folly::F14FastMap<GlobalOff, LinkageDependents> linkage_dependents;
    // Definitions collected during genNames, keyed by offsets which have not
    // yet been run through local_mappings.
    std::vector<std::pair<GlobalOff, StaticSpec>> raw_static_definitions;
  };

  // Functions used while concurrently building state. Since these functions are
  // invoked from multiple threads, they are static and take all their state
  // explicitly as parameters.
  static void genNames(Env& env,
                       Dwarf_Die die,
                       Scope& scope,
                       std::vector<GlobalOff>* template_params = nullptr);
  static HPHP::Optional<uintptr_t> interpretLocAddress(const DwarfState& dwarf,
                                                       Dwarf_Attribute attr);
  static HPHP::Optional<GlobalOff> parseSpecification(const DwarfState& dwarf,
                                                      Dwarf_Die die,
                                                      bool first,
                                                      StaticSpec& spec);

  void fixTemplateLinkage();

  // Functions used after state is built. These are not thread-safe.
  Object genObject(Dwarf_Die die,
                   ObjectTypeName name,
                   ObjectTypeKey key);
  Type genType(Dwarf_Die die);
  Object::Member genMember(Dwarf_Die die,
                           const ObjectTypeName& parent_name);
  Object::Function genFunction(Dwarf_Die die);
  Object::Base genBase(Dwarf_Die die, const ObjectTypeName& parent_name);
  Object::TemplateParam genTemplateParam(Dwarf_Die die);
  HPHP::Optional<size_t> determineArrayBound(Dwarf_Die die);
  void fillFuncArgs(Dwarf_Die die, FuncType& func);

  // Map a given offset to the state block which contains state for that offset
  // (see below). m_state_map must be non-empty and sorted by offset. The block
  // chosen is the one paired with the greatest recorded offset which is not
  // greater than the requested one; if every recorded offset is greater, the
  // first block is returned.
  const StateBlock& stateForOffset(GlobalOff offset) const {
    assertx(!m_state_map.empty());
    auto it = std::upper_bound(
      m_state_map.begin(),
      m_state_map.end(),
      offset,
      [](GlobalOff offset, const std::pair<GlobalOff, StateBlock*>& p) {
        return offset < p.first;
      }
    );
    // upper_bound yields the first entry past the offset, so step back one.
    if (it != m_state_map.begin()) --it;
    return *it->second;
  }

  // All of the parser's persistent state is stored in some number of
  // blocks. All of the blocks are computed concurrently, one block per
  // thread. To avoid the overhead of merging the blocks together, they are kept
  // separated. Instead m_state_map is used to map a given offset into the block
  // which contains the state for that offset. It is a list of offset/state
  // pairs. Any offset between the offset given in the pair and the one in the
  // next pair is mapped to the state block in the pair.
  //
  // Note: this scheme only works because each compilation unit is
  // self-contained and does not reference data in another compilation
  // unit. However, nothing in DWARF prevents this and it's not guaranteed to
  // always be true.
  struct StateBlock {
    std::vector<ObjectType> all_objs;
    // Maps the offset of an object type to its index in all_objs.
    folly::F14FastMap<GlobalOff, size_t> obj_offsets;
    std::multimap<GlobalOff, StaticSpec> static_definitions;
  };

  std::vector<std::unique_ptr<StateBlock>> m_states;
  std::vector<std::pair<GlobalOff, StateBlock*>> m_state_map;

  tbb::concurrent_hash_map<GlobalOff,
                           LinkageDependents,
                           GlobalOff::Hash> m_linkage_dependents;

  DwarfState m_dwarf;
};
// A deliberately artificial name, so that it cannot be mistaken for the name of
// an actual type.
const std::string Scope::s_pseudo_type_name{"@_PSEUDO_TY"};
// Form the fully qualified name of the innermost context by joining the names
// of every context on the stack, outermost first, with "::". As long as
// nothing has been accumulated yet (for example, the root context's empty
// name), the next name is taken as-is rather than being prefixed with "::".
// The returned linkage is that of the innermost context.
ObjectTypeName Scope::name() const {
  std::string qualified;
  for (auto const& context : m_scope) {
    auto const& component = context.name.name;
    if (qualified.empty()) {
      qualified = component;
    } else {
      qualified = folly::sformat("{}::{}", qualified, component);
    }
  }
  return ObjectTypeName{std::move(qualified), linkage()};
}
// Replace the name of the innermost context with newName.
//
// newName is a fully qualified name. Since name() re-derives the qualified name
// by joining every context on the stack, only the part of newName which follows
// the enclosing scope's qualified name (and its "::" separator) is stored in
// the innermost context. If the innermost context is the only one on the stack,
// or the enclosing scope's name is empty, newName is stored unchanged.
void Scope::fixName(ObjectTypeName newName) {
  if (m_scope.size() == 1) {
    m_scope.back().name = std::move(newName);
    return;
  }

  // Temporarily remove the innermost context so that name() yields the
  // qualified name of the scope which encloses it.
  auto context = std::move(m_scope.back());
  m_scope.pop_back();
  auto const outerName = name();

  assertx(newName.name.size() > outerName.name.size());
  if (!outerName.name.empty()) {
    // The enclosing scope's name must be a prefix of the new name. The
    // comparison has to be made on newName's leading characters: comparing all
    // of outerName against all of the (strictly longer) newName can never
    // report equality.
    assertx(
      newName.name.compare(0, outerName.name.size(), outerName.name) == 0
    );
    // Drop the enclosing scope's name along with the "::" that follows it.
    newName.name = newName.name.substr(outerName.name.size() + 2);
  }

  context.name = std::move(newName);
  m_scope.push_back(std::move(context));
}
// Open the debug information in filename and build all of the parser's
// persistent state, using num_threads worker threads. Any exception thrown by
// a worker is rethrown from here (only the first one is kept).
TypeParserImpl::TypeParserImpl(const std::string& filename, int num_threads)
  : m_dwarf{filename}
{
  // Processing each compilation unit is very expensive, as it involves walking
  // a large part of the debug information. To speed things up (a lot), we build
  // up the state concurrently. Create a job corresponding to each compilation
  // unit in the file and enqueue the jobs with a thread pool. We'll find the
  // offsets of the compilation unit in the main thread, enqueuing them as we
  // find them. This lets us not only exploit concurrency between processing
  // compilation units, but between finding them and processing them.
  //
  // Each worker maintains its own private state which it populates for all the
  // compilation units it's assigned (each worker can process multiple
  // compilation units). Once done, all the different states are kept separate
  // (merging them would be too expensive), but a mapping is constructed to map
  // offsets to the appropriate state block.
  //
  // This whole scheme is only viable because (right now), debug information in
  // a given compilation unit doesn't reference anything outside of that unit,
  // so the state for any given compilation unit can be processed
  // independently.

  // The context serves as the link between a worker and the TypeParserImpl
  // state (this is forced by the JobQueueWorker interface).
  struct Context {
    const decltype(m_dwarf)& dwarf;
    decltype(m_states)& states;
    decltype(m_state_map)& state_map;
    decltype(m_linkage_dependents)& linkage_dependents;
    // The lock protects states, state_map, and the exception field (but only
    // when the workers are running).
    std::mutex lock;
    // Set to the exception if any of the workers threw (first one wins).
    std::exception_ptr exception;
  };

  // Thread worker. We'll end up with a state block for each one of these.
  struct Worker : HPHP::JobQueueWorker<GlobalOff, Context*> {
    Env env;
    // Remember each offset we processed so we can record it in the global
    // state map when we finish.
    std::vector<GlobalOff> offsets;

    void doJob(GlobalOff offset) override {
      // Process a compilation unit at the given offset.
      try {
        // We're going to use it so let's mark this worker active.
        if (!env.dwarf) {
          env.dwarf = &m_context->dwarf;
          env.state = std::make_unique<StateBlock>();
        }

        offsets.emplace_back(offset);

        // Do the actual processing, adding to the state block:
        Scope scope{offset};
        env.dwarf->onDIEAtOffset(
          offset,
          [&](Dwarf_Die cu) { genNames(env, cu, scope); }
        );

        auto const remap = [&] (GlobalOff o) {
          auto const it = env.local_mappings.find(o);
          if (it != env.local_mappings.end()) {
            return it->second;
          }
          return o;
        };

        // Generate static_definitions by updating their keys collected during
        // genNames. Some keys refer back to a DW_AT_member that belongs to a
        // struct whose definition was in another type-unit. We want to add an
        // entry for the member in the definition.
        //
        // The specs are moved (not copied) out of raw_static_definitions,
        // which is cleared immediately afterwards. Each insertion is hinted
        // with the position just past the previously inserted element
        // (starting from end()), which is the same placement std::inserter
        // would produce.
        auto& definitions = env.state->static_definitions;
        auto hint = definitions.end();
        for (auto& elem : env.raw_static_definitions) {
          hint = definitions.emplace_hint(
            hint, remap(elem.first), std::move(elem.second)
          );
          ++hint;
        }
        env.raw_static_definitions.clear();

        // Publish this unit's linkage dependencies into the shared map,
        // remapping offsets as we go. Only types which are used as template
        // parameters are published.
        for (auto& linkage : env.linkage_dependents) {
          if (!linkage.second.template_uses.size()) continue;
          std::decay_t<decltype(m_context->linkage_dependents)>::accessor acc;
          auto const inserted =
            m_context->linkage_dependents.insert(acc, remap(linkage.first));
          if (inserted && !env.local_mappings.size()) {
            // Fresh entry and nothing to remap: steal the sets wholesale.
            acc->second = std::move(linkage.second);
          } else {
            auto const process = [&] (auto const& from, auto& to) {
              for (auto& elm : from) {
                to.insert(remap(elm));
              }
            };
            process(linkage.second.template_uses, acc->second.template_uses);
            process(linkage.second.children, acc->second.children);
          }
        }
        env.linkage_dependents.clear();
        env.local_mappings.clear();
      } catch (...) {
        // Store any exception thrown so it can be rethrown in the main
        // thread. We only bother to store the first one.
        stop();
        std::lock_guard<std::mutex> guard{m_context->lock};
        if (!m_context->exception) {
          m_context->exception = std::current_exception();
        }
      }
    }

    void onThreadExit() override {
      // The worker is done (we've been told to stop). Now that we know we won't
      // be processing any more offsets, do the needed post-processing on the
      // rest of the state.
      if (!env.dwarf) return;

      try {
        // Compute a mapping of an object type's offset to its location in the
        // all_objs vector.
        env.state->obj_offsets.reserve(env.state->all_objs.size());
        for (auto i = size_t{0}; i < env.state->all_objs.size(); ++i) {
          env.state->obj_offsets.emplace(
            GlobalOff::fromRaw(env.state->all_objs[i].key.object_id), i
          );
        }

        // Record all the offsets this worker processed (along with the state
        // block) in the global state map. This is done using a lock because
        // it's quick and only done when the thread is finishing.
        std::lock_guard<std::mutex> guard{m_context->lock};
        auto const state = env.state.get();
        m_context->states.emplace_back(std::move(env.state));
        for (auto offset : offsets) {
          m_context->state_map.emplace_back(offset, state);
        }
      } catch (...) {
        // Store any exception thrown so it can be rethrown in the main
        // thread. We only bother to store the first one.
        stop();
        std::lock_guard<std::mutex> guard{m_context->lock};
        if (!m_context->exception) {
          m_context->exception = std::current_exception();
        }
      }
    }
  };

  // Create the thread pool
  Context context{m_dwarf, m_states, m_state_map, m_linkage_dependents};
  HPHP::JobQueueDispatcher<Worker> dispatcher{
    num_threads, num_threads, 0, false, &context
  };
  dispatcher.start();

  size_t num_tu = 0;
  FTRACE(1, "Adding type-units to dispatcher...\n");
  // Iterate over every type-unit, enqueuing jobs which will
  // concurrently scan that unit.
  m_dwarf.forEachTopLevelUnit(
    [&] (Dwarf_Die tu) {
      dispatcher.enqueue(m_dwarf.getDIEOffset(tu));
      ++num_tu;
      return true;
    },
    false
  );
  FTRACE(1, "... {} type-units added.\n", num_tu);

  size_t num_cu = 0;
  FTRACE(1, "Adding compilation-units to dispatcher...\n");
  // Iterate over every compilation-unit, enqueuing jobs which will
  // concurrently scan that unit.
  m_dwarf.forEachCompilationUnit(
    [&](Dwarf_Die cu) { dispatcher.enqueue(m_dwarf.getDIEOffset(cu)); ++num_cu;}
  );
  FTRACE(1, "... {} compilation-units added.\n", num_cu);

  // Wait for all the workers to finish.
  dispatcher.stop();
  FTRACE(1, "Finished with genNames\n");

  // If any of the workers caught an exception, rethrow here in the main
  // thread. We don't need to bother taking the lock because all the workers are
  // gone.
  if (context.exception) std::rethrow_exception(context.exception);

  // Since the state map was appended to by the workers in a non-deterministic
  // order, we need to sort it by offset so we can do efficient lookups later.
  std::sort(
    m_state_map.begin(), m_state_map.end(),
    [](const std::pair<GlobalOff, StateBlock*>& p1,
       const std::pair<GlobalOff, StateBlock*>& p2) {
      return p1.first < p2.first;
    }
  );

  // Some of the static_definitions entries need to be moved to the
  // correct block; eg they were seen when processing the cu
  // containing the definition of the static member, but need to be
  // moved to the state for the tu which contains the definition of
  // the struct (which may or may not be the same state block).
  //
  // Several entries of m_state_map can share a state block; `seen` makes sure
  // each block is only walked once.
  folly::F14FastSet<void*> seen;
  for (auto const& p : m_state_map) {
    if (!seen.insert(p.second).second) continue;
    // Cache the block owning the most recently looked-up offset, so that a
    // run of entries with equal keys needs only a single lookup.
    auto curOff = p.first;
    auto curState = p.second;
    for (auto it = p.second->static_definitions.begin();
         it != p.second->static_definitions.end(); ) {
      if (it->first != curOff) {
        curOff = it->first;
        curState = const_cast<decltype(p.second)>(&stateForOffset(curOff));
      }
      if (curState == p.second) {
        ++it;
        continue;
      }
      curState->static_definitions.insert(*it);
      it = p.second->static_definitions.erase(it);
    }
  }

  fixTemplateLinkage();
  m_linkage_dependents.clear();
}
// The number of object blocks, which is the number of state blocks held in
// m_states.
size_t TypeParserImpl::getObjectBlockCount() const {
  auto const block_count = m_states.size();
  return block_count;
}
// The object types recorded in the state block at position index of m_states.
// The index is not range-checked.
const std::vector<ObjectType>&
TypeParserImpl::getObjectBlock(size_t index) const {
  auto const& block = *m_states[index];
  return block.all_objs;
}
/*
* As stated above, the linkage of templates is tricky. The linkage of a
* template is the most restrictive linkage of its original linkage and the
* linkage of its template parameters. Since some of the template parameters may
* not yet be parsed when we parse the template, the inference of the correct
* template linkage is deferred until all the types' linkages are computed.
*
* However, since templates can be parameters to other templates, this process
* must be repeated until no type's linkage changes.
*
* As an additional complication, the linkage of any nested class is inherited
* from its parent, so when a template's linkage changes, it must be bubbled
* down to any of its nested classes.
*
* While the names and initial linkages of all the types were being generated,
* the relationships between templates, their parameters, and nested classes
* were recorded in linkage_dependents, which is used here.
*/
void TypeParserImpl::fixTemplateLinkage() {
using ChangedSet = folly::F14FastSet<GlobalOff>;
ChangedSet changed;
for (const auto& pair : m_linkage_dependents) {
if (pair.second.template_uses.empty()) continue;
changed.emplace(pair.first);
}
ChangedSet old_changed;
while (!changed.empty()) {
std::swap(changed, old_changed);
// For every type which has its linkage changed, update its dependents
// (templates where the type is used as a parameter, or nested classes) with
// the new linkage, and mark as being changed as well.
for (auto changed_offset : old_changed) {
decltype(m_linkage_dependents)::const_accessor acc;
if (!m_linkage_dependents.find(acc, changed_offset)) continue;
auto const& children = acc->second.children;
auto const& template_uses = acc->second.template_uses;
auto const& changed_state = stateForOffset(changed_offset);
auto const it = changed_state.obj_offsets.find(changed_offset);
if (it == changed_state.obj_offsets.end()) {
// This isn't right - if (eg) its a pointer to an object type
// with internal linkage, we need to mark the dependents
// internal; but we don't track pointer types at all - so just
// assume this type doesn't matter. The same goes for other
// things like const struct types etc.
continue;
}
auto const& changed_obj = changed_state.all_objs[it->second];
// Only update and mark if we actually make the linkage more restrictive.
if (changed_obj.name.linkage != ObjectTypeName::Linkage::external) {
const auto process = [&](GlobalOff dependent_offset) {
auto& dep_state = const_cast<StateBlock&>(
stateForOffset(dependent_offset)
);
auto const it = dep_state.obj_offsets.find(dependent_offset);
if (it == dep_state.obj_offsets.end()) return;
auto& dependent_obj = dep_state.all_objs[it->second];
if (dependent_obj.name.linkage < changed_obj.name.linkage) {
FTRACE(4,
"Reducing linkage for {}({}) from {} to {} due to {}({})\n",
dependent_obj.name.name,
GlobalOff::fromRaw(dependent_obj.key.object_id),
show(dependent_obj.name.linkage),
show(changed_obj.name.linkage),
changed_obj.name.name,
GlobalOff::fromRaw(changed_obj.key.object_id));
dependent_obj.name.linkage = changed_obj.name.linkage;
changed.emplace(dependent_offset);
}
};
for (auto template_offset : template_uses) process(template_offset);
for (auto child_offset : children) process(child_offset);
}
}
old_changed.clear();
}
}
// Produce the full description of the object type identified by key. The name
// recorded for the type while the state was built is handed to genObject
// together with the DIE found at the key's offset.
Object TypeParserImpl::getObject(ObjectTypeKey key) {
  auto const offset = GlobalOff::fromRaw(key.object_id);
  auto const& state = stateForOffset(offset);
  auto const found = state.obj_offsets.find(offset);

  if (found == state.obj_offsets.end()) {
    // If we don't know of an object type at the given location, assume it's
    // referring to something we never parsed in the first place, so return the
    // pseudo-type.
    return Object{
      ObjectTypeName{
        Scope::s_pseudo_type_name,
        ObjectTypeName::Linkage::pseudo,
      },
      0,
      key,
      Object::Kind::k_other,
      true
    };
  }

  auto const& recorded_name = state.all_objs[found->second].name;
  return m_dwarf.onDIEAtOffset(
    offset,
    [&](Dwarf_Die die) { return genObject(die, recorded_name, key); }
  );
}
// For static members, determine how that member's address can be
// determined. In theory, this can be any arbitrary expression, but we only
// support constant addresses right now: the attribute must have the exprloc
// form and consist of exactly one DW_OP_addr operation, whose operand is the
// address returned. Anything else yields no value.
HPHP::Optional<uintptr_t>
TypeParserImpl::interpretLocAddress(const DwarfState& dwarf,
                                    Dwarf_Attribute attr) {
  if (dwarf.getAttributeForm(attr) != DW_FORM_exprloc) return std::nullopt;

  auto ops = dwarf.getAttributeValueExprLoc(attr);
  auto const is_constant_address =
    ops.size() == 1 && ops[0].lr_atom == DW_OP_addr;
  if (!is_constant_address) return std::nullopt;

  return HPHP::Optional<uintptr_t>{ops[0].lr_number};
}
// Gather what a definition DIE says about the declaration it defines.
//
// The attributes of die are scanned and spec is filled in with the linkage
// name, the address, and whether the DIE has an object pointer (making it a
// member). Fields of spec which already hold a value are left alone, so
// whatever was found first wins. DW_AT_abstract_origin is followed recursively
// (with first == false), accumulating into the same spec.
//
// Returns the offset named by DW_AT_specification, or the result of the
// recursive call for DW_AT_abstract_origin; whichever of the two attributes is
// visited last determines the result. If neither attribute is present, nothing
// is returned. Additionally, for the outermost call (first == true), nothing
// is returned if no linkage name, address, or member-ness was found at all.
HPHP::Optional<GlobalOff>
TypeParserImpl::parseSpecification(const DwarfState& dwarf,
                                   Dwarf_Die die,
                                   bool first,
                                   StaticSpec &spec) {
  HPHP::Optional<GlobalOff> offset;

  dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      switch (dwarf.getAttributeType(attr)) {
        case DW_AT_abstract_origin:
          offset = dwarf.onDIEAtOffset(
            dwarf.getAttributeValueRef(attr),
            [&](Dwarf_Die die2) {
              return parseSpecification(dwarf, die2, false, spec);
            }
          );
          break;
        case DW_AT_specification:
          offset = dwarf.getAttributeValueRef(attr);
          break;
        case DW_AT_linkage_name:
          if (spec.linkage_name.empty()) {
            spec.linkage_name = dwarf.getAttributeValueString(attr);
          }
          break;
        case DW_AT_location:
          if (spec.address == StaticSpec::kNoAddress) {
            if (auto const address = interpretLocAddress(dwarf, attr)) {
              spec.address = *address;
            }
          }
          break;
        case DW_AT_low_pc:
          if (spec.address == StaticSpec::kNoAddress) {
            spec.address = dwarf.getAttributeValueAddr(attr);
            // Sometimes GCC and Clang will emit invalid function
            // addresses. Usually zero, but sometimes a very low
            // number. These numbers have the appearance of being
            // un-relocated addresses, but it's in the final executable. As
            // a safety net, if an address is provided, but it's abnormally
            // low, ignore it.
            if (spec.address < 4096) spec.address = StaticSpec::kNoAddress;
          }
          break;
        case DW_AT_object_pointer:
          // Just in case we actually have a definition, use it to infer
          // member-ness.
          spec.is_member = true;
          break;
        default:
          break;
      }
      return true;
    }
  );

  // A top-level DIE which contributed nothing at all is of no use.
  auto const found_nothing =
    spec.linkage_name.empty() &&
    spec.address == StaticSpec::kNoAddress &&
    !spec.is_member;
  if (first && found_nothing) return std::nullopt;

  return offset;
}
/*
* Given a DIE, and the current scope, recursively generate the names/linkages
* for all the object types in this DIE and children. If template_params is
* provided, the parent DIE is an object type, so template_params should be
* filled with any template parameters in the child DIE.
*/
void TypeParserImpl::genNames(Env& env,
Dwarf_Die die,
Scope& scope,
std::vector<GlobalOff>* template_params) {
auto& dwarf = *env.dwarf;
auto& state = *env.state;
const auto recurse = [&](std::vector<GlobalOff>* params = nullptr){
dwarf.forEachChild(
die,
[&](Dwarf_Die child) {
genNames(env, child, scope, params);
return true;
}
);
};
auto tag = dwarf.getTag(die);
switch (tag) {
case DW_TAG_base_type:
case DW_TAG_union_type:
case DW_TAG_enumeration_type:
case DW_TAG_structure_type:
case DW_TAG_class_type:
case DW_TAG_unspecified_type: {
// Object-types. These have names and linkages, so we must record them.
// If this is a type-unit definition with a separate declaration
// in the same tu, declarationOffset will point to the
// declaration.
HPHP::Optional<GlobalOff> declarationOffset;
// If this is a declaration in a cu, referring back to a
// tu-definition, definitionOffset will point to that
// definition. Such declarations are emitted for the
// *definitions* of static members (which always happen in cus,
// not tus)
HPHP::Optional<GlobalOff> definitionOffset;
// Determine the base name, whether this type was unnamed, and whether
// this is an incomplete type or not from the DIE's attributes.
auto get_info = [&](Dwarf_Die cur,
bool updateOffsets) ->
std::tuple<std::string, bool, bool> {
std::string name;
std::string linkage_name;
auto incomplete = false;
dwarf.forEachAttribute(
cur,
[&](Dwarf_Attribute attr) {
switch (dwarf.getAttributeType(attr)) {
case DW_AT_name:
name = dwarf.getAttributeValueString(attr);
break;
case DW_AT_linkage_name:
linkage_name = dwarf.getAttributeValueString(attr);
break;
case DW_AT_declaration:
incomplete = dwarf.getAttributeValueFlag(attr);
break;
case DW_AT_specification:
// The compiler can spit out a declaration for a
// struct, followed later by the full definition. The
// full definition has a DW_AT_specification pointing
// back to the declaration - but note that the full
// definition may not be defined in the correct
// namespace - so we're going to keep the declaration,
// and update it based on the definition ignoring the
// definition's name (this feels a little backwards,
// but its how dwarf works).
if (updateOffsets) {
declarationOffset = dwarf.getAttributeValueRef(attr);
}
break;
case DW_AT_signature:
if (updateOffsets &&
dwarf.getAttributeForm(attr) == DW_FORM_ref_sig8) {
// The actual definition is in another type-unit, we
// can ignore this declaration.
definitionOffset = dwarf.getAttributeValueRef(attr);
break;
}
default:
break;
}
return true;
}
);
// If there's an explicit name, just use that.
if (!name.empty()) return std::make_tuple(name, false, incomplete);
// Otherwise, if there's a linkage name, demangle it, and strip off
// everything except the last section, and use that as the base
// name. For types which have external linkage, this lets us use
// whatever naming scheme the compiler has chosen for unnamed types.
if (!linkage_name.empty()) {
auto demangled = folly::demangle(linkage_name.c_str()).toStdString();
auto index = demangled.rfind("::");
if (index != decltype(demangled)::npos) demangled.erase(0, index+2);
return std::make_tuple(demangled, false, incomplete);
}
// No explicit name and no linkage name to use, so we have to try to
// infer one ourself (making it a synthetic name).
// Try the first named member
auto const first_member = [&](const char* type,
auto member_type) {
std::string first_member;
dwarf.forEachChild(
cur,
[&](Dwarf_Die child) {
if (dwarf.getTag(child) == member_type) {
first_member = dwarf.getDIEName(child);
}
return first_member.empty();
}
);
if (!first_member.empty()) {
return folly::sformat(
"(unnamed {} containing '{}')", type, first_member
);
}
return std::string{};
};
auto const type_name = [&]{
if (tag == DW_TAG_enumeration_type) return "enumeration";
if (tag == DW_TAG_union_type) return "union";
if (tag == DW_TAG_structure_type) return "struct";
if (tag == DW_TAG_class_type) return "class";
return "type";
};
auto const member_type = [&]() {
if (tag == DW_TAG_enumeration_type) return DW_TAG_enumerator;
return DW_TAG_member;
};
auto first_member_name = first_member(type_name(), member_type());
if (!first_member_name.empty()) {
return std::make_tuple(
std::move(first_member_name), true, incomplete
);
}
// If this is within a namespace, don't infer any name at all, keep it
// nameless. If its not within a namespace (IE, within a class), give it
// a unique name based on how many unnamed types we've seen so far. We
// can't do this for types within a namespace because namespaces are
// open and thus we can't force a global numbering of all types within
// it.
if (!scope.isInNamespaceScope()) {
scope.incUnnamedTypeCount();
return std::make_tuple(
folly::sformat(
"(unnamed {} #{})",
type_name(),
scope.unnamedTypeCount()
),
true,
incomplete
);
}
return std::make_tuple(
folly::sformat("(unnamed {})", type_name()),
true,
incomplete
);
};
const auto info = get_info(die, /*updateOffsets=*/true);
auto offset = dwarf.getDIEOffset(die);
if (definitionOffset) {
// This is a declaration which refers to the definition via
// DW_AT_signature. We'll see one of these for a class in the
// cu where its static members are defined. Later
// DW_TAG_variable nodes will refer back to the ones here,
// rather than the ones in the definition, so we need to
// record a map from any members defined here back to the
// original definition. We could also see them for parent
// classes, or for template param (a template param can refer
// to an out-of-unit type either by using a ref_sig8 directly,
// in which case we will have resolved the offset correctly,
// or it could have an offset to a type with a
// DW_AT_signature, in which case we'll need to fix it up
// later). In any case, add an entry to map our offset to the
// true definition, and entries to map any members to their
// true definitions.
env.local_mappings.emplace(offset, *definitionOffset);
folly::F14FastMap<std::string, GlobalOff> map;
dwarf.forEachChild(
die,
[&] (Dwarf_Die child) {
if (dwarf.getTag(child) == DW_TAG_member) {
map.emplace(dwarf.getDIEName(child), dwarf.getDIEOffset(child));
}
return true;
}
);
if (!map.empty()) {
dwarf.onDIEAtOffset(
*definitionOffset,
[&] (Dwarf_Die orig) {
dwarf.forEachChild(
orig,
[&] (Dwarf_Die child) {
auto it = map.find(dwarf.getDIEName(child));
if (it != map.end()) {
env.local_mappings.emplace(it->second,
dwarf.getDIEOffset(child));
}
return true;
}
);
}
);
}
}
auto parent_offset = scope.typeOffset();
// If we inferred a base name, use that to form the fully qualified name,
// otherwise treat it as an unnamed type.
if (!definitionOffset) {
std::get<1>(info) ?
scope.pushUnnamedType(std::get<0>(info), offset) :
scope.pushType(std::get<0>(info), offset);
} else {
// Push the name of the definition, not of the declaration
dwarf.onDIEAtOffset(
*definitionOffset,
[&] (Dwarf_Die def) {
const auto info_def = get_info(def, /*updateOffsets=*/false);
std::get<1>(info_def) ?
scope.pushUnnamedType(std::get<0>(info_def), offset) :
scope.pushType(std::get<0>(info_def), offset);
});
}
SCOPE_EXIT { scope.pop(); };
if (declarationOffset) {
// This completes a previous declaration. search backwards for
// it, which should be fine because its normally right after
// the declaration (and its always in the same cu/tu).
auto i = state.all_objs.size();
while (true) {
assert(i);
auto& obj = state.all_objs[--i];
if (obj.key.object_id == declarationOffset->raw()) {
assert(obj.incomplete);
FTRACE(5,
"Completing previous definition of {}.\n"
" Was {}, Now {}, Linkage: {}\n",
obj.name.name,
GlobalOff::fromRaw(obj.key.object_id), offset,
show(obj.name.linkage)
);
obj.incomplete = false;
obj.key.object_id = offset.raw();
// map declarationOffset to offset, because any ref_sig8s
// will point to the definition, not the declaration.
env.local_mappings.emplace(*declarationOffset, offset);
// Fixup the name in the scope stack
scope.fixName(obj.name);
assertx(scope.name().name == obj.name.name);
break;
}
}
} else {
// Record this object type, with fully qualified name, key, and linkage.
auto obj = ObjectType{
scope.name(),
ObjectTypeKey{offset.raw(), scope.cuOffset().raw()},
std::get<2>(info)
};
FTRACE(5,
"{} {} at {} Linkage: {}\n",
obj.incomplete ? "Declaring" : "Defining",
obj.name.name,
offset,
show(obj.name.linkage)
);
state.all_objs.emplace_back(std::move(obj));
}
// This object type is done, so recurse into any nested classes. Provide a
// list of template parameters to be filled in case this is a template. If
// it is, we'll record the linkage dependence for the later template
// linkage fix-up.
std::vector<GlobalOff> recurse_template_params;
recurse(&recurse_template_params);
for (auto param_offset : recurse_template_params) {
FTRACE(9, "linkage: {} depends on template param {}\n",
offset, param_offset);
env.linkage_dependents[param_offset].template_uses.emplace(offset);
}
if (parent_offset) {
FTRACE(9, "linkage: {} depends on child {}\n",
*parent_offset, offset);
env.linkage_dependents[*parent_offset].children.emplace(offset);
}
break;
}
case DW_TAG_namespace: {
// Record the namespace in the scope and recurse. If this is an unnamed
// namespace, that means any type found in child DIEs will have internal
// linkage.
auto name = dwarf.getDIEName(die);
name.empty() ?
scope.pushUnnamedNamespace() :
scope.pushNamespace(std::move(name));
SCOPE_EXIT { scope.pop(); };
recurse();
break;
}
case DW_TAG_variable: {
// Normally we don't care about variables since we're only looking for
// types. However, certain aspects of object types can't be completely
// inferred at the declaration site (mainly static variable linkage
// related things like linkage name and address). We need a definition for
// that, so record all the variable definitions along with their
// specification, which we can consult later.
// Neither GCC nor Clang record a name for a variable which is a static
// definition, so ignore any that do have a name. This speeds things up.
if (!dwarf.getDIEName(die).empty()) break;
StaticSpec spec;
if (auto off = parseSpecification(dwarf, die, true, spec)) {
env.raw_static_definitions.emplace_back(*off, spec);
}
// Note that we don't recurse into any child DIEs here. There shouldn't be
// anything interesting in them.
break;
}
case DW_TAG_subprogram: {
// For the same reason we care about DW_TAG_variables, we examine
// DW_TAG_subprogram as well. Certain interesting aspects of a static
// function are only present in its definition.
if (!dwarf.getDIEName(die).empty()) break;
StaticSpec spec;
if (auto off = parseSpecification(dwarf, die, true, spec)) {
env.raw_static_definitions.emplace_back(*off, spec);
}
// Don't recurse. There might be valid types within a subprogram
// definition, but we deliberately ignore those. A large portion of the
// debug information lies within subprogram definitions, and scanning all
// of that consumes a large amount of time. Moreover, these types usually
// aren't very interesting, so we deliberately ignore them for
// efficiency. If there's actually any reference to these types, they'll
// be reported as the pseudo-type.
break;
}
case DW_TAG_template_type_param: {
// Template type parameters are represented using child DIEs, not
// attributes. If the parent DIE was an object type, fill the supplied
// vector with the template parameters. Don't recurse because there
// shouldn't be anything interesting in the children.
if (template_params) {
dwarf.forEachAttribute(
die,
[&](Dwarf_Attribute attr) {
switch (dwarf.getAttributeType(attr)) {
case DW_AT_type: {
auto offset = dwarf.getAttributeValueRef(attr);
// Check this type to see if it is a declaration and use the
// real type instead
dwarf.onDIEAtOffset(
offset,
[&] (Dwarf_Die type_die) {
dwarf.forEachAttribute(
type_die,
[&](Dwarf_Attribute attr) {
if (dwarf.getAttributeType(attr) == DW_AT_signature &&
dwarf.getAttributeForm(attr) == DW_FORM_ref_sig8) {
offset = dwarf.getAttributeValueRef(attr);
return false;
}
return true;
}
);
});
template_params->emplace_back(offset);
return false;
}
default:
return true;
}
}
);
}
break;
}
default:
recurse();
break;
}
}
/*
 * Build the detailed Object specification for an object type.
 *
 *   die  - DIE describing the object type.
 *   name - name of the object type (moved into the returned Object).
 *   key  - key of the object type.
 *
 * If the DIE carries a DW_AT_signature attribute, the specification is
 * generated from the DIE that attribute refers to instead (with the same name
 * and key). Throws Exception if the DIE is a complete definition of something
 * other than an "other" kind, yet has no DW_AT_byte_size.
 */
Object TypeParserImpl::genObject(Dwarf_Die die,
                                 ObjectTypeName name,
                                 ObjectTypeKey key) {
  // Classify the object according to the tag of its DIE.
  const auto kind = [&]{
    switch (m_dwarf.getTag(die)) {
      case DW_TAG_structure_type:
      case DW_TAG_class_type:
        return Object::Kind::k_class;
      case DW_TAG_union_type:
        return Object::Kind::k_union;
      case DW_TAG_base_type:
        return Object::Kind::k_primitive;
      case DW_TAG_enumeration_type:
        return Object::Kind::k_enum;
      case DW_TAG_unspecified_type:
        // Strange things like "decltype(nullptr_t)"
        return Object::Kind::k_other;
      default:
        // Shouldn't happen because we only call genObject() on offsets
        // already visited and verified to be an object type.
        always_assert(0);
    }
  }();

  // Gather the attributes we care about in a single pass.
  HPHP::Optional<std::size_t> byte_size;
  HPHP::Optional<GlobalOff> signature_target;
  bool is_declaration = false;
  m_dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      const auto attr_type = m_dwarf.getAttributeType(attr);
      if (attr_type == DW_AT_byte_size) {
        byte_size = m_dwarf.getAttributeValueUData(attr);
      } else if (attr_type == DW_AT_declaration) {
        is_declaration = m_dwarf.getAttributeValueFlag(attr);
      } else if (attr_type == DW_AT_signature) {
        signature_target = m_dwarf.getAttributeValueRef(attr);
      }
      return true;
    }
  );

  // A signature means the real description lives in another DIE; describe
  // that one instead.
  if (signature_target) {
    return m_dwarf.onDIEAtOffset(
      *signature_target,
      [&](Dwarf_Die target) { return genObject(target, name, key); }
    );
  }

  // A missing size is only acceptable for incomplete types or the strange
  // "other" types sometimes seen; those are given a size of zero.
  if (!byte_size) {
    if (!is_declaration && kind != Object::Kind::k_other) {
      throw Exception{
        folly::sformat(
          "Object type '{}' at offset {} is a complete definition, "
          "but has no size!",
          name.name,
          key.object_id
        )
      };
    }
    byte_size = 0;
  }

  Object obj{std::move(name), *byte_size, key, kind, is_declaration};

  // Clang gives linkage names to things that don't actually have linkage, so
  // members and functions of an object type without external linkage must
  // not keep their linkage names.
  const auto lacks_external_linkage = [&] {
    return obj.name.linkage != ObjectTypeName::Linkage::external;
  };

  m_dwarf.forEachChild(
    die,
    [&](Dwarf_Die child) {
      const auto child_tag = m_dwarf.getTag(child);
      if (child_tag == DW_TAG_inheritance) {
        obj.bases.emplace_back(genBase(child, obj.name));
      } else if (child_tag == DW_TAG_member) {
        obj.members.emplace_back(genMember(child, obj.name));
        if (lacks_external_linkage()) {
          obj.members.back().linkage_name.clear();
        }
      } else if (child_tag == DW_TAG_template_type_parameter) {
        obj.template_params.emplace_back(genTemplateParam(child));
      } else if (child_tag == DW_TAG_GNU_template_parameter_pack) {
        // Flatten parameter packs as if they were just a normally provided
        // parameter list. This is enough for our purposes.
        m_dwarf.forEachChild(
          child,
          [&](Dwarf_Die pack_elem) {
            if (m_dwarf.getTag(pack_elem) == DW_TAG_template_type_parameter) {
              obj.template_params.emplace_back(genTemplateParam(pack_elem));
            }
            return true;
          }
        );
      } else if (child_tag == DW_TAG_subprogram) {
        obj.functions.emplace_back(genFunction(child));
        if (lacks_external_linkage()) {
          obj.functions.back().linkage_name.clear();
        }
      }
      return true;
    }
  );

  // DWARF doesn't always report base classes and members in offset order.
  // Sort them (by offset, then by name) so the output is deterministic for
  // consumers of the information.
  std::sort(
    obj.bases.begin(),
    obj.bases.end(),
    [&](const Object::Base& lhs, const Object::Base& rhs) {
      if (lhs.offset < rhs.offset) return true;
      if (rhs.offset < lhs.offset) return false;
      return lhs.type.name.name < rhs.type.name.name;
    }
  );
  std::sort(
    obj.members.begin(),
    obj.members.end(),
    [&](const Object::Member& lhs, const Object::Member& rhs) {
      if (lhs.offset < rhs.offset) return true;
      if (rhs.offset < lhs.offset) return false;
      return lhs.name < rhs.name;
    }
  );
  return obj;
}
/*
 * Given a DIE representing an arbitrary type, return its equivalent Type. This
 * can involve chasing a chain of such type DIEs (pointers, references,
 * cv-qualifiers, typedefs, ...), each of which refers to another type DIE via
 * DW_AT_type.
 *
 * Throws Exception when the DIE describes something which cannot exist
 * (reference to void, array of void, pointer-to-member lacking either its
 * containing object or its member type, ...) or when the tag isn't a type tag
 * at all.
 */
Type TypeParserImpl::genType(Dwarf_Die die) {
  // Offset of a different type this type refers to. If not present, that type
  // is implicitly "void".
  HPHP::Optional<GlobalOff> type_offset;
  // For pointers to members, the type referring to the object the member
  // belongs to.
  HPHP::Optional<GlobalOff> containing_type_offset;
  // A struct can have a declaration which refers to the definition
  // via a DW_AT_signature.
  HPHP::Optional<GlobalOff> definition_offset;

  m_dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      switch (m_dwarf.getAttributeType(attr)) {
        case DW_AT_type:
          type_offset = m_dwarf.getAttributeValueRef(attr);
          break;
        case DW_AT_containing_type:
          containing_type_offset = m_dwarf.getAttributeValueRef(attr);
          break;
        case DW_AT_signature:
          definition_offset = m_dwarf.getAttributeValueRef(attr);
          // Once a signature is seen, stop looking at further attributes.
          return false;
        default:
          break;
      }
      return true;
    }
  );

  // Generate the Type for the DIE found at the given offset.
  const auto recurse = [&](GlobalOff offset) {
    return m_dwarf.onDIEAtOffset(
      offset,
      [&](Dwarf_Die die2) { return genType(die2); }
    );
  };

  // Pointers to member functions aren't represented in DWARF. Instead the
  // compiler creates a struct internally which stores all the information.

  switch (m_dwarf.getTag(die)) {
    case DW_TAG_base_type:
    case DW_TAG_structure_type:
    case DW_TAG_class_type:
    case DW_TAG_union_type:
    case DW_TAG_enumeration_type:
    case DW_TAG_unspecified_type: {
      // Follow a declaration to its definition, if it names one.
      if (definition_offset) return recurse(*definition_offset);

      auto offset = m_dwarf.getDIEOffset(die);
      auto const& state = stateForOffset(offset);
      auto iter = state.obj_offsets.find(offset);
      if (iter == state.obj_offsets.end()) {
        // Must be the pseudo-type.
        return ObjectType{
          ObjectTypeName{
            Scope::s_pseudo_type_name,
            ObjectTypeName::Linkage::pseudo
          },
          ObjectTypeKey{offset.raw(), 0},
          true
        };
      } else {
        return state.all_objs[iter->second];
      }
    }
    case DW_TAG_pointer_type:
      return PtrType{type_offset ? recurse(*type_offset) : VoidType{}};
    case DW_TAG_reference_type: {
      if (!type_offset) {
        throw Exception{
          folly::sformat(
            "Encountered reference to void at offset {}",
            m_dwarf.getDIEOffset(die)
          )
        };
      }
      return RefType{recurse(*type_offset)};
    }
    case DW_TAG_rvalue_reference_type: {
      if (!type_offset) {
        throw Exception{
          folly::sformat(
            "Encountered rvalue reference to void at offset {}",
            m_dwarf.getDIEOffset(die)
          )
        };
      }
      return RValueRefType{recurse(*type_offset)};
    }
    case DW_TAG_array_type: {
      if (!type_offset) {
        throw Exception{
          folly::sformat(
            "Encountered array of voids at offset {}",
            m_dwarf.getDIEOffset(die)
          )
        };
      }
      return ArrType{recurse(*type_offset), determineArrayBound(die)};
    }
    case DW_TAG_const_type:
      return ConstType{type_offset ? recurse(*type_offset) : VoidType{}};
    case DW_TAG_volatile_type:
      return VolatileType{type_offset ? recurse(*type_offset) : VoidType{}};
    case DW_TAG_restrict_type:
      return RestrictType{type_offset ? recurse(*type_offset) : VoidType{}};
    case DW_TAG_typedef:
      return type_offset ? recurse(*type_offset) : VoidType{};
    case DW_TAG_subroutine_type: {
      FuncType func{type_offset ? recurse(*type_offset) : VoidType{}};
      fillFuncArgs(die, func);
      return std::move(func);
    }
    case DW_TAG_ptr_to_member_type: {
      if (!containing_type_offset) {
        throw Exception{
          folly::sformat(
            "Encountered ptr-to-member at offset {} without a "
            "containing object",
            m_dwarf.getDIEOffset(die)
          )
        };
      }
      // The member type is dereferenced below, so reject a DIE which doesn't
      // provide one instead of reading an empty optional.
      if (!type_offset) {
        throw Exception{
          folly::sformat(
            "Encountered ptr-to-member at offset {} without a "
            "member type",
            m_dwarf.getDIEOffset(die)
          )
        };
      }

      auto containing = recurse(*containing_type_offset);
      if (auto obj = containing.asObject()) {
        return PtrType{
          MemberType{std::move(*obj), recurse(*type_offset)}
        };
      } else {
        throw Exception{
          folly::sformat(
            "Encountered ptr-to-member at offset {} with a "
            "containing object of type '{}'",
            m_dwarf.getDIEOffset(die),
            containing.toString()
          )
        };
      }
    }
    default:
      throw Exception{
        folly::sformat(
          "Encountered non-type tag '{}' at offset {} while "
          "traversing type description",
          m_dwarf.tagToString(m_dwarf.getTag(die)),
          m_dwarf.getDIEOffset(die)
        )
      };
  }
}
/*
 * Build the Object::Member description for a DW_TAG_member DIE.
 *
 *   die         - DIE describing the member.
 *   parent_name - name of the enclosing object type (only used in error
 *                 messages).
 *
 * A member whose DIE is flagged with DW_AT_declaration is treated as a static
 * member: it has no offset, and its linkage name and address may be filled in
 * from the static definitions recorded for it. Throws Exception if the member
 * has no DW_AT_type.
 */
Object::Member TypeParserImpl::genMember(Dwarf_Die die,
                                         const ObjectTypeName& parent_name) {
  std::string name;
  std::string linkage_name;
  std::size_t offset = 0;
  HPHP::Optional<GlobalOff> type_die_offset;
  HPHP::Optional<uintptr_t> address;
  bool is_static = false;

  m_dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      const auto attr_type = m_dwarf.getAttributeType(attr);
      if (attr_type == DW_AT_name) {
        name = m_dwarf.getAttributeValueString(attr);
      } else if (attr_type == DW_AT_linkage_name) {
        linkage_name = m_dwarf.getAttributeValueString(attr);
      } else if (attr_type == DW_AT_location) {
        address = interpretLocAddress(m_dwarf, attr);
      } else if (attr_type == DW_AT_data_member_location) {
        offset = m_dwarf.getAttributeValueUData(attr);
      } else if (attr_type == DW_AT_type) {
        type_die_offset = m_dwarf.getAttributeValueRef(attr);
      } else if (attr_type == DW_AT_declaration) {
        is_static = m_dwarf.getAttributeValueFlag(attr);
      }
      return true;
    }
  );

  // No DW_AT_type means "void", but you can't have void members!
  if (!type_die_offset) {
    throw Exception{
      folly::sformat(
        "Encountered member (name: '{}') of type void "
        "in object type '{}' at offset {}",
        name,
        parent_name.name,
        m_dwarf.getDIEOffset(die)
      )
    };
  }

  if (is_static) {
    // For a static member, consult every definition recorded as referring to
    // this member and take whatever information the declaration lacks.
    auto const static_offset = m_dwarf.getDIEOffset(die);
    auto const& state = stateForOffset(static_offset);
    auto const range = state.static_definitions.equal_range(static_offset);
    for (auto const& def : range) {
      auto const& spec = def.second;
      if (linkage_name.empty() && !spec.linkage_name.empty()) {
        linkage_name = spec.linkage_name;
      }
      if (!address && spec.address != StaticSpec::kNoAddress) {
        address = spec.address;
      }
    }
  }

  auto type = m_dwarf.onDIEAtOffset(
    *type_die_offset,
    [&](Dwarf_Die type_die){ return genType(type_die); }
  );

  // Synthesize a descriptive name for members which have none.
  if (name.empty()) {
    if (is_static) {
      name = folly::sformat(
        "(unnamed static member of type '{}')", type.toString()
      );
    } else {
      name = folly::sformat("(unnamed member of type '{}')", type.toString());
    }
  }

  // Static members have no offset within the object.
  HPHP::Optional<std::size_t> member_offset;
  if (!is_static) member_offset = offset;

  return Object::Member{
    name,
    member_offset,
    linkage_name,
    address,
    std::move(type)
  };
}
/*
 * Build the Object::Function description for a DW_TAG_subprogram DIE found
 * inside an object type: its name, return type, argument types, kind
 * (virtual, member, or static), linkage name and address.
 */
Object::Function TypeParserImpl::genFunction(Dwarf_Die die) {
  std::string name;
  Type ret_type{VoidType{}};
  std::string linkage_name;
  bool is_virtual = false;
  bool is_member = false;

  m_dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      const auto attr_type = m_dwarf.getAttributeType(attr);
      if (attr_type == DW_AT_name) {
        name = m_dwarf.getAttributeValueString(attr);
      } else if (attr_type == DW_AT_type) {
        ret_type = m_dwarf.onDIEAtOffset(
          m_dwarf.getAttributeValueRef(attr),
          [&](Dwarf_Die ty_die) { return genType(ty_die); }
        );
      } else if (attr_type == DW_AT_linkage_name) {
        linkage_name = m_dwarf.getAttributeValueString(attr);
      } else if (attr_type == DW_AT_virtuality) {
        is_virtual =
          (m_dwarf.getAttributeValueUData(attr) != DW_VIRTUALITY_none);
      } else if (attr_type == DW_AT_object_pointer) {
        is_member = true;
      }
      return true;
    }
  );

  /*
   * Deciding between "static function" and "member function" is not as simple
   * as checking for DW_AT_object_pointer (only present on member functions).
   * GCC emits it reliably, but Clang only emits it on a function's
   * definition, not its declaration, and may not emit declarations at all for
   * functions it considers unused.
   *
   * So, in addition, if the first formal parameter is marked "artificial"
   * (i.e. not present in the actual source), assume it is the this pointer
   * and therefore that the function is a member function.
   */
  std::vector<Type> arg_types;
  m_dwarf.forEachChild(
    die,
    [&](Dwarf_Die child) {
      if (m_dwarf.getTag(child) != DW_TAG_formal_parameter) {
        return true;
      }

      Type param_type{VoidType()};
      bool param_artificial = false;
      m_dwarf.forEachAttribute(
        child,
        [&](Dwarf_Attribute attr) {
          const auto attr_type = m_dwarf.getAttributeType(attr);
          if (attr_type == DW_AT_type) {
            param_type = m_dwarf.onDIEAtOffset(
              m_dwarf.getAttributeValueRef(attr),
              [&](Dwarf_Die ty_die) { return genType(ty_die); }
            );
          } else if (attr_type == DW_AT_artificial) {
            param_artificial = m_dwarf.getAttributeValueFlag(attr);
          }
          return true;
        }
      );

      // Only the very first parameter, and only if artificial, marks this as
      // a member function.
      const bool is_first_param = arg_types.empty();
      if (param_artificial && is_first_param) {
        is_member = true;
      }
      arg_types.emplace_back(std::move(param_type));
      return true;
    }
  );

  // As with static variables, consult every definition recorded as referring
  // to this function to pick up linkage information.
  HPHP::Optional<std::uintptr_t> address;
  auto const offset = m_dwarf.getDIEOffset(die);
  auto const& state = stateForOffset(offset);
  auto range = state.static_definitions.equal_range(offset);
  for (auto const& def : range) {
    auto const& spec = def.second;
    if (linkage_name.empty() && !spec.linkage_name.empty()) {
      linkage_name = spec.linkage_name;
    }
    if (!address && spec.address != StaticSpec::kNoAddress) {
      address = spec.address;
    }
    if (spec.is_member) is_member = true;
  }

  // Virtual takes precedence over member, which takes precedence over static.
  auto kind = Object::Function::Kind::k_static;
  if (is_virtual) {
    kind = Object::Function::Kind::k_virtual;
  } else if (is_member) {
    kind = Object::Function::Kind::k_member;
  }

  return Object::Function{
    name,
    std::move(ret_type),
    std::move(arg_types),
    kind,
    linkage_name,
    address,
  };
}
/*
 * Build the Object::Base description for a DW_TAG_inheritance DIE.
 *
 *   die         - DIE describing the inheritance relationship.
 *   parent_name - name of the derived object type (only used in error
 *                 messages).
 *
 * Non-virtual bases get an offset (defaulting to 0 if the DIE provides none);
 * virtual bases get no offset. Throws Exception if the base has no type
 * information or its type is not an object type.
 */
Object::Base TypeParserImpl::genBase(Dwarf_Die die,
                                     const ObjectTypeName& parent_name) {
  std::string name;
  HPHP::Optional<std::size_t> offset;
  HPHP::Optional<GlobalOff> base_type_offset;
  bool is_virtual = false;

  // First pass: everything except the location.
  m_dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      const auto attr_type = m_dwarf.getAttributeType(attr);
      if (attr_type == DW_AT_name) {
        name = m_dwarf.getAttributeValueString(attr);
      } else if (attr_type == DW_AT_type) {
        base_type_offset = m_dwarf.getAttributeValueRef(attr);
      } else if (attr_type == DW_AT_virtuality) {
        is_virtual =
          (m_dwarf.getAttributeValueUData(attr) != DW_VIRTUALITY_none);
      }
      return true;
    }
  );

  // Second pass: the location is only read once the base is known to be
  // non-virtual.
  if (!is_virtual) {
    offset = 0;
    m_dwarf.forEachAttribute(
      die,
      [&](Dwarf_Attribute attr) {
        if (m_dwarf.getAttributeType(attr) == DW_AT_data_member_location) {
          offset = m_dwarf.getAttributeValueUData(attr);
        }
        return true;
      }
    );
  }

  if (!base_type_offset) {
    throw Exception{
      folly::sformat(
        "Encountered base '{}' of object type '{}' without "
        "type information at offset {}",
        name,
        parent_name.name,
        m_dwarf.getDIEOffset(die)
      )
    };
  }

  auto type =
    m_dwarf.onDIEAtOffset(
      *base_type_offset,
      [&](Dwarf_Die type_die) { return genType(type_die); }
    );

  // Base class better be an actual class!
  auto obj = type.asObject();
  if (!obj) {
    throw Exception{
      folly::sformat(
        "Encountered base '{}' of object type '{}' of "
        "non-object type '{}' at offset {}",
        name,
        parent_name.name,
        type.toString(),
        m_dwarf.getDIEOffset(die)
      )
    };
  }
  return Object::Base{*obj, offset};
}
/*
 * Build the Object::TemplateParam for a template type parameter DIE. The
 * parameter's type is the one named by DW_AT_type, or void if the DIE has no
 * such attribute.
 */
Object::TemplateParam TypeParserImpl::genTemplateParam(Dwarf_Die die) {
  HPHP::Optional<GlobalOff> param_type_offset;
  m_dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      if (m_dwarf.getAttributeType(attr) == DW_AT_type) {
        param_type_offset = m_dwarf.getAttributeValueRef(attr);
      }
      return true;
    }
  );

  // Start out as void and replace it if a type was actually provided.
  Type param_type{VoidType{}};
  if (param_type_offset) {
    param_type = m_dwarf.onDIEAtOffset(
      *param_type_offset,
      [&](Dwarf_Die type_die){ return genType(type_die); }
    );
  }
  return Object::TemplateParam{std::move(param_type)};
}
/*
 * Determine the number of elements of the array type described by the given
 * DIE, by inspecting its DW_TAG_subrange_type children. DW_AT_count gives the
 * bound directly, DW_AT_upper_bound gives the bound minus one. If several such
 * attributes are present, the last one visited wins. Returns an empty optional
 * if no bound was found, or if the bound found is zero.
 */
HPHP::Optional<std::size_t>
TypeParserImpl::determineArrayBound(Dwarf_Die die) {
  HPHP::Optional<std::size_t> bound;

  m_dwarf.forEachChild(
    die,
    [&](Dwarf_Die child) {
      // Only subranges carry bound information.
      if (m_dwarf.getTag(child) != DW_TAG_subrange_type) return true;

      m_dwarf.forEachAttribute(
        child,
        [&](Dwarf_Attribute attr) {
          const auto attr_type = m_dwarf.getAttributeType(attr);
          if (attr_type == DW_AT_count) {
            bound = m_dwarf.getAttributeValueUData(attr);
          } else if (attr_type == DW_AT_upper_bound) {
            bound = m_dwarf.getAttributeValueUData(attr)+1;
          }
          return true;
        }
      );
      return true;
    }
  );

  // A bound of zero is reported the same way as an unknown bound.
  if (bound && *bound == 0) {
    bound.reset();
  }
  return bound;
}
/*
 * Append the type of every DW_TAG_formal_parameter child of the given
 * function-type DIE to func.args, in the order the children are visited.
 * Throws Exception if a parameter has no DW_AT_type (i.e. is "void").
 */
void TypeParserImpl::fillFuncArgs(Dwarf_Die die, FuncType& func) {
  m_dwarf.forEachChild(
    die,
    [&](Dwarf_Die child) {
      // Anything which isn't a formal parameter is of no interest.
      if (m_dwarf.getTag(child) != DW_TAG_formal_parameter) return true;

      HPHP::Optional<GlobalOff> param_type_offset;
      m_dwarf.forEachAttribute(
        child,
        [&](Dwarf_Attribute attr) {
          if (m_dwarf.getAttributeType(attr) == DW_AT_type) {
            param_type_offset = m_dwarf.getAttributeValueRef(attr);
          }
          return true;
        }
      );

      if (!param_type_offset) {
        throw Exception{
          folly::sformat(
            "Encountered function at offset {} taking a void parameter",
            m_dwarf.getDIEOffset(die)
          )
        };
      }

      func.args.push_back(
        m_dwarf.onDIEAtOffset(
          *param_type_offset,
          [&](Dwarf_Die type_die) { return genType(type_die); }
        )
      );
      return true;
    }
  );
}
/*
* Print out the given DIE (including children) in textual format to the given
* ostream. Only actually print out DIEs which begin in the range between the
* begin and end parameters.
*/
void printDIE(std::ostream& os,
const DwarfState& dwarf,
Dwarf_Die die,
std::pair<uint64_t,GlobalOff>* sig,
std::size_t begin,
std::size_t end,
int indent = 0) {
auto tag = dwarf.getTag(die);
auto tag_name = dwarf.tagToString(tag);
auto name = dwarf.getDIEName(die);
auto offset = dwarf.getDIEOffset(die).offset();
const auto recurse = [&]{
// Find the last child DIE which does not start with the begin/end
// range. This DIE is the first one which contains some data within the
// begin/end range, so that must be the first one to begin recursion at.
HPHP::Optional<uint64_t> first;
if (begin > 0) {
dwarf.forEachChild(
die,
[&](Dwarf_Die child) {
const auto offset = dwarf.getDIEOffset(child).offset();
if (offset <= begin) {
first = offset;
return true;
} else {
return false;
}
}
);
}
// Only actually recurse if this child DIE is the above computed first DIE,
// or one following it, and begins before the end parameter.
dwarf.forEachChild(
die,
[&](Dwarf_Die child) {
const auto offset = dwarf.getDIEOffset(child).offset();
if ((!first || offset >= *first) && offset < end) {
printDIE(os, dwarf, child, nullptr, begin, end, indent+1);
}
return offset < end;
}
);
};
if (offset < begin) {
recurse();
return;
} else if (offset >= end) {
return;
}
auto const printSig = [&] (uint64_t sig) {
return folly::sformat("ref_sig8:{:016x}", sig);
};
for (int i = 0; i < indent; ++i) {
os << " ";
}
os << "#" << offset << ": " << tag_name << " (" << tag << ") \""
<< name << "\"";
if (sig && sig->first) {
os << folly::sformat(" {{{} -> #{}}}", printSig(sig->first), sig->second);
}
os << "\n";
dwarf.forEachAttribute(
die,
[&](Dwarf_Attribute attr) {
auto const type = dwarf.getAttributeType(attr);
auto const attr_name = dwarf.attributeTypeToString(type);
auto const form = dwarf.getAttributeForm(attr);
auto const attr_form = dwarf.attributeFormToString(form);
auto attr_value = [&]() -> std::string {
if (type == DW_AT_ranges) {
auto const ranges = dwarf.getRanges(attr);
std::string res;
for (auto range : ranges) {
if (range.dwr_addr1 == DwarfState::Dwarf_Ranges::kSelection) {
folly::format(&res, "0x{:x} ", range.dwr_addr2);
} else {
folly::format(&res, "0x{:x}-0x{:x} ",
range.dwr_addr1, range.dwr_addr2);
}
}
return res;
}
switch (dwarf.getAttributeForm(attr)) {
case DW_FORM_data1:
case DW_FORM_data2:
case DW_FORM_data4:
case DW_FORM_data8:
case DW_FORM_udata:
return folly::sformat("{}", dwarf.getAttributeValueUData(attr));
case DW_FORM_sdata:
return folly::sformat("{}", dwarf.getAttributeValueSData(attr));
case DW_FORM_string:
case DW_FORM_strp:
return folly::sformat(
"\"{}\"",
dwarf.getAttributeValueString(attr)
);
case DW_FORM_flag:
case DW_FORM_flag_present:
return dwarf.getAttributeValueFlag(attr) ? "true" : "false";
case DW_FORM_addr:
return folly::sformat(
"{:#010x}",
dwarf.getAttributeValueAddr(attr)
);
case DW_FORM_ref1:
case DW_FORM_ref2:
case DW_FORM_ref4:
case DW_FORM_ref8:
case DW_FORM_ref_udata:
case DW_FORM_ref_addr:
return folly::sformat("#{}", dwarf.getAttributeValueRef(attr));
case DW_FORM_ref_sig8: {
return printSig(dwarf.getAttributeValueSig8(attr));
}
case DW_FORM_exprloc: {
std::string output;
for (const auto& expr : dwarf.getAttributeValueExprLoc(attr)) {
if (expr.lr_atom == DW_OP_addr) {
output += folly::sformat(
"<OP_addr: {:#x}>,",
expr.lr_number
);
} else {
output += folly::sformat(
"<{}:{}:{}:{}>,",
dwarf.opToString(expr.lr_atom),
expr.lr_number,
expr.lr_number2,
expr.lr_offset
);
}
}
return folly::sformat("Location: [{}]", output);
}
case DW_FORM_block1:
case DW_FORM_block2:
case DW_FORM_block4:
case DW_FORM_block: return "{BLOCK}";
case DW_FORM_indirect: return "{INDIRECT}";
case DW_FORM_sec_offset: return "{SECTION OFFSET}";
default: return "{UNKNOWN}";
}
}();
for (int i = 0; i < indent; ++i) {
os << " ";
}
os << folly::sformat(" **** {} ({}) ==> {} [{}:{}]\n",
attr_name, type, attr_value,
attr_form, form);
return true;
}
);
recurse();
}
/*
 * Printer implementation for DWARF: dumps the DIEs of a file, restricted to
 * those beginning within a given offset range, as text.
 */
struct PrinterImpl : Printer {
  explicit PrinterImpl(const std::string& filename): m_filename{filename} {}

  /*
   * Print all DIEs beginning within [begin, end) to the given ostream. The
   * section selected by isInfo == false is printed first, followed by the one
   * selected by isInfo == true, and the stream is flushed afterwards.
   */
  void operator()(std::ostream& os,
                  std::size_t begin,
                  std::size_t end) const override {
    DwarfState dwarf{m_filename};
    print_section(os, dwarf, false, begin, end);
    print_section(os, dwarf, true, begin, end);
    os.flush();
  }

private:
  /*
   * Print the top-level units (and their children) of one section which
   * begin within [begin, end).
   */
  void print_section(std::ostream& os,
                     const DwarfState& dwarf,
                     bool isInfo,
                     std::size_t begin,
                     std::size_t end) const {
    // With a non-default begin parameter, make a first pass over all the
    // units to find the last one starting at or before `begin`. It is the
    // first unit which can at least partially lie within the range, and so
    // the first unit to begin printing from.
    HPHP::Optional<uint64_t> last;
    if (begin > 0) {
      dwarf.forEachTopLevelUnit(
        [&](Dwarf_Die cu) {
          const auto unit_offset = dwarf.getDIEOffset(cu).offset();
          if (unit_offset <= begin) last = unit_offset;
        },
        isInfo
      );
    }

    // Second pass: print the units lying within the begin/end range, and
    // stop at the first unit beginning at or after `end`.
    dwarf.forEachTopLevelUnit(
      [&] (Dwarf_Die cu) {
        const auto unit_offset = dwarf.getDIEOffset(cu).offset();
        if (unit_offset >= end) return false;

        if (!last || unit_offset >= *last) {
          auto context = cu->context;
          auto signature = std::make_pair(
            context->typeSignature,
            GlobalOff { context->typeOffset, context->isInfo }
          );
          // If this unit entirely lies within the begin/end range, pass a
          // begin parameter of "0", which will stop printDIE() from doing
          // range checks (which is more efficient).
          const std::size_t effective_begin =
            (last && unit_offset <= *last) ? begin : 0;
          printDIE(os, dwarf, cu, &signature, effective_begin, end);
        }
        return true;
      },
      isInfo
    );
  }

  std::string m_filename;
};
struct GDBIndexerImpl : GDBIndexer {
/*
 * Remember the file to index and the number of threads to use. Throws
 * Exception if fewer than one thread is requested.
 */
explicit GDBIndexerImpl(const std::string& filename, int num_threads)
  : m_filename{filename}
  , m_numThreads{num_threads}
{
  const bool thread_count_ok = num_threads >= 1;
  if (!thread_count_ok) {
    throw Exception{
      folly::sformat("Invalid number of threads: {}", num_threads)
    };
  }
}
/*
 * Generate the index for the file given at construction and write it to
 * output_file. The output consists of a six-word header (version followed by
 * the file offsets of the five areas), then the CU list, the types CU list,
 * the address area, the symbol table, and the constant pool (CU vectors
 * followed by strings). Timing information for each phase is logged.
 *
 * Throws Exception if output_file cannot be opened. The file is closed on
 * every exit path, including when one of the index-building steps throws.
 */
void operator()(const std::string& output_file) const override {
  auto begin_time = ::HPHP::Timer::GetCurrentTimeMicros();
  DwarfState dwarf{m_filename};
  log_time(begin_time, "Parsing dwarf file");

  std::FILE* fd = std::fopen(output_file.c_str(), "wb");
  if (!fd) {
    throw Exception{folly::sformat("Cannot open file: {}", output_file)};
  }
  // Everything below may throw; make sure the handle is never leaked.
  SCOPE_EXIT { std::fclose(fd); };

  auto const gdb_index_version = 8;
  std::vector<uint32_t> header{gdb_index_version, 0, 0, 0, 0, 0};

  auto time_index_begin = ::HPHP::Timer::GetCurrentTimeMicros();
  auto addresses_and_symbols = collect_addresses_and_symbols(dwarf);
  auto time = log_time(time_index_begin, "collect_addresses_and_symbols");
  auto const cu = get_cu(dwarf);
  time = log_time(time, "Get_cu");
  auto const tu = get_tu(dwarf);
  time = log_time(time, "Get_tu");
  auto const address = get_address(addresses_and_symbols.first);
  time = log_time(time, "Get_address");
  auto const symbol_and_constants =
    get_symbol_and_constants(addresses_and_symbols.second);
  log_time(time, "Get_symbol_and_constants");
  time = log_time(time_index_begin, "Index generation");

  // The offset, from the start of the file, of the CU list.
  header[1] = sizeof header[0] * header.size();
  // The offset, from the start of the file, of the types CU list.
  header[2] = header[1] + sizeof cu[0] * cu.size();
  // The offset, from the start of the file, of the address area.
  header[3] = header[2] + sizeof tu[0] * tu.size();
  // The offset, from the start of the file, of the symbol table.
  header[4] = header[3] + sizeof address[0] * address.size();
  // The offset, from the start of the file, of the constant pool.
  header[5] = header[4] +
    sizeof symbol_and_constants.symbol_pool.m_hashtable[0] *
    symbol_and_constants.symbol_pool.m_hashtable.size();

  // Emit the areas in exactly the order the header describes them.
  print_section(fd, header);
  print_section(fd, cu);
  print_section(fd, tu);
  print_section(fd, address);
  print_section(fd, symbol_and_constants.symbol_pool.m_hashtable);
  print_section(fd, symbol_and_constants.cu_vector_offsets);
  print_section(fd, symbol_and_constants.strings);
  log_time(time, "Print");

  log_time(begin_time, "Full index creation");
}
private:
/*
 * Log how long (in milliseconds) has passed since `time`, prefixed by `msg`,
 * and return the current time so that calls can be chained.
 *
 * Timestamps are the current time in microseconds truncated to 32 bits. The
 * elapsed time is therefore computed with unsigned (modular) arithmetic:
 * subtracting the truncated values as signed integers could overflow, which
 * is undefined behavior and in practice shows up as negative durations.
 */
int32_t log_time(int32_t time, const char* msg) const {
  auto const now =
    static_cast<int32_t>(::HPHP::Timer::GetCurrentTimeMicros());
  const uint32_t elapsed_us =
    static_cast<uint32_t>(now) - static_cast<uint32_t>(time);
  std::cout << msg << " took " << elapsed_us / 1000 << " ms" << std::endl;
  return now;
}
/*
 * Write every string in `data` to `fd`, each followed by its terminating NUL
 * byte. Nothing is done (and `fd` is not checked) if `data` is empty.
 */
void print_section(std::FILE* fd,
                   const std::vector<std::string>& data) const {
  if (data.empty()) return;
  assertx(fd);
  // Iterate by reference: the strings only need to be read, not copied.
  for (auto const& s : data) {
    // length() + 1 so that the NUL terminator is written as well.
    std::fwrite(s.c_str(), sizeof(char), s.length() + 1, fd);
  }
}
/*
 * Write the elements of `data` to `fd` as raw bytes, exactly as they are laid
 * out in memory. Nothing is done (and `fd` is not checked) if `data` is
 * empty.
 */
template <typename T>
void print_section(std::FILE* fd, const std::vector<T>& data) const {
  if (data.empty()) {
    return;
  }
  assertx(fd);
  const auto element_count = data.size();
  std::fwrite(data.data(), sizeof(T), element_count, fd);
}
/*
 * Build the contents of the CU list: for every compilation unit, its offset
 * followed by its size.
 */
std::vector<uint64_t> get_cu(const DwarfState& dwarf) const {
  std::vector<uint64_t> cu_list;
  dwarf.forEachCompilationUnit(
    [&](Dwarf_Die cu) {
      auto const& context = *cu->context;
      cu_list.push_back(context.offset);
      cu_list.push_back(context.size);
    }
  );
  return cu_list;
}
/*
 * Build the contents of the types CU list: for every top-level unit visited
 * with isInfo == false, its offset, the offset of its type relative to the
 * unit, and its type signature.
 */
std::vector<uint64_t> get_tu(const DwarfState& dwarf) const {
  std::vector<uint64_t> tu_list;
  dwarf.forEachTopLevelUnit(
    [&](Dwarf_Die tu) {
      auto const& context = *tu->context;
      tu_list.push_back(context.offset);
      tu_list.push_back(context.typeOffset - context.offset);
      tu_list.push_back(context.typeSignature);
    },
    false
  );
  return tu_list;
}
/*
 * One entry of the address area: an address range [low, high] together with
 * the index of the compilation unit it was recorded for.
 *
 * Each 64-bit bound shares its storage with a pair of 32-bit halves, so the
 * bound can be emitted as two 32-bit words.
 */
struct AddressTableEntry {
// Start of the range, also accessible as two 32-bit words.
union {
uint64_t low;
struct {
uint32_t low_bottom;
uint32_t low_top;
};
};
// End of the range, also accessible as two 32-bit words.
union {
uint64_t high;
struct {
uint32_t high_bottom;
uint32_t high_top;
};
};
// Index of the compilation unit this range was recorded for.
uint32_t index;
};
/*
 * Strict ordering of address table entries: by low address first, and by high
 * address among entries with equal low addresses.
 */
static bool compareAddressTableEntry(AddressTableEntry a,
                                     AddressTableEntry b) {
  if (a.low != b.low) {
    return a.low < b.low;
  }
  return a.high < b.high;
}
/*
 * Record the address ranges covered by the given DIE into `entries`, tagged
 * with `cu_index`.
 *
 * If the DIE's DW_AT_ranges contribute at least one entry, or its
 * DW_AT_low_pc/DW_AT_high_pc pair contributes one, the DIE's children are not
 * visited. Otherwise the children are visited recursively.
 */
void visit_die_for_address(const DwarfState& dwarf, const Dwarf_Die die,
                           std::vector<AddressTableEntry>& entries,
                           uint32_t cu_index) const {
  HPHP::Optional<uint64_t> low, high;
  std::vector<DwarfState::Dwarf_Ranges> ranges;
  // Whether DW_AT_high_pc was not in address form, in which case it is
  // treated as an offset from the low address.
  bool high_is_offset = false;

  dwarf.forEachAttribute(
    die,
    [&](Dwarf_Attribute attr) {
      auto const attr_type = dwarf.getAttributeType(attr);
      if (attr_type == DW_AT_ranges) {
        ranges = dwarf.getRanges(attr);
      } else if (attr_type == DW_AT_low_pc) {
        // Some times GCC/Clang emits very low numbers for addresses in
        // the form of UData. Let's drop them.
        if (attr->form == DW_FORM_addr) {
          low = dwarf.getAttributeValueAddr(attr);
        }
      } else if (attr_type == DW_AT_high_pc) {
        if (attr->form == DW_FORM_addr) {
          high = dwarf.getAttributeValueAddr(attr);
        } else {
          high_is_offset = true;
          high = dwarf.getAttributeValueUData(attr);
        }
      }
      return true;
    }
  );

  if (!ranges.empty()) {
    uint64_t base = low ? *low : 0;
    bool added = false;
    for (auto const& range : ranges) {
      // A base address selection entry changes the base for what follows.
      if (range.dwr_addr1 == DwarfState::Dwarf_Ranges::kSelection) {
        base = range.dwr_addr2;
        continue;
      }
      auto const range_begin = base + range.dwr_addr1;
      auto const range_end = base + range.dwr_addr2;
      if (range_begin == 0) continue;
      // Drop all the addresses under 2M
      if (range_end < 2000000) continue;
      entries.push_back(AddressTableEntry{range_begin, range_end, cu_index});
      added = true;
    }
    if (added) return;
  }

  if (low && high) {
    auto const begin_addr = *low;
    auto const end_addr = high_is_offset ? *low + *high : *high;
    // Drop all the addresses under 2M
    if (begin_addr != 0 && end_addr >= 2000000) {
      entries.push_back(AddressTableEntry{begin_addr, end_addr, cu_index});
      return;
    }
  }

  // Nothing recorded for this DIE itself; look at its children instead.
  dwarf.forEachChild(
    die,
    [&](Dwarf_Die child) {
      visit_die_for_address(dwarf, child, entries, cu_index);
      return true;
    }
  );
}
std::vector<uint32_t>
get_address(std::vector<AddressTableEntry>& entries) const {
sort(entries.begin(), entries.end(), compareAddressTableEntry);
// Split into little-endian formatting
std::vector<uint32_t> result = {};
for (auto& e : entries) {
result.push_back(e.low_bottom);
result.push_back(e.low_top);
result.push_back(e.high_bottom);
result.push_back(e.high_top);
result.push_back(e.index);
}
return result;
}
/*
 * One slot of the symbol hash table: the offset of the symbol's name and the
 * offset of its CU vector. Both default to zero, which denotes an empty slot.
 */
struct GDBSymbol {
  uint32_t name_offset{};
  uint32_t cu_vector_offset{};

  // A slot is occupied iff its name offset is non-zero. Const so that it can
  // be queried through const references as well.
  bool valid() const { return name_offset != 0; }
};
/*
 * Open-addressed hash table of GDBSymbol slots with a power-of-two capacity.
 *
 * Usage: call init() once with the expected number of symbols, then add()
 * each symbol. Collisions are resolved by stepping through the table with an
 * odd, hash-derived stride until an empty slot is found.
 */
struct GDBHashtable {
  GDBHashtable() : m_size(0), m_capacity(0), m_hashtable({}) {}

  size_t m_size;      // number of occupied slots
  size_t m_capacity;  // number of slots; a power of two once init() has run
  std::vector<GDBSymbol> m_hashtable;

  /*
   * Allocate the slot array for `size` symbols. Must be called exactly once,
   * before any add().
   */
  void init(size_t size) {
    assertx(m_size == 0 && m_capacity == 0);
    auto const nextPowerOfTwo = [](size_t n) -> size_t {
      if (n == 0) return 1;
      n--;
      // Smear the highest set bit into every lower position. Looping over the
      // full width keeps this correct for 64-bit size_t as well.
      for (size_t shift = 1; shift < sizeof(n) * 8; shift <<= 1) {
        n |= n >> shift;
      }
      n++;
      return n;
    };
    // Leave roughly a third of headroom over the requested size.
    auto initial_size = nextPowerOfTwo(size * 4 / 3);
    // For very small sizes (1 and 2) the computation above yields a table
    // with no free slot at all. Probing, both here and in any reader that
    // stops at the first empty slot, needs at least one empty slot to
    // terminate, so grow once more in that case.
    if (initial_size <= size) initial_size <<= 1;
    m_hashtable = std::vector<GDBSymbol>(initial_size, GDBSymbol{});
    m_capacity = initial_size;
  }

  /*
   * Return the first empty slot on the probe sequence for `hash`.
   *
   * Requires an initialized table with at least one empty slot; otherwise
   * the probe would index out of bounds or never terminate.
   */
  GDBSymbol* findSlot(uint32_t hash) {
    assertx(m_capacity != 0 && m_size < m_capacity);
    auto const mask = m_capacity - 1;
    uint32_t index = hash;
    // Forcing the step to be odd makes it coprime with the power-of-two
    // capacity, so the probe sequence visits every slot.
    uint32_t step = ((hash * 17) & mask) | 1;
    while (true) {
      index &= mask;
      if (!m_hashtable[index].valid()) {
        return &m_hashtable[index];
      }
      index += step;
    }
  }

  /*
   * Store `s` in the first free slot for `hash`. Always returns true.
   */
  bool add(uint32_t hash, GDBSymbol s) {
    auto const loc = this->findSlot(hash);
    assertx(!loc->valid());
    *loc = s;
    m_size++;
    return true;
  }
};
// Maps a symbol name to the list of packed CU-index-and-flags words recorded
// for it. A concurrent map is used because visit_die_for_symbols() fills it
// from units that are visited in parallel.
using SymbolMap = tbb::concurrent_hash_map<std::string,
std::vector<uint32_t>,
::HPHP::stringHashCompare>;
// Maps the offset of a declaration DIE to the qualified name computed for
// it, so that a later DIE whose DW_AT_specification refers to that offset
// can reuse the name.
using SpecMap = folly::F14FastMap<GlobalOff, std::string>;
/*
 * Recursively walk `die` and record every indexable symbol it describes
 * into `symbols`.
 *
 *   dwarf        - the DWARF state the DIE belongs to
 *   die          - the DIE to inspect
 *   symbols      - output: symbol name -> packed CU-index-and-flags words
 *   spec_names   - per-unit scratch map from the offset of a declaration DIE
 *                  to its qualified name, consulted when a later DIE refers
 *                  to it through DW_AT_specification
 *   parent_name  - qualified name of the enclosing scope ("" at top level)
 *   language     - language code inherited from the enclosing unit; replaced
 *                  if this DIE carries DW_AT_language
 *   cu_index     - index of the unit being visited
 *
 * Throws Exception if a symbol is added for a tag that has no kind mapping.
 */
void visit_die_for_symbols(const DwarfState& dwarf,
                           const Dwarf_Die die,
                           SymbolMap& symbols,
                           SpecMap& spec_names,
                           std::string parent_name,
                           uint32_t language,
                           uint32_t cu_index) const {
  bool is_declaration = false;
  bool is_external = false;
  std::string name;
  // True once `name` has been taken, already qualified, from spec_names.
  bool full_name = false;
  bool is_inlined = false;
  bool has_location = false;
  // True while the attributes of the referenced specification DIE (rather
  // than of `die` itself) are being collected.
  bool in_specification = false;
  auto specification = GlobalOff::fromRaw(0);

  auto collect_attributes = [&] (Dwarf_Attribute attr) {
    switch (dwarf.getAttributeType(attr)) {
      case DW_AT_declaration:
        // The specification DIE's own DW_AT_declaration is deliberately not
        // picked up here.
        if (!in_specification) {
          is_declaration = dwarf.getAttributeValueFlag(attr);
        }
        break;
      case DW_AT_external:
        is_external = dwarf.getAttributeValueFlag(attr);
        break;
      case DW_AT_linkage_name:
        is_external = true;
        break;
      case DW_AT_location:
        has_location = true;
        break;
      case DW_AT_name:
        if (!full_name) {
          name = dwarf.getAttributeValueString(attr);
        }
        break;
      case DW_AT_inline: {
        auto const val = dwarf.getAttributeValueUData(attr);
        is_inlined =
          (val == DW_INL_inlined) ||
          (val == DW_INL_declared_inlined);
        break;
      }
      case DW_AT_language:
        language = dwarf.getAttributeValueUData(attr);
        break;
      case DW_AT_specification: {
        specification = dwarf.getAttributeValueRef(attr);
        auto const it = spec_names.find(specification);
        if (it != spec_names.end()) {
          // The declaration was seen earlier: reuse its qualified name and
          // derive the parent scope from everything before the last "::".
          name = it->second;
          auto const pos = name.rfind("::");
          if (pos != std::string::npos) {
            parent_name = name.substr(0, pos);
          }
          full_name = true;
        }
        break;
      }
      default:
        break;
    }
    return true;
  };

  dwarf.forEachAttribute(die, collect_attributes);
  if (specification.raw()) {
    // Also collect the attributes of the DIE this one is a specification of.
    dwarf.onDIEAtOffset(
      specification,
      [&] (Dwarf_Die d) {
        in_specification = true;
        dwarf.forEachAttribute(d, collect_attributes);
      }
    );
  }

  // Packed 32-bit word stored in a symbol's CU vector.
  struct IndexAndFlags {
    IndexAndFlags(uint32_t index, uint32_t kind, uint32_t is_static) {
      assertx((index >> 24) == 0);
      // Bits 0-23 is CU index
      // Bits 24-27 are reserved and must be 0
      // Bits 28-30 The kind of the symbol in the CU.
      // Bit 31 is zero if the value is global and one if it is static.
      m_data = (is_static << 31) | (kind << 28) | index;
    }
    explicit IndexAndFlags(uint32_t data) : m_data(data) {}
    uint32_t m_data;
    uint32_t get_kind() const { return (m_data >> 28) & 7; }
    uint32_t get_is_static() const { return m_data >> 31; }
  };

  constexpr int TYPE = 1;
  constexpr int VARIABLE = 2;
  //constexpr int ENUM = 2;
  constexpr int FUNCTION = 3;
  // constexpr int OTHER = 4;

  // Compute the packed word for `die`, based on its tag and on the
  // attributes collected above.
  auto const index_and_flags = [&] {
    uint32_t kind = 0;
    auto is_static = false;
    switch (dwarf.getTag(die)) {
      case DW_TAG_typedef:
      case DW_TAG_base_type:
      case DW_TAG_subrange_type:
        kind = TYPE;
        is_static = true;
        break;
      case DW_TAG_enumerator:
        kind = VARIABLE;
        is_static = language != DW_LANG_C_plus_plus;
        break;
      case DW_TAG_subprogram:
        kind = FUNCTION;
        is_static = !(is_external || language == DW_LANG_Ada83 ||
                      language == DW_LANG_Ada95);
        break;
      case DW_TAG_constant:
        kind = VARIABLE;
        is_static = !is_external;
        break;
      case DW_TAG_variable:
        kind = VARIABLE;
        is_static = !is_external;
        break;
      case DW_TAG_namespace:
        kind = TYPE;
        is_static = false;
        break;
      case DW_TAG_class_type:
      case DW_TAG_interface_type:
      case DW_TAG_structure_type:
      case DW_TAG_union_type:
      case DW_TAG_enumeration_type:
        kind = TYPE;
        is_static = language != DW_LANG_C_plus_plus;
        break;
      default:
        throw Exception{"Invalid tag"};
    }
    return IndexAndFlags{cu_index, kind, is_static}.m_data;
  };

  // Returns true if `v` already holds a word that makes `input` redundant:
  // same kind, and either both are non-static or (for types) the static
  // bits agree. Takes the vector by reference: it is called while the map
  // accessor is held, so copying the whole CU vector there is wasted work.
  auto const hasSameFlags = [&](const std::vector<uint32_t>& v,
                                uint32_t input) {
    auto const flags = IndexAndFlags{input};
    for (auto const e : v) {
      auto const f = IndexAndFlags{e};
      if (f.get_kind() == flags.get_kind()) {
        if ((f.get_kind() == TYPE &&
             f.get_is_static() == flags.get_is_static()) ||
            (!f.get_is_static() && !flags.get_is_static())) {
          return true;
        }
      }
    }
    return false;
  };

  // Record `die` under `symbol_name`, unless an equivalent word is already
  // present for that name.
  auto const addSymbol = [&](const std::string& symbol_name) {
    auto value = index_and_flags();
    SymbolMap::accessor acc;
    if (symbols.insert(acc, symbol_name) ||
        !hasSameFlags(acc->second, value)) {
      acc->second.push_back(value);
    }
  };

  // Qualify `name` with `parent_name` (unless it is already a full name or
  // empty) and, for declarations, remember it for later specifications.
  auto const addParent = [&] {
    if (full_name) return;
    if (name.empty()) return;
    if (!parent_name.empty()) {
      name = folly::sformat("{}::{}", parent_name, name);
    }
    if (is_declaration) {
      spec_names.emplace(dwarf.getDIEOffset(die), name);
    }
  };

  // Recurse into every child, using `scope` as the children's parent name.
  auto const visitChildren = [&](const std::string& scope) {
    dwarf.forEachChild(
      die,
      [&](Dwarf_Die child) {
        visit_die_for_symbols(dwarf, child, symbols, spec_names, scope,
                              language, cu_index);
        return true;
      }
    );
  };

  auto const tag = dwarf.getTag(die);
  switch (tag) {
    case DW_TAG_base_type:
      // don't canonicalize!
      addSymbol(name);
      break;
    case DW_TAG_member:
      // static members appear first here as a declaration, then
      // later as a DW_TAG_variable whose specification points
      // here. We need to note the name just in case.
      if (is_declaration) addParent();
      break;
    case DW_TAG_subprogram:
      if (is_inlined) break;
      // fallthrough: non-inlined subprograms are handled like constants
    case DW_TAG_constant:
    case DW_TAG_enumerator:
      if (name.empty()) break;
      addParent();
      if (is_declaration) break;
      addSymbol(name);
      break;
    case DW_TAG_variable:
      if (name.empty() || (!is_external && !has_location)) break;
      addParent();
      if (is_declaration) break;
      addSymbol(name);
      break;
    case DW_TAG_namespace:
      if (name.empty()) name = "(anonymous namespace)";
      addParent();
      visitChildren(name);
      break;
    case DW_TAG_typedef:
    case DW_TAG_subrange_type:
      addParent();
      if (is_declaration || name.empty()) break;
      addSymbol(name);
      break;
    case DW_TAG_union_type:
    case DW_TAG_class_type:
    case DW_TAG_interface_type:
    case DW_TAG_structure_type:
    case DW_TAG_enumeration_type:
      addParent();
      if (!is_declaration && !name.empty()) {
        addSymbol(name);
      }
      // Children of an enumeration are visited with the enumeration's own
      // parent as their scope; children of the other aggregates are only
      // visited when the aggregate is named.
      if (tag == DW_TAG_enumeration_type || !name.empty()) {
        visitChildren(tag == DW_TAG_enumeration_type ? parent_name : name);
      }
      break;
    case DW_TAG_compile_unit:
    case DW_TAG_type_unit:
      visitChildren(parent_name);
      break;
    default:
      break;
  }
}
/*
 * Walk every top-level unit of `dwarf` and return:
 *   - the address table entries of all compilation units, with overlapping
 *     entries inside each unit merged, concatenated in unit-index order;
 *   - the symbol map filled from both compilation units and type units.
 *
 * Compilation units are numbered first (from zero) and type units continue
 * the same numbering. The units themselves are then visited in parallel on
 * m_numThreads threads.
 */
std::pair<std::vector<AddressTableEntry>, SymbolMap>
collect_addresses_and_symbols(const DwarfState& dwarf) const {
  auto time = ::HPHP::Timer::GetCurrentTimeMicros();

  // Unit offset -> unit index. Built serially, read-only afterwards.
  folly::F14FastMap<uint32_t, uint32_t> unit_indices_cu;
  folly::F14FastMap<uint32_t, uint32_t> unit_indices_tu;
  uint32_t count = 0;
  dwarf.forEachTopLevelUnit(
    [&](Dwarf_Die die) {
      unit_indices_cu.insert({die->context->offset, count});
      count++;
    }, true /* Compilation Unit */
  );
  size_t numCUs = count;
  dwarf.forEachTopLevelUnit(
    [&](Dwarf_Die die) {
      unit_indices_tu[die->context->offset] = count;
      count++;
    }, false /* Type Unit */
  );

  // The worker threads below only look indices up. Go through const
  // references so that a lookup can never insert into (and thereby race on)
  // the shared maps; a missing offset fails loudly in at() instead.
  auto const& cu_indices = unit_indices_cu;
  auto const& tu_indices = unit_indices_tu;

  // One slot per compilation unit; each worker writes only its own slot.
  std::vector<std::vector<AddressTableEntry>>
    entryList(numCUs, std::vector<AddressTableEntry>{});
  SymbolMap symbols;
  dwarf.forEachTopLevelUnitParallel(
    [&](Dwarf_Die die) {
      uint32_t index = cu_indices.at(die->context->offset);
      assertx(index < entryList.size());

      std::vector<AddressTableEntry> entry;
      visit_die_for_address(dwarf, die, entry, index);
      sort(entry.begin(), entry.end(), compareAddressTableEntry);

      // Coalesce: an entry whose low address does not lie beyond the
      // previous entry's high address is folded into that previous entry.
      std::vector<AddressTableEntry> merged;
      for (auto& e : entry) {
        if (!merged.empty()) {
          auto& prev = merged.back();
          if (e.low <= prev.high) {
            if (e.high <= prev.high) continue;
            assertx(prev.index == e.index);
            prev.high = e.high;
            continue;
          }
        }
        merged.push_back(e);
      }
      entryList[index] = std::move(merged);

      SpecMap spec_names;
      visit_die_for_symbols(dwarf, die, symbols, spec_names, "",
                            0, index);
    }, true /* Compilation Unit */, m_numThreads
  );

  // Flatten the per-unit lists, preserving unit-index order.
  std::vector<AddressTableEntry> entries;
  for (auto const& list : entryList) {
    entries.insert(entries.end(), list.begin(), list.end());
  }
  time = log_time(time, "collect_addresses_and_symbols: Visit CUs");

  dwarf.forEachTopLevelUnitParallel(
    [&](Dwarf_Die die) {
      uint32_t index = tu_indices.at(die->context->offset);
      SpecMap spec_names;
      visit_die_for_symbols(dwarf, die, symbols, spec_names, "",
                            0, index);
    }, false /* Type Unit */, m_numThreads
  );
  log_time(time, "collect_addresses_and_symbols: Visit TUs");

  return {std::move(entries), std::move(symbols)};
}
// Result bundle returned by get_symbol_and_constants(). Field order matters:
// the struct is brace-initialized positionally.
struct SymbolAndConstantPool {
// Hash table of symbols; each occupied slot holds a name offset and the
// byte offset of the symbol's CU vector.
GDBHashtable symbol_pool;
// Flattened CU vectors: for every symbol, one count word followed by that
// many packed CU-index-and-flags words. (Despite the field name, this holds
// the vector contents, not offsets.)
std::vector<uint32_t> cu_vector_offsets;
// Symbol names, in the order in which the symbols were added to the table.
std::vector<std::string> strings;
};
/*
 * Build the symbol hash table, the flattened CU vectors and the list of
 * symbol names from `symbols`.
 *
 * For every symbol, a CU vector (count word followed by the packed words) is
 * appended to the CU-vector area, the name is appended to the string list,
 * and a slot holding the name offset and the CU vector's byte offset is
 * added to the hash table. Prints a short size summary to stdout.
 */
SymbolAndConstantPool
get_symbol_and_constants(const SymbolMap& symbols) const {
  auto time = ::HPHP::Timer::GetCurrentTimeMicros();
  GDBHashtable symbol_hash_table;
  symbol_hash_table.init(symbols.size());

  // Case-insensitive string hash: r = r * 67 + tolower(c) - 113 per byte.
  // Each byte is handled as unsigned char: passing a negative char to
  // tolower() is undefined behaviour, and a sign-extended byte would produce
  // a different hash than a reader that walks the name as unsigned bytes.
  auto const getHashVal = [](const std::string& name) {
    uint32_t r = 0;
    for (auto const ch : name) {
      auto const c = static_cast<unsigned char>(
        tolower(static_cast<unsigned char>(ch)));
      r = r * 67 + c - 113;
    }
    return r;
  };

  // The first value is the number of CU indices in the vector
  std::vector<uint32_t> cu_vector_values;
  std::vector<std::string> strings;
  // set name_off to 1 so can use non-zero as the valid test for a
  // hash table entry.
  uint32_t name_off = 1;
  for (auto& entry : symbols) {
    // Byte offset at which this symbol's CU vector starts.
    uint32_t cu_vector_offset = cu_vector_values.size() * 4;
    cu_vector_values.push_back(entry.second.size());
    for (auto& elem : entry.second) {
      cu_vector_values.push_back(elem);
    }
    strings.push_back(entry.first);
    symbol_hash_table.add(getHashVal(entry.first),
                          GDBSymbol{name_off, cu_vector_offset});
    // Advance past this name plus one extra byte (presumably its NUL
    // terminator in the emitted string pool).
    name_off += entry.first.length() + 1;
  }
  time = log_time(time, "Get_symbol_and_constants: Populate hash table");

  // Rebase the provisional name offsets: drop the +1 bias introduced above
  // and shift them by the total size of the CU-vector area (presumably
  // because the strings are emitted right after the CU vectors).
  auto const num_cu_vector_bytes =
    cu_vector_values.size() * sizeof(cu_vector_values[0]);
  for (auto& sym : symbol_hash_table.m_hashtable) {
    if (sym.valid()) {
      sym.name_offset += num_cu_vector_bytes - 1;
    }
  }
  log_time(time, "Get_symbol_and_constants: Update symbol pool");

  std::cout << "Hash Table Size: " << symbol_hash_table.m_size <<
    " Capacity: " << symbol_hash_table.m_capacity << std::endl;
  std::cout << "Strings Size: " << strings.size() << std::endl;
  std::cout << "CU Vector Values Size: " <<
    cu_vector_values.size() << std::endl;

  return {
    std::move(symbol_hash_table),
    std::move(cu_vector_values),
    std::move(strings)
  };
}
std::string m_filename;
int m_numThreads;
};
////////////////////////////////////////////////////////////////////////////////
}
// Factory for the DWARF-backed TypeParser. Both arguments are forwarded to
// the TypeParserImpl constructor.
std::unique_ptr<TypeParser>
make_dwarf_type_parser(const std::string& filename, int num_threads) {
  std::unique_ptr<TypeParser> parser =
    std::make_unique<TypeParserImpl>(filename, num_threads);
  return parser;
}
// Factory for the DWARF-backed Printer operating on `filename`.
std::unique_ptr<Printer> make_dwarf_printer(const std::string& filename) {
  std::unique_ptr<Printer> printer = std::make_unique<PrinterImpl>(filename);
  return printer;
}
// Factory for the DWARF-backed GDBIndexer. Both arguments are forwarded to
// the GDBIndexerImpl constructor.
std::unique_ptr<GDBIndexer>
make_dwarf_gdb_indexer(const std::string& filename, int num_threads) {
  std::unique_ptr<GDBIndexer> indexer =
    std::make_unique<GDBIndexerImpl>(filename, num_threads);
  return indexer;
}
////////////////////////////////////////////////////////////////////////////////
}
#endif