in lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp [2220:4874]
void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s",
m_file.GetFilename().AsCString(""));
ModuleSP module_sp(GetModule());
if (!module_sp)
return;
Progress progress(llvm::formatv("Parsing symbol table for {0}",
m_file.GetFilename().AsCString("<Unknown>")));
llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// The data element of type bool indicates that this entry is thumb
// code.
typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
// Record the address of every function/data that we add to the symtab.
// We add symbols to the table in the order of most information (nlist
// records) to least (function starts), and avoid duplicating symbols
// via this set.
llvm::DenseSet<addr_t> symbols_added;
// We are using a llvm::DenseSet for "symbols_added" so we must be sure we
// do not add the tombstone or empty keys to the set.
auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
// Don't add the tombstone or empty keys.
if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
return;
symbols_added.insert(file_addr);
};
FunctionStarts function_starts;
lldb::offset_t offset = MachHeaderSizeFromMagic(m_header.magic);
uint32_t i;
FileSpecList dylib_files;
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_SYMBOLS));
llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
for (i = 0; i < m_header.ncmds; ++i) {
const lldb::offset_t cmd_offset = offset;
// Read in the load command and load command size
llvm::MachO::load_command lc;
if (m_data.GetU32(&offset, &lc, 2) == nullptr)
break;
// Watch for the symbol table load command
switch (lc.cmd) {
case LC_SYMTAB:
symtab_load_command.cmd = lc.cmd;
symtab_load_command.cmdsize = lc.cmdsize;
// Read in the rest of the symtab load command
if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
nullptr) // fill in symoff, nsyms, stroff, strsize fields
return;
break;
case LC_DYLD_INFO:
case LC_DYLD_INFO_ONLY:
if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
dyld_info.cmd = lc.cmd;
dyld_info.cmdsize = lc.cmdsize;
} else {
memset(&dyld_info, 0, sizeof(dyld_info));
}
break;
case LC_LOAD_DYLIB:
case LC_LOAD_WEAK_DYLIB:
case LC_REEXPORT_DYLIB:
case LC_LOADFVMLIB:
case LC_LOAD_UPWARD_DYLIB: {
uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
const char *path = m_data.PeekCStr(name_offset);
if (path) {
FileSpec file_spec(path);
// Strip the path if there is @rpath, @executable, etc so we just use
// the basename
if (path[0] == '@')
file_spec.GetDirectory().Clear();
if (lc.cmd == LC_REEXPORT_DYLIB) {
m_reexported_dylibs.AppendIfUnique(file_spec);
}
dylib_files.Append(file_spec);
}
} break;
case LC_DYLD_EXPORTS_TRIE:
exports_trie_load_command.cmd = lc.cmd;
exports_trie_load_command.cmdsize = lc.cmdsize;
if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
nullptr) // fill in offset and size fields
memset(&exports_trie_load_command, 0,
sizeof(exports_trie_load_command));
break;
case LC_FUNCTION_STARTS:
function_starts_load_command.cmd = lc.cmd;
function_starts_load_command.cmdsize = lc.cmdsize;
if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
nullptr) // fill in data offset and size fields
memset(&function_starts_load_command, 0,
sizeof(function_starts_load_command));
break;
default:
break;
}
offset = cmd_offset + lc.cmdsize;
}
if (!symtab_load_command.cmd)
return;
SectionList *section_list = GetSectionList();
if (section_list == nullptr)
return;
const uint32_t addr_byte_size = m_data.GetAddressByteSize();
const ByteOrder byte_order = m_data.GetByteOrder();
bool bit_width_32 = addr_byte_size == 4;
const size_t nlist_byte_size =
bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
addr_byte_size);
DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
const addr_t nlist_data_byte_size =
symtab_load_command.nsyms * nlist_byte_size;
const addr_t strtab_data_byte_size = symtab_load_command.strsize;
addr_t strtab_addr = LLDB_INVALID_ADDRESS;
ProcessSP process_sp(m_process_wp.lock());
Process *process = process_sp.get();
uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
bool is_shared_cache_image = IsSharedCacheBinary();
bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
SectionSP linkedit_section_sp(
section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
!is_local_shared_cache_image) {
Target &target = process->GetTarget();
memory_module_load_level = target.GetMemoryModuleLoadLevel();
// Reading mach file from memory in a process or core file...
if (linkedit_section_sp) {
addr_t linkedit_load_addr =
linkedit_section_sp->GetLoadBaseAddress(&target);
if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
// We might be trying to access the symbol table before the
// __LINKEDIT's load address has been set in the target. We can't
// fail to read the symbol table, so calculate the right address
// manually
linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
}
const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
const addr_t symoff_addr = linkedit_load_addr +
symtab_load_command.symoff -
linkedit_file_offset;
strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
linkedit_file_offset;
// Always load dyld - the dynamic linker - from memory if we didn't
// find a binary anywhere else. lldb will not register
// dylib/framework/bundle loads/unloads if we don't have the dyld
// symbols, we force dyld to load from memory despite the user's
// target.memory-module-load-level setting.
if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
m_header.filetype == llvm::MachO::MH_DYLINKER) {
DataBufferSP nlist_data_sp(
ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
if (nlist_data_sp)
nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
if (m_dysymtab.nindirectsyms != 0) {
const addr_t indirect_syms_addr = linkedit_load_addr +
m_dysymtab.indirectsymoff -
linkedit_file_offset;
DataBufferSP indirect_syms_data_sp(ReadMemory(
process_sp, indirect_syms_addr, m_dysymtab.nindirectsyms * 4));
if (indirect_syms_data_sp)
indirect_symbol_index_data.SetData(
indirect_syms_data_sp, 0,
indirect_syms_data_sp->GetByteSize());
// If this binary is outside the shared cache,
// cache the string table.
// Binaries in the shared cache all share a giant string table,
// and we can't share the string tables across multiple
// ObjectFileMachO's, so we'd end up re-reading this mega-strtab
// for every binary in the shared cache - it would be a big perf
// problem. For binaries outside the shared cache, it's faster to
// read the entire strtab at once instead of piece-by-piece as we
// process the nlist records.
if (!is_shared_cache_image) {
DataBufferSP strtab_data_sp(
ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
if (strtab_data_sp) {
strtab_data.SetData(strtab_data_sp, 0,
strtab_data_sp->GetByteSize());
}
}
}
if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
if (function_starts_load_command.cmd) {
const addr_t func_start_addr =
linkedit_load_addr + function_starts_load_command.dataoff -
linkedit_file_offset;
DataBufferSP func_start_data_sp(
ReadMemory(process_sp, func_start_addr,
function_starts_load_command.datasize));
if (func_start_data_sp)
function_starts_data.SetData(func_start_data_sp, 0,
func_start_data_sp->GetByteSize());
}
}
}
}
} else {
if (is_local_shared_cache_image) {
// The load commands in shared cache images are relative to the
// beginning of the shared cache, not the library image. The
// data we get handed when creating the ObjectFileMachO starts
// at the beginning of a specific library and spans to the end
// of the cache to be able to reach the shared LINKEDIT
// segments. We need to convert the load command offsets to be
// relative to the beginning of our specific image.
lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
lldb::offset_t linkedit_slide =
linkedit_offset - m_linkedit_original_offset;
symtab_load_command.symoff += linkedit_slide;
symtab_load_command.stroff += linkedit_slide;
dyld_info.export_off += linkedit_slide;
m_dysymtab.indirectsymoff += linkedit_slide;
function_starts_load_command.dataoff += linkedit_slide;
exports_trie_load_command.dataoff += linkedit_slide;
}
nlist_data.SetData(m_data, symtab_load_command.symoff,
nlist_data_byte_size);
strtab_data.SetData(m_data, symtab_load_command.stroff,
strtab_data_byte_size);
// We shouldn't have exports data from both the LC_DYLD_INFO command
// AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
lldbassert(!((dyld_info.export_size > 0)
&& (exports_trie_load_command.datasize > 0)));
if (dyld_info.export_size > 0) {
dyld_trie_data.SetData(m_data, dyld_info.export_off,
dyld_info.export_size);
} else if (exports_trie_load_command.datasize > 0) {
dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
exports_trie_load_command.datasize);
}
if (m_dysymtab.nindirectsyms != 0) {
indirect_symbol_index_data.SetData(m_data, m_dysymtab.indirectsymoff,
m_dysymtab.nindirectsyms * 4);
}
if (function_starts_load_command.cmd) {
function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
function_starts_load_command.datasize);
}
}
const bool have_strtab_data = strtab_data.GetByteSize() > 0;
ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
ConstString g_segment_name_DATA = GetSegmentNameDATA();
ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
SectionSP text_section_sp(
section_list->FindSectionByName(g_segment_name_TEXT));
SectionSP data_section_sp(
section_list->FindSectionByName(g_segment_name_DATA));
SectionSP data_dirty_section_sp(
section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
SectionSP data_const_section_sp(
section_list->FindSectionByName(g_segment_name_DATA_CONST));
SectionSP objc_section_sp(
section_list->FindSectionByName(g_segment_name_OBJC));
SectionSP eh_frame_section_sp;
if (text_section_sp.get())
eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
g_section_name_eh_frame);
else
eh_frame_section_sp =
section_list->FindSectionByName(g_section_name_eh_frame);
const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
// lldb works best if it knows the start address of all functions in a
// module. Linker symbols or debug info are normally the best source of
// information for start addr / size but they may be stripped in a released
// binary. Two additional sources of information exist in Mach-O binaries:
// LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
// function's start address in the
// binary, relative to the text section.
// eh_frame - the eh_frame FDEs have the start addr & size of
// each function
// LC_FUNCTION_STARTS is the fastest source to read in, and is present on
// all modern binaries.
// Binaries built to run on older releases may need to use eh_frame
// information.
if (text_section_sp && function_starts_data.GetByteSize()) {
FunctionStarts::Entry function_start_entry;
function_start_entry.data = false;
lldb::offset_t function_start_offset = 0;
function_start_entry.addr = text_section_sp->GetFileAddress();
uint64_t delta;
while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
0) {
// Now append the current entry
function_start_entry.addr += delta;
if (is_arm) {
if (function_start_entry.addr & 1) {
function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
function_start_entry.data = true;
} else if (always_thumb) {
function_start_entry.data = true;
}
}
function_starts.Append(function_start_entry);
}
} else {
// If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
// load command claiming an eh_frame but it doesn't actually have the
// eh_frame content. And if we have a dSYM, we don't need to do any of
// this fill-in-the-missing-symbols works anyway - the debug info should
// give us all the functions in the module.
if (text_section_sp.get() && eh_frame_section_sp.get() &&
m_type != eTypeDebugInfo) {
DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
DWARFCallFrameInfo::EH);
DWARFCallFrameInfo::FunctionAddressAndSizeVector functions;
eh_frame.GetFunctionAddressAndSizeVector(functions);
addr_t text_base_addr = text_section_sp->GetFileAddress();
size_t count = functions.GetSize();
for (size_t i = 0; i < count; ++i) {
const DWARFCallFrameInfo::FunctionAddressAndSizeVector::Entry *func =
functions.GetEntryAtIndex(i);
if (func) {
FunctionStarts::Entry function_start_entry;
function_start_entry.addr = func->base - text_base_addr;
if (is_arm) {
if (function_start_entry.addr & 1) {
function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
function_start_entry.data = true;
} else if (always_thumb) {
function_start_entry.data = true;
}
}
function_starts.Append(function_start_entry);
}
}
}
}
const size_t function_starts_count = function_starts.GetSize();
// For user process binaries (executables, dylibs, frameworks, bundles), if
// we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
// going to assume the binary has been stripped. Don't allow assembly
// language instruction emulation because we don't know proper function
// start boundaries.
//
// For all other types of binaries (kernels, stand-alone bare board
// binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
// sections - we should not make any assumptions about them based on that.
if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
m_allow_assembly_emulation_unwind_plans = false;
Log *unwind_or_symbol_log(lldb_private::GetLogIfAnyCategoriesSet(
LIBLLDB_LOG_SYMBOLS | LIBLLDB_LOG_UNWIND));
if (unwind_or_symbol_log)
module_sp->LogMessage(
unwind_or_symbol_log,
"no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
}
const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
? eh_frame_section_sp->GetID()
: static_cast<user_id_t>(NO_SECT);
lldb::offset_t nlist_data_offset = 0;
uint32_t N_SO_index = UINT32_MAX;
MachSymtabSectionInfo section_info(section_list);
std::vector<uint32_t> N_FUN_indexes;
std::vector<uint32_t> N_NSYM_indexes;
std::vector<uint32_t> N_INCL_indexes;
std::vector<uint32_t> N_BRAC_indexes;
std::vector<uint32_t> N_COMM_indexes;
typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
// Any symbols that get merged into another will get an entry in this map
// so we know
NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
uint32_t nlist_idx = 0;
Symbol *symbol_ptr = nullptr;
uint32_t sym_idx = 0;
Symbol *sym = nullptr;
size_t num_syms = 0;
std::string memory_symbol_name;
uint32_t unmapped_local_symbols_found = 0;
std::vector<TrieEntryWithOffset> reexport_trie_entries;
std::vector<TrieEntryWithOffset> external_sym_trie_entries;
std::set<lldb::addr_t> resolver_addresses;
if (dyld_trie_data.GetByteSize() > 0) {
ConstString text_segment_name("__TEXT");
SectionSP text_segment_sp =
GetSectionList()->FindSectionByName(text_segment_name);
lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
if (text_segment_sp)
text_segment_file_addr = text_segment_sp->GetFileAddress();
std::vector<llvm::StringRef> nameSlices;
ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
nameSlices, resolver_addresses, reexport_trie_entries,
external_sym_trie_entries);
}
typedef std::set<ConstString> IndirectSymbols;
IndirectSymbols indirect_symbol_names;
#if TARGET_OS_IPHONE
// Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
// optimized by moving LOCAL symbols out of the memory mapped portion of
// the DSC. The symbol information has all been retained, but it isn't
// available in the normal nlist data. However, there *are* duplicate
// entries of *some*
// LOCAL symbols in the normal nlist data. To handle this situation
// correctly, we must first attempt
// to parse any DSC unmapped symbol information. If we find any, we set a
// flag that tells the normal nlist parser to ignore all LOCAL symbols.
if (IsSharedCacheBinary()) {
// Before we can start mapping the DSC, we need to make certain the
// target process is actually using the cache we can find.
// Next we need to determine the correct path for the dyld shared cache.
ArchSpec header_arch = GetArchitecture();
char dsc_path[PATH_MAX];
char dsc_path_development[PATH_MAX];
snprintf(
dsc_path, sizeof(dsc_path), "%s%s%s",
"/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
*/
"dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
header_arch.GetArchitectureName());
snprintf(
dsc_path_development, sizeof(dsc_path), "%s%s%s%s",
"/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
*/
"dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
header_arch.GetArchitectureName(), ".development");
FileSpec dsc_nondevelopment_filespec(dsc_path);
FileSpec dsc_development_filespec(dsc_path_development);
FileSpec dsc_filespec;
UUID dsc_uuid;
UUID process_shared_cache_uuid;
addr_t process_shared_cache_base_addr;
if (process) {
GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
process_shared_cache_uuid);
}
// First see if we can find an exact match for the inferior process
// shared cache UUID in the development or non-development shared caches
// on disk.
if (process_shared_cache_uuid.IsValid()) {
if (FileSystem::Instance().Exists(dsc_development_filespec)) {
UUID dsc_development_uuid = GetSharedCacheUUID(
dsc_development_filespec, byte_order, addr_byte_size);
if (dsc_development_uuid.IsValid() &&
dsc_development_uuid == process_shared_cache_uuid) {
dsc_filespec = dsc_development_filespec;
dsc_uuid = dsc_development_uuid;
}
}
if (!dsc_uuid.IsValid() &&
FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) {
UUID dsc_nondevelopment_uuid = GetSharedCacheUUID(
dsc_nondevelopment_filespec, byte_order, addr_byte_size);
if (dsc_nondevelopment_uuid.IsValid() &&
dsc_nondevelopment_uuid == process_shared_cache_uuid) {
dsc_filespec = dsc_nondevelopment_filespec;
dsc_uuid = dsc_nondevelopment_uuid;
}
}
}
// Failing a UUID match, prefer the development dyld_shared cache if both
// are present.
if (!FileSystem::Instance().Exists(dsc_filespec)) {
if (FileSystem::Instance().Exists(dsc_development_filespec)) {
dsc_filespec = dsc_development_filespec;
} else {
dsc_filespec = dsc_nondevelopment_filespec;
}
}
/* The dyld_cache_header has a pointer to the
dyld_cache_local_symbols_info structure (localSymbolsOffset).
The dyld_cache_local_symbols_info structure gives us three things:
1. The start and count of the nlist records in the dyld_shared_cache
file
2. The start and size of the strings for these nlist records
3. The start and count of dyld_cache_local_symbols_entry entries
There is one dyld_cache_local_symbols_entry per dylib/framework in the
dyld shared cache.
The "dylibOffset" field is the Mach-O header of this dylib/framework in
the dyld shared cache.
The dyld_cache_local_symbols_entry also lists the start of this
dylib/framework's nlist records
and the count of how many nlist records there are for this
dylib/framework.
*/
// Process the dyld shared cache header to find the unmapped symbols
DataBufferSP dsc_data_sp = MapFileData(
dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
if (!dsc_uuid.IsValid()) {
dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size);
}
if (dsc_data_sp) {
DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size);
bool uuid_match = true;
if (dsc_uuid.IsValid() && process) {
if (process_shared_cache_uuid.IsValid() &&
dsc_uuid != process_shared_cache_uuid) {
// The on-disk dyld_shared_cache file is not the same as the one in
// this process' memory, don't use it.
uuid_match = false;
ModuleSP module_sp(GetModule());
if (module_sp)
module_sp->ReportWarning("process shared cache does not match "
"on-disk dyld_shared_cache file, some "
"symbol names will be missing.");
}
}
offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset);
uint32_t mappingOffset = dsc_header_data.GetU32(&offset);
// If the mappingOffset points to a location inside the header, we've
// opened an old dyld shared cache, and should not proceed further.
if (uuid_match &&
mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) {
DataBufferSP dsc_mapping_info_data_sp = MapFileData(
dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info),
mappingOffset);
DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp,
byte_order, addr_byte_size);
offset = 0;
// The File addresses (from the in-memory Mach-O load commands) for
// the shared libraries in the shared library cache need to be
// adjusted by an offset to match up with the dylibOffset identifying
// field in the dyld_cache_local_symbol_entry's. This offset is
// recorded in mapping_offset_value.
const uint64_t mapping_offset_value =
dsc_mapping_info_data.GetU64(&offset);
offset =
offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset);
uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset);
uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset);
if (localSymbolsOffset && localSymbolsSize) {
// Map the local symbols
DataBufferSP dsc_local_symbols_data_sp =
MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset);
if (dsc_local_symbols_data_sp) {
DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp,
byte_order, addr_byte_size);
offset = 0;
typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
UndefinedNameToDescMap undefined_name_to_desc;
SymbolIndexToName reexport_shlib_needs_fixup;
// Read the local_symbols_infos struct in one shot
struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info;
dsc_local_symbols_data.GetU32(&offset,
&local_symbols_info.nlistOffset, 6);
SectionSP text_section_sp(
section_list->FindSectionByName(GetSegmentNameTEXT()));
uint32_t header_file_offset =
(text_section_sp->GetFileAddress() - mapping_offset_value);
offset = local_symbols_info.entriesOffset;
for (uint32_t entry_index = 0;
entry_index < local_symbols_info.entriesCount; entry_index++) {
struct lldb_copy_dyld_cache_local_symbols_entry
local_symbols_entry;
local_symbols_entry.dylibOffset =
dsc_local_symbols_data.GetU32(&offset);
local_symbols_entry.nlistStartIndex =
dsc_local_symbols_data.GetU32(&offset);
local_symbols_entry.nlistCount =
dsc_local_symbols_data.GetU32(&offset);
if (header_file_offset == local_symbols_entry.dylibOffset) {
unmapped_local_symbols_found = local_symbols_entry.nlistCount;
// The normal nlist code cannot correctly size the Symbols
// array, we need to allocate it here.
sym = symtab.Resize(
symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
unmapped_local_symbols_found - m_dysymtab.nlocalsym);
num_syms = symtab.GetNumSymbols();
nlist_data_offset =
local_symbols_info.nlistOffset +
(nlist_byte_size * local_symbols_entry.nlistStartIndex);
uint32_t string_table_offset = local_symbols_info.stringsOffset;
for (uint32_t nlist_index = 0;
nlist_index < local_symbols_entry.nlistCount;
nlist_index++) {
/////////////////////////////
{
llvm::Optional<struct nlist_64> nlist_maybe =
ParseNList(dsc_local_symbols_data, nlist_data_offset,
nlist_byte_size);
if (!nlist_maybe)
break;
struct nlist_64 nlist = *nlist_maybe;
SymbolType type = eSymbolTypeInvalid;
const char *symbol_name = dsc_local_symbols_data.PeekCStr(
string_table_offset + nlist.n_strx);
if (symbol_name == NULL) {
// No symbol should be NULL, even the symbols with no
// string values should have an offset zero which
// points to an empty C-string
Host::SystemLog(
Host::eSystemLogError,
"error: DSC unmapped local symbol[%u] has invalid "
"string table offset 0x%x in %s, ignoring symbol\n",
entry_index, nlist.n_strx,
module_sp->GetFileSpec().GetPath().c_str());
continue;
}
if (symbol_name[0] == '\0')
symbol_name = NULL;
const char *symbol_name_non_abi_mangled = NULL;
SectionSP symbol_section;
uint32_t symbol_byte_size = 0;
bool add_nlist = true;
bool is_debug = ((nlist.n_type & N_STAB) != 0);
bool demangled_is_synthesized = false;
bool is_gsym = false;
bool set_value = true;
assert(sym_idx < num_syms);
sym[sym_idx].SetDebug(is_debug);
if (is_debug) {
switch (nlist.n_type) {
case N_GSYM:
// global symbol: name,,NO_SECT,type,0
// Sometimes the N_GSYM value contains the address.
// FIXME: In the .o files, we have a GSYM and a debug
// symbol for all the ObjC data. They
// have the same address, but we want to ensure that
// we always find only the real symbol, 'cause we
// don't currently correctly attribute the
// GSYM one to the ObjCClass/Ivar/MetaClass
// symbol type. This is a temporary hack to make
// sure the ObjectiveC symbols get treated correctly.
// To do this right, we should coalesce all the GSYM
// & global symbols that have the same address.
is_gsym = true;
sym[sym_idx].SetExternal(true);
if (symbol_name && symbol_name[0] == '_' &&
symbol_name[1] == 'O') {
llvm::StringRef symbol_name_ref(symbol_name);
if (symbol_name_ref.startswith(
g_objc_v2_prefix_class)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name =
symbol_name + g_objc_v2_prefix_class.size();
type = eSymbolTypeObjCClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(
g_objc_v2_prefix_metaclass)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name =
symbol_name + g_objc_v2_prefix_metaclass.size();
type = eSymbolTypeObjCMetaClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(
g_objc_v2_prefix_ivar)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name =
symbol_name + g_objc_v2_prefix_ivar.size();
type = eSymbolTypeObjCIVar;
demangled_is_synthesized = true;
}
} else {
if (nlist.n_value != 0)
symbol_section = section_info.GetSection(
nlist.n_sect, nlist.n_value);
type = eSymbolTypeData;
}
break;
case N_FNAME:
// procedure name (f77 kludge): name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_FUN:
// procedure: name,,n_sect,linenumber,address
if (symbol_name) {
type = eSymbolTypeCode;
symbol_section = section_info.GetSection(
nlist.n_sect, nlist.n_value);
N_FUN_addr_to_sym_idx.insert(
std::make_pair(nlist.n_value, sym_idx));
// We use the current number of symbols in the
// symbol table in lieu of using nlist_idx in case
// we ever start trimming entries out
N_FUN_indexes.push_back(sym_idx);
} else {
type = eSymbolTypeCompiler;
if (!N_FUN_indexes.empty()) {
// Copy the size of the function into the
// original
// STAB entry so we don't have
// to hunt for it later
symtab.SymbolAtIndex(N_FUN_indexes.back())
->SetByteSize(nlist.n_value);
N_FUN_indexes.pop_back();
// We don't really need the end function STAB as
// it contains the size which we already placed
// with the original symbol, so don't add it if
// we want a minimal symbol table
add_nlist = false;
}
}
break;
case N_STSYM:
// static symbol: name,,n_sect,type,address
N_STSYM_addr_to_sym_idx.insert(
std::make_pair(nlist.n_value, sym_idx));
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
if (symbol_name && symbol_name[0]) {
type = ObjectFile::GetSymbolTypeFromName(
symbol_name + 1, eSymbolTypeData);
}
break;
case N_LCSYM:
// .lcomm symbol: name,,n_sect,type,address
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
type = eSymbolTypeCommonBlock;
break;
case N_BNSYM:
// We use the current number of symbols in the symbol
// table in lieu of using nlist_idx in case we ever
// start trimming entries out Skip these if we want
// minimal symbol tables
add_nlist = false;
break;
case N_ENSYM:
// Set the size of the N_BNSYM to the terminating
// index of this N_ENSYM so that we can always skip
// the entire symbol if we need to navigate more
// quickly at the source level when parsing STABS
// Skip these if we want minimal symbol tables
add_nlist = false;
break;
case N_OPT:
// emitted with gcc2_compiled and in gcc source
type = eSymbolTypeCompiler;
break;
case N_RSYM:
// register sym: name,,NO_SECT,type,register
type = eSymbolTypeVariable;
break;
case N_SLINE:
// src line: 0,,n_sect,linenumber,address
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
type = eSymbolTypeLineEntry;
break;
case N_SSYM:
// structure elt: name,,NO_SECT,type,struct_offset
type = eSymbolTypeVariableType;
break;
case N_SO:
// source file name
type = eSymbolTypeSourceFile;
if (symbol_name == NULL) {
add_nlist = false;
if (N_SO_index != UINT32_MAX) {
// Set the size of the N_SO to the terminating
// index of this N_SO so that we can always skip
// the entire N_SO if we need to navigate more
// quickly at the source level when parsing STABS
symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
symbol_ptr->SetByteSize(sym_idx);
symbol_ptr->SetSizeIsSibling(true);
}
N_NSYM_indexes.clear();
N_INCL_indexes.clear();
N_BRAC_indexes.clear();
N_COMM_indexes.clear();
N_FUN_indexes.clear();
N_SO_index = UINT32_MAX;
} else {
// We use the current number of symbols in the
// symbol table in lieu of using nlist_idx in case
// we ever start trimming entries out
const bool N_SO_has_full_path = symbol_name[0] == '/';
if (N_SO_has_full_path) {
if ((N_SO_index == sym_idx - 1) &&
((sym_idx - 1) < num_syms)) {
// We have two consecutive N_SO entries where
// the first contains a directory and the
// second contains a full path.
sym[sym_idx - 1].GetMangled().SetValue(
ConstString(symbol_name), false);
m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
add_nlist = false;
} else {
// This is the first entry in a N_SO that
// contains a directory or
// a full path to the source file
N_SO_index = sym_idx;
}
} else if ((N_SO_index == sym_idx - 1) &&
((sym_idx - 1) < num_syms)) {
// This is usually the second N_SO entry that
// contains just the filename, so here we combine
// it with the first one if we are minimizing the
// symbol table
const char *so_path = sym[sym_idx - 1]
.GetMangled()
.GetDemangledName()
.AsCString();
if (so_path && so_path[0]) {
std::string full_so_path(so_path);
const size_t double_slash_pos =
full_so_path.find("//");
if (double_slash_pos != std::string::npos) {
// The linker has been generating bad N_SO
// entries with doubled up paths
// in the format "%s%s" where the first
// string in the DW_AT_comp_dir, and the
// second is the directory for the source
// file so you end up with a path that looks
// like "/tmp/src//tmp/src/"
FileSpec so_dir(so_path);
if (!FileSystem::Instance().Exists(so_dir)) {
so_dir.SetFile(
&full_so_path[double_slash_pos + 1],
FileSpec::Style::native);
if (FileSystem::Instance().Exists(so_dir)) {
// Trim off the incorrect path
full_so_path.erase(0, double_slash_pos + 1);
}
}
}
if (*full_so_path.rbegin() != '/')
full_so_path += '/';
full_so_path += symbol_name;
sym[sym_idx - 1].GetMangled().SetValue(
ConstString(full_so_path.c_str()), false);
add_nlist = false;
m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
}
} else {
// This could be a relative path to a N_SO
N_SO_index = sym_idx;
}
}
break;
case N_OSO:
// object file name: name,,0,0,st_mtime
type = eSymbolTypeObjectFile;
break;
case N_LSYM:
// local sym: name,,NO_SECT,type,offset
type = eSymbolTypeLocal;
break;
// INCL scopes
case N_BINCL:
// include file beginning: name,,NO_SECT,0,sum We use
// the current number of symbols in the symbol table
// in lieu of using nlist_idx in case we ever start
// trimming entries out
N_INCL_indexes.push_back(sym_idx);
type = eSymbolTypeScopeBegin;
break;
case N_EINCL:
// include file end: name,,NO_SECT,0,0
// Set the size of the N_BINCL to the terminating
// index of this N_EINCL so that we can always skip
// the entire symbol if we need to navigate more
// quickly at the source level when parsing STABS
if (!N_INCL_indexes.empty()) {
symbol_ptr =
symtab.SymbolAtIndex(N_INCL_indexes.back());
symbol_ptr->SetByteSize(sym_idx + 1);
symbol_ptr->SetSizeIsSibling(true);
N_INCL_indexes.pop_back();
}
type = eSymbolTypeScopeEnd;
break;
case N_SOL:
// #included file name: name,,n_sect,0,address
type = eSymbolTypeHeaderFile;
// We currently don't use the header files on darwin
add_nlist = false;
break;
case N_PARAMS:
// compiler parameters: name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_VERSION:
// compiler version: name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_OLEVEL:
// compiler -O level: name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_PSYM:
// parameter: name,,NO_SECT,type,offset
type = eSymbolTypeVariable;
break;
case N_ENTRY:
// alternate entry: name,,n_sect,linenumber,address
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
type = eSymbolTypeLineEntry;
break;
// Left and Right Braces
case N_LBRAC:
// left bracket: 0,,NO_SECT,nesting level,address We
// use the current number of symbols in the symbol
// table in lieu of using nlist_idx in case we ever
// start trimming entries out
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
N_BRAC_indexes.push_back(sym_idx);
type = eSymbolTypeScopeBegin;
break;
case N_RBRAC:
// right bracket: 0,,NO_SECT,nesting level,address
// Set the size of the N_LBRAC to the terminating
// index of this N_RBRAC so that we can always skip
// the entire symbol if we need to navigate more
// quickly at the source level when parsing STABS
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
if (!N_BRAC_indexes.empty()) {
symbol_ptr =
symtab.SymbolAtIndex(N_BRAC_indexes.back());
symbol_ptr->SetByteSize(sym_idx + 1);
symbol_ptr->SetSizeIsSibling(true);
N_BRAC_indexes.pop_back();
}
type = eSymbolTypeScopeEnd;
break;
case N_EXCL:
// deleted include file: name,,NO_SECT,0,sum
type = eSymbolTypeHeaderFile;
break;
// COMM scopes
case N_BCOMM:
// begin common: name,,NO_SECT,0,0
// We use the current number of symbols in the symbol
// table in lieu of using nlist_idx in case we ever
// start trimming entries out
type = eSymbolTypeScopeBegin;
N_COMM_indexes.push_back(sym_idx);
break;
case N_ECOML:
// end common (local name): 0,,n_sect,0,address
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
// Fall through
case N_ECOMM:
// end common: name,,n_sect,0,0
// Set the size of the N_BCOMM to the terminating
// index of this N_ECOMM/N_ECOML so that we can
// always skip the entire symbol if we need to
// navigate more quickly at the source level when
// parsing STABS
if (!N_COMM_indexes.empty()) {
symbol_ptr =
symtab.SymbolAtIndex(N_COMM_indexes.back());
symbol_ptr->SetByteSize(sym_idx + 1);
symbol_ptr->SetSizeIsSibling(true);
N_COMM_indexes.pop_back();
}
type = eSymbolTypeScopeEnd;
break;
case N_LENG:
// second stab entry with length information
type = eSymbolTypeAdditional;
break;
default:
break;
}
} else {
// uint8_t n_pext = N_PEXT & nlist.n_type;
uint8_t n_type = N_TYPE & nlist.n_type;
sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
switch (n_type) {
case N_INDR: {
const char *reexport_name_cstr =
strtab_data.PeekCStr(nlist.n_value);
if (reexport_name_cstr && reexport_name_cstr[0]) {
type = eSymbolTypeReExported;
ConstString reexport_name(
reexport_name_cstr +
((reexport_name_cstr[0] == '_') ? 1 : 0));
sym[sym_idx].SetReExportedSymbolName(reexport_name);
set_value = false;
reexport_shlib_needs_fixup[sym_idx] = reexport_name;
indirect_symbol_names.insert(ConstString(
symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
} else
type = eSymbolTypeUndefined;
} break;
case N_UNDF:
if (symbol_name && symbol_name[0]) {
ConstString undefined_name(
symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
undefined_name_to_desc[undefined_name] = nlist.n_desc;
}
// Fall through
case N_PBUD:
type = eSymbolTypeUndefined;
break;
case N_ABS:
type = eSymbolTypeAbsolute;
break;
case N_SECT: {
symbol_section = section_info.GetSection(nlist.n_sect,
nlist.n_value);
if (symbol_section == NULL) {
// TODO: warn about this?
add_nlist = false;
break;
}
if (TEXT_eh_frame_sectID == nlist.n_sect) {
type = eSymbolTypeException;
} else {
uint32_t section_type =
symbol_section->Get() & SECTION_TYPE;
switch (section_type) {
case S_CSTRING_LITERALS:
type = eSymbolTypeData;
break; // section with only literal C strings
case S_4BYTE_LITERALS:
type = eSymbolTypeData;
break; // section with only 4 byte literals
case S_8BYTE_LITERALS:
type = eSymbolTypeData;
break; // section with only 8 byte literals
case S_LITERAL_POINTERS:
type = eSymbolTypeTrampoline;
break; // section with only pointers to literals
case S_NON_LAZY_SYMBOL_POINTERS:
type = eSymbolTypeTrampoline;
break; // section with only non-lazy symbol
// pointers
case S_LAZY_SYMBOL_POINTERS:
type = eSymbolTypeTrampoline;
break; // section with only lazy symbol pointers
case S_SYMBOL_STUBS:
type = eSymbolTypeTrampoline;
break; // section with only symbol stubs, byte
// size of stub in the reserved2 field
case S_MOD_INIT_FUNC_POINTERS:
type = eSymbolTypeCode;
break; // section with only function pointers for
// initialization
case S_MOD_TERM_FUNC_POINTERS:
type = eSymbolTypeCode;
break; // section with only function pointers for
// termination
case S_INTERPOSING:
type = eSymbolTypeTrampoline;
break; // section with only pairs of function
// pointers for interposing
case S_16BYTE_LITERALS:
type = eSymbolTypeData;
break; // section with only 16 byte literals
case S_DTRACE_DOF:
type = eSymbolTypeInstrumentation;
break;
case S_LAZY_DYLIB_SYMBOL_POINTERS:
type = eSymbolTypeTrampoline;
break;
default:
switch (symbol_section->GetType()) {
case lldb::eSectionTypeCode:
type = eSymbolTypeCode;
break;
case eSectionTypeData:
case eSectionTypeDataCString: // Inlined C string
// data
case eSectionTypeDataCStringPointers: // Pointers
// to C
// string
// data
case eSectionTypeDataSymbolAddress: // Address of
// a symbol in
// the symbol
// table
case eSectionTypeData4:
case eSectionTypeData8:
case eSectionTypeData16:
type = eSymbolTypeData;
break;
default:
break;
}
break;
}
if (type == eSymbolTypeInvalid) {
const char *symbol_sect_name =
symbol_section->GetName().AsCString();
if (symbol_section->IsDescendant(
text_section_sp.get())) {
if (symbol_section->IsClear(
S_ATTR_PURE_INSTRUCTIONS |
S_ATTR_SELF_MODIFYING_CODE |
S_ATTR_SOME_INSTRUCTIONS))
type = eSymbolTypeData;
else
type = eSymbolTypeCode;
} else if (symbol_section->IsDescendant(
data_section_sp.get()) ||
symbol_section->IsDescendant(
data_dirty_section_sp.get()) ||
symbol_section->IsDescendant(
data_const_section_sp.get())) {
if (symbol_sect_name &&
::strstr(symbol_sect_name, "__objc") ==
symbol_sect_name) {
type = eSymbolTypeRuntime;
if (symbol_name) {
llvm::StringRef symbol_name_ref(symbol_name);
if (symbol_name_ref.startswith("_OBJC_")) {
llvm::StringRef
g_objc_v2_prefix_class(
"_OBJC_CLASS_$_");
llvm::StringRef
g_objc_v2_prefix_metaclass(
"_OBJC_METACLASS_$_");
llvm::StringRef
g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
if (symbol_name_ref.startswith(
g_objc_v2_prefix_class)) {
symbol_name_non_abi_mangled =
symbol_name + 1;
symbol_name =
symbol_name +
g_objc_v2_prefix_class.size();
type = eSymbolTypeObjCClass;
demangled_is_synthesized = true;
} else if (
symbol_name_ref.startswith(
g_objc_v2_prefix_metaclass)) {
symbol_name_non_abi_mangled =
symbol_name + 1;
symbol_name =
symbol_name +
g_objc_v2_prefix_metaclass.size();
type = eSymbolTypeObjCMetaClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(
g_objc_v2_prefix_ivar)) {
symbol_name_non_abi_mangled =
symbol_name + 1;
symbol_name =
symbol_name +
g_objc_v2_prefix_ivar.size();
type = eSymbolTypeObjCIVar;
demangled_is_synthesized = true;
}
}
}
} else if (symbol_sect_name &&
::strstr(symbol_sect_name,
"__gcc_except_tab") ==
symbol_sect_name) {
type = eSymbolTypeException;
} else {
type = eSymbolTypeData;
}
} else if (symbol_sect_name &&
::strstr(symbol_sect_name, "__IMPORT") ==
symbol_sect_name) {
type = eSymbolTypeTrampoline;
} else if (symbol_section->IsDescendant(
objc_section_sp.get())) {
type = eSymbolTypeRuntime;
if (symbol_name && symbol_name[0] == '.') {
llvm::StringRef symbol_name_ref(symbol_name);
llvm::StringRef
g_objc_v1_prefix_class(".objc_class_name_");
if (symbol_name_ref.startswith(
g_objc_v1_prefix_class)) {
symbol_name_non_abi_mangled = symbol_name;
symbol_name = symbol_name +
g_objc_v1_prefix_class.size();
type = eSymbolTypeObjCClass;
demangled_is_synthesized = true;
}
}
}
}
}
} break;
}
}
if (add_nlist) {
uint64_t symbol_value = nlist.n_value;
if (symbol_name_non_abi_mangled) {
sym[sym_idx].GetMangled().SetMangledName(
ConstString(symbol_name_non_abi_mangled));
sym[sym_idx].GetMangled().SetDemangledName(
ConstString(symbol_name));
} else {
bool symbol_name_is_mangled = false;
if (symbol_name && symbol_name[0] == '_') {
symbol_name_is_mangled = symbol_name[1] == '_';
symbol_name++; // Skip the leading underscore
}
if (symbol_name) {
ConstString const_symbol_name(symbol_name);
sym[sym_idx].GetMangled().SetValue(
const_symbol_name, symbol_name_is_mangled);
if (is_gsym && is_debug) {
const char *gsym_name =
sym[sym_idx]
.GetMangled()
.GetName(Mangled::ePreferMangled)
.GetCString();
if (gsym_name)
N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
}
}
}
if (symbol_section) {
const addr_t section_file_addr =
symbol_section->GetFileAddress();
if (symbol_byte_size == 0 &&
function_starts_count > 0) {
addr_t symbol_lookup_file_addr = nlist.n_value;
// Do an exact address match for non-ARM addresses,
// else get the closest since the symbol might be a
// thumb symbol which has an address with bit zero
// set
FunctionStarts::Entry *func_start_entry =
function_starts.FindEntry(symbol_lookup_file_addr,
!is_arm);
if (is_arm && func_start_entry) {
// Verify that the function start address is the
// symbol address (ARM) or the symbol address + 1
// (thumb)
if (func_start_entry->addr !=
symbol_lookup_file_addr &&
func_start_entry->addr !=
(symbol_lookup_file_addr + 1)) {
// Not the right entry, NULL it out...
func_start_entry = NULL;
}
}
if (func_start_entry) {
func_start_entry->data = true;
addr_t symbol_file_addr = func_start_entry->addr;
uint32_t symbol_flags = 0;
if (is_arm) {
if (symbol_file_addr & 1)
symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
}
const FunctionStarts::Entry *next_func_start_entry =
function_starts.FindNextEntry(func_start_entry);
const addr_t section_end_file_addr =
section_file_addr +
symbol_section->GetByteSize();
if (next_func_start_entry) {
addr_t next_symbol_file_addr =
next_func_start_entry->addr;
// Be sure the clear the Thumb address bit when
// we calculate the size from the current and
// next address
if (is_arm)
next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
symbol_byte_size = std::min<lldb::addr_t>(
next_symbol_file_addr - symbol_file_addr,
section_end_file_addr - symbol_file_addr);
} else {
symbol_byte_size =
section_end_file_addr - symbol_file_addr;
}
}
}
symbol_value -= section_file_addr;
}
if (is_debug == false) {
if (type == eSymbolTypeCode) {
// See if we can find a N_FUN entry for any code
// symbols. If we do find a match, and the name
// matches, then we can merge the two into just the
// function symbol to avoid duplicate entries in
// the symbol table
auto range =
N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
if (range.first != range.second) {
bool found_it = false;
for (auto pos = range.first; pos != range.second;
++pos) {
if (sym[sym_idx].GetMangled().GetName(
Mangled::ePreferMangled) ==
sym[pos->second].GetMangled().GetName(
Mangled::ePreferMangled)) {
m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
// We just need the flags from the linker
// symbol, so put these flags
// into the N_FUN flags to avoid duplicate
// symbols in the symbol table
sym[pos->second].SetExternal(
sym[sym_idx].IsExternal());
sym[pos->second].SetFlags(nlist.n_type << 16 |
nlist.n_desc);
if (resolver_addresses.find(nlist.n_value) !=
resolver_addresses.end())
sym[pos->second].SetType(eSymbolTypeResolver);
sym[sym_idx].Clear();
found_it = true;
break;
}
}
if (found_it)
continue;
} else {
if (resolver_addresses.find(nlist.n_value) !=
resolver_addresses.end())
type = eSymbolTypeResolver;
}
} else if (type == eSymbolTypeData ||
type == eSymbolTypeObjCClass ||
type == eSymbolTypeObjCMetaClass ||
type == eSymbolTypeObjCIVar) {
// See if we can find a N_STSYM entry for any data
// symbols. If we do find a match, and the name
// matches, then we can merge the two into just the
// Static symbol to avoid duplicate entries in the
// symbol table
auto range = N_STSYM_addr_to_sym_idx.equal_range(
nlist.n_value);
if (range.first != range.second) {
bool found_it = false;
for (auto pos = range.first; pos != range.second;
++pos) {
if (sym[sym_idx].GetMangled().GetName(
Mangled::ePreferMangled) ==
sym[pos->second].GetMangled().GetName(
Mangled::ePreferMangled)) {
m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
// We just need the flags from the linker
// symbol, so put these flags
// into the N_STSYM flags to avoid duplicate
// symbols in the symbol table
sym[pos->second].SetExternal(
sym[sym_idx].IsExternal());
sym[pos->second].SetFlags(nlist.n_type << 16 |
nlist.n_desc);
sym[sym_idx].Clear();
found_it = true;
break;
}
}
if (found_it)
continue;
} else {
const char *gsym_name =
sym[sym_idx]
.GetMangled()
.GetName(Mangled::ePreferMangled)
.GetCString();
if (gsym_name) {
// Combine N_GSYM stab entries with the non
// stab symbol
ConstNameToSymbolIndexMap::const_iterator pos =
N_GSYM_name_to_sym_idx.find(gsym_name);
if (pos != N_GSYM_name_to_sym_idx.end()) {
const uint32_t GSYM_sym_idx = pos->second;
m_nlist_idx_to_sym_idx[nlist_idx] =
GSYM_sym_idx;
// Copy the address, because often the N_GSYM
// address has an invalid address of zero
// when the global is a common symbol
sym[GSYM_sym_idx].GetAddressRef().SetSection(
symbol_section);
sym[GSYM_sym_idx].GetAddressRef().SetOffset(
symbol_value);
add_symbol_addr(sym[GSYM_sym_idx]
.GetAddress()
.GetFileAddress());
// We just need the flags from the linker
// symbol, so put these flags
// into the N_GSYM flags to avoid duplicate
// symbols in the symbol table
sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
nlist.n_desc);
sym[sym_idx].Clear();
continue;
}
}
}
}
}
sym[sym_idx].SetID(nlist_idx);
sym[sym_idx].SetType(type);
if (set_value) {
sym[sym_idx].GetAddressRef().SetSection(symbol_section);
sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
add_symbol_addr(
sym[sym_idx].GetAddress().GetFileAddress());
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
if (symbol_byte_size > 0)
sym[sym_idx].SetByteSize(symbol_byte_size);
if (demangled_is_synthesized)
sym[sym_idx].SetDemangledNameIsSynthesized(true);
++sym_idx;
} else {
sym[sym_idx].Clear();
}
}
/////////////////////////////
}
break; // No more entries to consider
}
}
for (const auto &pos : reexport_shlib_needs_fixup) {
const auto undef_pos = undefined_name_to_desc.find(pos.second);
if (undef_pos != undefined_name_to_desc.end()) {
const uint8_t dylib_ordinal =
llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
sym[pos.first].SetReExportedSymbolSharedLibrary(
dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
}
}
}
}
}
}
}
// Must reset this in case it was mutated above!
nlist_data_offset = 0;
#endif
if (nlist_data.GetByteSize() > 0) {
// If the sym array was not created while parsing the DSC unmapped
// symbols, create it now.
if (sym == nullptr) {
sym =
symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
num_syms = symtab.GetNumSymbols();
}
if (unmapped_local_symbols_found) {
assert(m_dysymtab.ilocalsym == 0);
nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
nlist_idx = m_dysymtab.nlocalsym;
} else {
nlist_idx = 0;
}
typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
UndefinedNameToDescMap undefined_name_to_desc;
SymbolIndexToName reexport_shlib_needs_fixup;
// Symtab parsing is a huge mess. Everything is entangled and the code
// requires access to a ridiculous amount of variables. LLDB depends
// heavily on the proper merging of symbols and to get that right we need
// to make sure we have parsed all the debug symbols first. Therefore we
// invoke the lambda twice, once to parse only the debug symbols and then
// once more to parse the remaining symbols.
auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
bool debug_only) {
const bool is_debug = ((nlist.n_type & N_STAB) != 0);
if (is_debug != debug_only)
return true;
const char *symbol_name_non_abi_mangled = nullptr;
const char *symbol_name = nullptr;
if (have_strtab_data) {
symbol_name = strtab_data.PeekCStr(nlist.n_strx);
if (symbol_name == nullptr) {
// No symbol should be NULL, even the symbols with no string values
// should have an offset zero which points to an empty C-string
Host::SystemLog(Host::eSystemLogError,
"error: symbol[%u] has invalid string table offset "
"0x%x in %s, ignoring symbol\n",
nlist_idx, nlist.n_strx,
module_sp->GetFileSpec().GetPath().c_str());
return true;
}
if (symbol_name[0] == '\0')
symbol_name = nullptr;
} else {
const addr_t str_addr = strtab_addr + nlist.n_strx;
Status str_error;
if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
str_error))
symbol_name = memory_symbol_name.c_str();
}
SymbolType type = eSymbolTypeInvalid;
SectionSP symbol_section;
lldb::addr_t symbol_byte_size = 0;
bool add_nlist = true;
bool is_gsym = false;
bool demangled_is_synthesized = false;
bool set_value = true;
assert(sym_idx < num_syms);
sym[sym_idx].SetDebug(is_debug);
if (is_debug) {
switch (nlist.n_type) {
case N_GSYM:
// global symbol: name,,NO_SECT,type,0
// Sometimes the N_GSYM value contains the address.
// FIXME: In the .o files, we have a GSYM and a debug symbol for all
// the ObjC data. They
// have the same address, but we want to ensure that we always find
// only the real symbol, 'cause we don't currently correctly
// attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
// type. This is a temporary hack to make sure the ObjectiveC
// symbols get treated correctly. To do this right, we should
// coalesce all the GSYM & global symbols that have the same
// address.
is_gsym = true;
sym[sym_idx].SetExternal(true);
if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
llvm::StringRef symbol_name_ref(symbol_name);
if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name = symbol_name + g_objc_v2_prefix_class.size();
type = eSymbolTypeObjCClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
type = eSymbolTypeObjCMetaClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
type = eSymbolTypeObjCIVar;
demangled_is_synthesized = true;
}
} else {
if (nlist.n_value != 0)
symbol_section =
section_info.GetSection(nlist.n_sect, nlist.n_value);
type = eSymbolTypeData;
}
break;
case N_FNAME:
// procedure name (f77 kludge): name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_FUN:
// procedure: name,,n_sect,linenumber,address
if (symbol_name) {
type = eSymbolTypeCode;
symbol_section =
section_info.GetSection(nlist.n_sect, nlist.n_value);
N_FUN_addr_to_sym_idx.insert(
std::make_pair(nlist.n_value, sym_idx));
// We use the current number of symbols in the symbol table in
// lieu of using nlist_idx in case we ever start trimming entries
// out
N_FUN_indexes.push_back(sym_idx);
} else {
type = eSymbolTypeCompiler;
if (!N_FUN_indexes.empty()) {
// Copy the size of the function into the original STAB entry
// so we don't have to hunt for it later
symtab.SymbolAtIndex(N_FUN_indexes.back())
->SetByteSize(nlist.n_value);
N_FUN_indexes.pop_back();
// We don't really need the end function STAB as it contains
// the size which we already placed with the original symbol,
// so don't add it if we want a minimal symbol table
add_nlist = false;
}
}
break;
case N_STSYM:
// static symbol: name,,n_sect,type,address
N_STSYM_addr_to_sym_idx.insert(
std::make_pair(nlist.n_value, sym_idx));
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
if (symbol_name && symbol_name[0]) {
type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
eSymbolTypeData);
}
break;
case N_LCSYM:
// .lcomm symbol: name,,n_sect,type,address
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
type = eSymbolTypeCommonBlock;
break;
case N_BNSYM:
// We use the current number of symbols in the symbol table in lieu
// of using nlist_idx in case we ever start trimming entries out
// Skip these if we want minimal symbol tables
add_nlist = false;
break;
case N_ENSYM:
// Set the size of the N_BNSYM to the terminating index of this
// N_ENSYM so that we can always skip the entire symbol if we need
// to navigate more quickly at the source level when parsing STABS
// Skip these if we want minimal symbol tables
add_nlist = false;
break;
case N_OPT:
// emitted with gcc2_compiled and in gcc source
type = eSymbolTypeCompiler;
break;
case N_RSYM:
// register sym: name,,NO_SECT,type,register
type = eSymbolTypeVariable;
break;
case N_SLINE:
// src line: 0,,n_sect,linenumber,address
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
type = eSymbolTypeLineEntry;
break;
case N_SSYM:
// structure elt: name,,NO_SECT,type,struct_offset
type = eSymbolTypeVariableType;
break;
case N_SO:
// source file name
type = eSymbolTypeSourceFile;
if (symbol_name == nullptr) {
add_nlist = false;
if (N_SO_index != UINT32_MAX) {
// Set the size of the N_SO to the terminating index of this
// N_SO so that we can always skip the entire N_SO if we need
// to navigate more quickly at the source level when parsing
// STABS
symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
symbol_ptr->SetByteSize(sym_idx);
symbol_ptr->SetSizeIsSibling(true);
}
N_NSYM_indexes.clear();
N_INCL_indexes.clear();
N_BRAC_indexes.clear();
N_COMM_indexes.clear();
N_FUN_indexes.clear();
N_SO_index = UINT32_MAX;
} else {
// We use the current number of symbols in the symbol table in
// lieu of using nlist_idx in case we ever start trimming entries
// out
const bool N_SO_has_full_path = symbol_name[0] == '/';
if (N_SO_has_full_path) {
if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
// We have two consecutive N_SO entries where the first
// contains a directory and the second contains a full path.
sym[sym_idx - 1].GetMangled().SetValue(ConstString(symbol_name),
false);
m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
add_nlist = false;
} else {
// This is the first entry in a N_SO that contains a
// directory or a full path to the source file
N_SO_index = sym_idx;
}
} else if ((N_SO_index == sym_idx - 1) &&
((sym_idx - 1) < num_syms)) {
// This is usually the second N_SO entry that contains just the
// filename, so here we combine it with the first one if we are
// minimizing the symbol table
const char *so_path =
sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
if (so_path && so_path[0]) {
std::string full_so_path(so_path);
const size_t double_slash_pos = full_so_path.find("//");
if (double_slash_pos != std::string::npos) {
// The linker has been generating bad N_SO entries with
// doubled up paths in the format "%s%s" where the first
// string in the DW_AT_comp_dir, and the second is the
// directory for the source file so you end up with a path
// that looks like "/tmp/src//tmp/src/"
FileSpec so_dir(so_path);
if (!FileSystem::Instance().Exists(so_dir)) {
so_dir.SetFile(&full_so_path[double_slash_pos + 1],
FileSpec::Style::native);
if (FileSystem::Instance().Exists(so_dir)) {
// Trim off the incorrect path
full_so_path.erase(0, double_slash_pos + 1);
}
}
}
if (*full_so_path.rbegin() != '/')
full_so_path += '/';
full_so_path += symbol_name;
sym[sym_idx - 1].GetMangled().SetValue(
ConstString(full_so_path.c_str()), false);
add_nlist = false;
m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
}
} else {
// This could be a relative path to a N_SO
N_SO_index = sym_idx;
}
}
break;
case N_OSO:
// object file name: name,,0,0,st_mtime
type = eSymbolTypeObjectFile;
break;
case N_LSYM:
// local sym: name,,NO_SECT,type,offset
type = eSymbolTypeLocal;
break;
// INCL scopes
case N_BINCL:
// include file beginning: name,,NO_SECT,0,sum We use the current
// number of symbols in the symbol table in lieu of using nlist_idx
// in case we ever start trimming entries out
N_INCL_indexes.push_back(sym_idx);
type = eSymbolTypeScopeBegin;
break;
case N_EINCL:
// include file end: name,,NO_SECT,0,0
// Set the size of the N_BINCL to the terminating index of this
// N_EINCL so that we can always skip the entire symbol if we need
// to navigate more quickly at the source level when parsing STABS
if (!N_INCL_indexes.empty()) {
symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
symbol_ptr->SetByteSize(sym_idx + 1);
symbol_ptr->SetSizeIsSibling(true);
N_INCL_indexes.pop_back();
}
type = eSymbolTypeScopeEnd;
break;
case N_SOL:
// #included file name: name,,n_sect,0,address
type = eSymbolTypeHeaderFile;
// We currently don't use the header files on darwin
add_nlist = false;
break;
case N_PARAMS:
// compiler parameters: name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_VERSION:
// compiler version: name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_OLEVEL:
// compiler -O level: name,,NO_SECT,0,0
type = eSymbolTypeCompiler;
break;
case N_PSYM:
// parameter: name,,NO_SECT,type,offset
type = eSymbolTypeVariable;
break;
case N_ENTRY:
// alternate entry: name,,n_sect,linenumber,address
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
type = eSymbolTypeLineEntry;
break;
// Left and Right Braces
case N_LBRAC:
// left bracket: 0,,NO_SECT,nesting level,address We use the
// current number of symbols in the symbol table in lieu of using
// nlist_idx in case we ever start trimming entries out
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
N_BRAC_indexes.push_back(sym_idx);
type = eSymbolTypeScopeBegin;
break;
case N_RBRAC:
// right bracket: 0,,NO_SECT,nesting level,address Set the size of
// the N_LBRAC to the terminating index of this N_RBRAC so that we
// can always skip the entire symbol if we need to navigate more
// quickly at the source level when parsing STABS
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
if (!N_BRAC_indexes.empty()) {
symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
symbol_ptr->SetByteSize(sym_idx + 1);
symbol_ptr->SetSizeIsSibling(true);
N_BRAC_indexes.pop_back();
}
type = eSymbolTypeScopeEnd;
break;
case N_EXCL:
// deleted include file: name,,NO_SECT,0,sum
type = eSymbolTypeHeaderFile;
break;
// COMM scopes
case N_BCOMM:
// begin common: name,,NO_SECT,0,0
// We use the current number of symbols in the symbol table in lieu
// of using nlist_idx in case we ever start trimming entries out
type = eSymbolTypeScopeBegin;
N_COMM_indexes.push_back(sym_idx);
break;
case N_ECOML:
// end common (local name): 0,,n_sect,0,address
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
LLVM_FALLTHROUGH;
case N_ECOMM:
// end common: name,,n_sect,0,0
// Set the size of the N_BCOMM to the terminating index of this
// N_ECOMM/N_ECOML so that we can always skip the entire symbol if
// we need to navigate more quickly at the source level when
// parsing STABS
if (!N_COMM_indexes.empty()) {
symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
symbol_ptr->SetByteSize(sym_idx + 1);
symbol_ptr->SetSizeIsSibling(true);
N_COMM_indexes.pop_back();
}
type = eSymbolTypeScopeEnd;
break;
case N_LENG:
// second stab entry with length information
type = eSymbolTypeAdditional;
break;
default:
break;
}
} else {
uint8_t n_type = N_TYPE & nlist.n_type;
sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
switch (n_type) {
case N_INDR: {
const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
if (reexport_name_cstr && reexport_name_cstr[0]) {
type = eSymbolTypeReExported;
ConstString reexport_name(reexport_name_cstr +
((reexport_name_cstr[0] == '_') ? 1 : 0));
sym[sym_idx].SetReExportedSymbolName(reexport_name);
set_value = false;
reexport_shlib_needs_fixup[sym_idx] = reexport_name;
indirect_symbol_names.insert(
ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
} else
type = eSymbolTypeUndefined;
} break;
case N_UNDF:
if (symbol_name && symbol_name[0]) {
ConstString undefined_name(symbol_name +
((symbol_name[0] == '_') ? 1 : 0));
undefined_name_to_desc[undefined_name] = nlist.n_desc;
}
LLVM_FALLTHROUGH;
case N_PBUD:
type = eSymbolTypeUndefined;
break;
case N_ABS:
type = eSymbolTypeAbsolute;
break;
case N_SECT: {
symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
if (!symbol_section) {
// TODO: warn about this?
add_nlist = false;
break;
}
if (TEXT_eh_frame_sectID == nlist.n_sect) {
type = eSymbolTypeException;
} else {
uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
switch (section_type) {
case S_CSTRING_LITERALS:
type = eSymbolTypeData;
break; // section with only literal C strings
case S_4BYTE_LITERALS:
type = eSymbolTypeData;
break; // section with only 4 byte literals
case S_8BYTE_LITERALS:
type = eSymbolTypeData;
break; // section with only 8 byte literals
case S_LITERAL_POINTERS:
type = eSymbolTypeTrampoline;
break; // section with only pointers to literals
case S_NON_LAZY_SYMBOL_POINTERS:
type = eSymbolTypeTrampoline;
break; // section with only non-lazy symbol pointers
case S_LAZY_SYMBOL_POINTERS:
type = eSymbolTypeTrampoline;
break; // section with only lazy symbol pointers
case S_SYMBOL_STUBS:
type = eSymbolTypeTrampoline;
break; // section with only symbol stubs, byte size of stub in
// the reserved2 field
case S_MOD_INIT_FUNC_POINTERS:
type = eSymbolTypeCode;
break; // section with only function pointers for initialization
case S_MOD_TERM_FUNC_POINTERS:
type = eSymbolTypeCode;
break; // section with only function pointers for termination
case S_INTERPOSING:
type = eSymbolTypeTrampoline;
break; // section with only pairs of function pointers for
// interposing
case S_16BYTE_LITERALS:
type = eSymbolTypeData;
break; // section with only 16 byte literals
case S_DTRACE_DOF:
type = eSymbolTypeInstrumentation;
break;
case S_LAZY_DYLIB_SYMBOL_POINTERS:
type = eSymbolTypeTrampoline;
break;
default:
switch (symbol_section->GetType()) {
case lldb::eSectionTypeCode:
type = eSymbolTypeCode;
break;
case eSectionTypeData:
case eSectionTypeDataCString: // Inlined C string data
case eSectionTypeDataCStringPointers: // Pointers to C string
// data
case eSectionTypeDataSymbolAddress: // Address of a symbol in
// the symbol table
case eSectionTypeData4:
case eSectionTypeData8:
case eSectionTypeData16:
type = eSymbolTypeData;
break;
default:
break;
}
break;
}
if (type == eSymbolTypeInvalid) {
const char *symbol_sect_name =
symbol_section->GetName().AsCString();
if (symbol_section->IsDescendant(text_section_sp.get())) {
if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
S_ATTR_SELF_MODIFYING_CODE |
S_ATTR_SOME_INSTRUCTIONS))
type = eSymbolTypeData;
else
type = eSymbolTypeCode;
} else if (symbol_section->IsDescendant(data_section_sp.get()) ||
symbol_section->IsDescendant(
data_dirty_section_sp.get()) ||
symbol_section->IsDescendant(
data_const_section_sp.get())) {
if (symbol_sect_name &&
::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
type = eSymbolTypeRuntime;
if (symbol_name) {
llvm::StringRef symbol_name_ref(symbol_name);
if (symbol_name_ref.startswith("_OBJC_")) {
llvm::StringRef g_objc_v2_prefix_class(
"_OBJC_CLASS_$_");
llvm::StringRef g_objc_v2_prefix_metaclass(
"_OBJC_METACLASS_$_");
llvm::StringRef g_objc_v2_prefix_ivar(
"_OBJC_IVAR_$_");
if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name =
symbol_name + g_objc_v2_prefix_class.size();
type = eSymbolTypeObjCClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(
g_objc_v2_prefix_metaclass)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name =
symbol_name + g_objc_v2_prefix_metaclass.size();
type = eSymbolTypeObjCMetaClass;
demangled_is_synthesized = true;
} else if (symbol_name_ref.startswith(
g_objc_v2_prefix_ivar)) {
symbol_name_non_abi_mangled = symbol_name + 1;
symbol_name =
symbol_name + g_objc_v2_prefix_ivar.size();
type = eSymbolTypeObjCIVar;
demangled_is_synthesized = true;
}
}
}
} else if (symbol_sect_name &&
::strstr(symbol_sect_name, "__gcc_except_tab") ==
symbol_sect_name) {
type = eSymbolTypeException;
} else {
type = eSymbolTypeData;
}
} else if (symbol_sect_name &&
::strstr(symbol_sect_name, "__IMPORT") ==
symbol_sect_name) {
type = eSymbolTypeTrampoline;
} else if (symbol_section->IsDescendant(objc_section_sp.get())) {
type = eSymbolTypeRuntime;
if (symbol_name && symbol_name[0] == '.') {
llvm::StringRef symbol_name_ref(symbol_name);
llvm::StringRef g_objc_v1_prefix_class(
".objc_class_name_");
if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
symbol_name_non_abi_mangled = symbol_name;
symbol_name = symbol_name + g_objc_v1_prefix_class.size();
type = eSymbolTypeObjCClass;
demangled_is_synthesized = true;
}
}
}
}
}
} break;
}
}
if (!add_nlist) {
sym[sym_idx].Clear();
return true;
}
uint64_t symbol_value = nlist.n_value;
if (symbol_name_non_abi_mangled) {
sym[sym_idx].GetMangled().SetMangledName(
ConstString(symbol_name_non_abi_mangled));
sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
} else {
bool symbol_name_is_mangled = false;
if (symbol_name && symbol_name[0] == '_') {
symbol_name_is_mangled = symbol_name[1] == '_';
symbol_name++; // Skip the leading underscore
}
if (symbol_name) {
ConstString const_symbol_name(symbol_name);
sym[sym_idx].GetMangled().SetValue(const_symbol_name,
symbol_name_is_mangled);
}
}
if (is_gsym) {
const char *gsym_name = sym[sym_idx]
.GetMangled()
.GetName(Mangled::ePreferMangled)
.GetCString();
if (gsym_name)
N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
}
if (symbol_section) {
const addr_t section_file_addr = symbol_section->GetFileAddress();
if (symbol_byte_size == 0 && function_starts_count > 0) {
addr_t symbol_lookup_file_addr = nlist.n_value;
// Do an exact address match for non-ARM addresses, else get the
// closest since the symbol might be a thumb symbol which has an
// address with bit zero set.
FunctionStarts::Entry *func_start_entry =
function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
if (is_arm && func_start_entry) {
// Verify that the function start address is the symbol address
// (ARM) or the symbol address + 1 (thumb).
if (func_start_entry->addr != symbol_lookup_file_addr &&
func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
// Not the right entry, NULL it out...
func_start_entry = nullptr;
}
}
if (func_start_entry) {
func_start_entry->data = true;
addr_t symbol_file_addr = func_start_entry->addr;
if (is_arm)
symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
const FunctionStarts::Entry *next_func_start_entry =
function_starts.FindNextEntry(func_start_entry);
const addr_t section_end_file_addr =
section_file_addr + symbol_section->GetByteSize();
if (next_func_start_entry) {
addr_t next_symbol_file_addr = next_func_start_entry->addr;
// Be sure the clear the Thumb address bit when we calculate the
// size from the current and next address
if (is_arm)
next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
symbol_byte_size = std::min<lldb::addr_t>(
next_symbol_file_addr - symbol_file_addr,
section_end_file_addr - symbol_file_addr);
} else {
symbol_byte_size = section_end_file_addr - symbol_file_addr;
}
}
}
symbol_value -= section_file_addr;
}
if (!is_debug) {
if (type == eSymbolTypeCode) {
// See if we can find a N_FUN entry for any code symbols. If we do
// find a match, and the name matches, then we can merge the two into
// just the function symbol to avoid duplicate entries in the symbol
// table.
std::pair<ValueToSymbolIndexMap::const_iterator,
ValueToSymbolIndexMap::const_iterator>
range;
range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
if (range.first != range.second) {
for (ValueToSymbolIndexMap::const_iterator pos = range.first;
pos != range.second; ++pos) {
if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
sym[pos->second].GetMangled().GetName(
Mangled::ePreferMangled)) {
m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
// We just need the flags from the linker symbol, so put these
// flags into the N_FUN flags to avoid duplicate symbols in the
// symbol table.
sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
if (resolver_addresses.find(nlist.n_value) !=
resolver_addresses.end())
sym[pos->second].SetType(eSymbolTypeResolver);
sym[sym_idx].Clear();
return true;
}
}
} else {
if (resolver_addresses.find(nlist.n_value) !=
resolver_addresses.end())
type = eSymbolTypeResolver;
}
} else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
type == eSymbolTypeObjCMetaClass ||
type == eSymbolTypeObjCIVar) {
// See if we can find a N_STSYM entry for any data symbols. If we do
// find a match, and the name matches, then we can merge the two into
// just the Static symbol to avoid duplicate entries in the symbol
// table.
std::pair<ValueToSymbolIndexMap::const_iterator,
ValueToSymbolIndexMap::const_iterator>
range;
range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
if (range.first != range.second) {
for (ValueToSymbolIndexMap::const_iterator pos = range.first;
pos != range.second; ++pos) {
if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
sym[pos->second].GetMangled().GetName(
Mangled::ePreferMangled)) {
m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
// We just need the flags from the linker symbol, so put these
// flags into the N_STSYM flags to avoid duplicate symbols in
// the symbol table.
sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
sym[sym_idx].Clear();
return true;
}
}
} else {
// Combine N_GSYM stab entries with the non stab symbol.
const char *gsym_name = sym[sym_idx]
.GetMangled()
.GetName(Mangled::ePreferMangled)
.GetCString();
if (gsym_name) {
ConstNameToSymbolIndexMap::const_iterator pos =
N_GSYM_name_to_sym_idx.find(gsym_name);
if (pos != N_GSYM_name_to_sym_idx.end()) {
const uint32_t GSYM_sym_idx = pos->second;
m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
// Copy the address, because often the N_GSYM address has an
// invalid address of zero when the global is a common symbol.
sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
add_symbol_addr(
sym[GSYM_sym_idx].GetAddress().GetFileAddress());
// We just need the flags from the linker symbol, so put these
// flags into the N_GSYM flags to avoid duplicate symbols in
// the symbol table.
sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
sym[sym_idx].Clear();
return true;
}
}
}
}
}
sym[sym_idx].SetID(nlist_idx);
sym[sym_idx].SetType(type);
if (set_value) {
sym[sym_idx].GetAddressRef().SetSection(symbol_section);
sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
if (symbol_section)
add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
}
sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
if (nlist.n_desc & N_WEAK_REF)
sym[sym_idx].SetIsWeak(true);
if (symbol_byte_size > 0)
sym[sym_idx].SetByteSize(symbol_byte_size);
if (demangled_is_synthesized)
sym[sym_idx].SetDemangledNameIsSynthesized(true);
++sym_idx;
return true;
};
// First parse all the nlists but don't process them yet. See the next
// comment for an explanation why.
std::vector<struct nlist_64> nlists;
nlists.reserve(symtab_load_command.nsyms);
for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
if (auto nlist =
ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
nlists.push_back(*nlist);
else
break;
}
// Now parse all the debug symbols. This is needed to merge non-debug
// symbols in the next step. Non-debug symbols are always coalesced into
// the debug symbol. Doing this in one step would mean that some symbols
// won't be merged.
nlist_idx = 0;
for (auto &nlist : nlists) {
if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
break;
}
// Finally parse all the non debug symbols.
nlist_idx = 0;
for (auto &nlist : nlists) {
if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
break;
}
for (const auto &pos : reexport_shlib_needs_fixup) {
const auto undef_pos = undefined_name_to_desc.find(pos.second);
if (undef_pos != undefined_name_to_desc.end()) {
const uint8_t dylib_ordinal =
llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
sym[pos.first].SetReExportedSymbolSharedLibrary(
dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
}
}
}
// Count how many trie symbols we'll add to the symbol table
int trie_symbol_table_augment_count = 0;
for (auto &e : external_sym_trie_entries) {
if (symbols_added.find(e.entry.address) == symbols_added.end())
trie_symbol_table_augment_count++;
}
if (num_syms < sym_idx + trie_symbol_table_augment_count) {
num_syms = sym_idx + trie_symbol_table_augment_count;
sym = symtab.Resize(num_syms);
}
uint32_t synthetic_sym_id = symtab_load_command.nsyms;
// Add symbols from the trie to the symbol table.
for (auto &e : external_sym_trie_entries) {
if (symbols_added.contains(e.entry.address))
continue;
// Find the section that this trie address is in, use that to annotate
// symbol type as we add the trie address and name to the symbol table.
Address symbol_addr;
if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
SectionSP symbol_section(symbol_addr.GetSection());
const char *symbol_name = e.entry.name.GetCString();
bool demangled_is_synthesized = false;
SymbolType type =
GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
data_section_sp, data_dirty_section_sp,
data_const_section_sp, symbol_section);
sym[sym_idx].SetType(type);
if (symbol_section) {
sym[sym_idx].SetID(synthetic_sym_id++);
sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
if (demangled_is_synthesized)
sym[sym_idx].SetDemangledNameIsSynthesized(true);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].SetExternal(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
add_symbol_addr(symbol_addr.GetFileAddress());
if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
sym[sym_idx].SetFlags(MACHO_NLIST_ARM_SYMBOL_IS_THUMB);
++sym_idx;
}
}
}
if (function_starts_count > 0) {
uint32_t num_synthetic_function_symbols = 0;
for (i = 0; i < function_starts_count; ++i) {
if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
symbols_added.end())
++num_synthetic_function_symbols;
}
if (num_synthetic_function_symbols > 0) {
if (num_syms < sym_idx + num_synthetic_function_symbols) {
num_syms = sym_idx + num_synthetic_function_symbols;
sym = symtab.Resize(num_syms);
}
for (i = 0; i < function_starts_count; ++i) {
const FunctionStarts::Entry *func_start_entry =
function_starts.GetEntryAtIndex(i);
if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
addr_t symbol_file_addr = func_start_entry->addr;
uint32_t symbol_flags = 0;
if (func_start_entry->data)
symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
Address symbol_addr;
if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
SectionSP symbol_section(symbol_addr.GetSection());
uint32_t symbol_byte_size = 0;
if (symbol_section) {
const addr_t section_file_addr = symbol_section->GetFileAddress();
const FunctionStarts::Entry *next_func_start_entry =
function_starts.FindNextEntry(func_start_entry);
const addr_t section_end_file_addr =
section_file_addr + symbol_section->GetByteSize();
if (next_func_start_entry) {
addr_t next_symbol_file_addr = next_func_start_entry->addr;
if (is_arm)
next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
symbol_byte_size = std::min<lldb::addr_t>(
next_symbol_file_addr - symbol_file_addr,
section_end_file_addr - symbol_file_addr);
} else {
symbol_byte_size = section_end_file_addr - symbol_file_addr;
}
sym[sym_idx].SetID(synthetic_sym_id++);
// Don't set the name for any synthetic symbols, the Symbol
// object will generate one if needed when the name is accessed
// via accessors.
sym[sym_idx].GetMangled().SetDemangledName(ConstString());
sym[sym_idx].SetType(eSymbolTypeCode);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].GetAddressRef() = symbol_addr;
add_symbol_addr(symbol_addr.GetFileAddress());
if (symbol_flags)
sym[sym_idx].SetFlags(symbol_flags);
if (symbol_byte_size)
sym[sym_idx].SetByteSize(symbol_byte_size);
++sym_idx;
}
}
}
}
}
}
// Trim our symbols down to just what we ended up with after removing any
// symbols.
if (sym_idx < num_syms) {
num_syms = sym_idx;
sym = symtab.Resize(num_syms);
}
// Now synthesize indirect symbols
if (m_dysymtab.nindirectsyms != 0) {
if (indirect_symbol_index_data.GetByteSize()) {
NListIndexToSymbolIndexMap::const_iterator end_index_pos =
m_nlist_idx_to_sym_idx.end();
for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
++sect_idx) {
if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
S_SYMBOL_STUBS) {
uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
if (symbol_stub_byte_size == 0)
continue;
const uint32_t num_symbol_stubs =
m_mach_sections[sect_idx].size / symbol_stub_byte_size;
if (num_symbol_stubs == 0)
continue;
const uint32_t symbol_stub_index_offset =
m_mach_sections[sect_idx].reserved1;
for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
const uint32_t symbol_stub_index =
symbol_stub_index_offset + stub_idx;
const lldb::addr_t symbol_stub_addr =
m_mach_sections[sect_idx].addr +
(stub_idx * symbol_stub_byte_size);
lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
symbol_stub_offset, 4)) {
const uint32_t stub_sym_id =
indirect_symbol_index_data.GetU32(&symbol_stub_offset);
if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
continue;
NListIndexToSymbolIndexMap::const_iterator index_pos =
m_nlist_idx_to_sym_idx.find(stub_sym_id);
Symbol *stub_symbol = nullptr;
if (index_pos != end_index_pos) {
// We have a remapping from the original nlist index to a
// current symbol index, so just look this up by index
stub_symbol = symtab.SymbolAtIndex(index_pos->second);
} else {
// We need to lookup a symbol using the original nlist symbol
// index since this index is coming from the S_SYMBOL_STUBS
stub_symbol = symtab.FindSymbolByID(stub_sym_id);
}
if (stub_symbol) {
Address so_addr(symbol_stub_addr, section_list);
if (stub_symbol->GetType() == eSymbolTypeUndefined) {
// Change the external symbol into a trampoline that makes
// sense These symbols were N_UNDF N_EXT, and are useless
// to us, so we can re-use them so we don't have to make up
// a synthetic symbol for no good reason.
if (resolver_addresses.find(symbol_stub_addr) ==
resolver_addresses.end())
stub_symbol->SetType(eSymbolTypeTrampoline);
else
stub_symbol->SetType(eSymbolTypeResolver);
stub_symbol->SetExternal(false);
stub_symbol->GetAddressRef() = so_addr;
stub_symbol->SetByteSize(symbol_stub_byte_size);
} else {
// Make a synthetic symbol to describe the trampoline stub
Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
if (sym_idx >= num_syms) {
sym = symtab.Resize(++num_syms);
stub_symbol = nullptr; // this pointer no longer valid
}
sym[sym_idx].SetID(synthetic_sym_id++);
sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
if (resolver_addresses.find(symbol_stub_addr) ==
resolver_addresses.end())
sym[sym_idx].SetType(eSymbolTypeTrampoline);
else
sym[sym_idx].SetType(eSymbolTypeResolver);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].GetAddressRef() = so_addr;
add_symbol_addr(so_addr.GetFileAddress());
sym[sym_idx].SetByteSize(symbol_stub_byte_size);
++sym_idx;
}
} else {
if (log)
log->Warning("symbol stub referencing symbol table symbol "
"%u that isn't in our minimal symbol table, "
"fix this!!!",
stub_sym_id);
}
}
}
}
}
}
}
if (!reexport_trie_entries.empty()) {
for (const auto &e : reexport_trie_entries) {
if (e.entry.import_name) {
// Only add indirect symbols from the Trie entries if we didn't have
// a N_INDR nlist entry for this already
if (indirect_symbol_names.find(e.entry.name) ==
indirect_symbol_names.end()) {
// Make a synthetic symbol to describe re-exported symbol.
if (sym_idx >= num_syms)
sym = symtab.Resize(++num_syms);
sym[sym_idx].SetID(synthetic_sym_id++);
sym[sym_idx].GetMangled() = Mangled(e.entry.name);
sym[sym_idx].SetType(eSymbolTypeReExported);
sym[sym_idx].SetIsSynthetic(true);
sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
sym[sym_idx].SetReExportedSymbolSharedLibrary(
dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
}
++sym_idx;
}
}
}
}
}