void DexLoader::gather_input_stats()

in libredex/DexLoader.cpp [65:448]


// Accumulates statistics about the dex currently held by this loader into
// `stats`. Every counter is added to (never reset), so the same `stats` object
// can be passed in for several dex files.
//
// Three sources are consulted:
//  - the header `dh`, for the id-table sizes and the file size;
//  - the loaded classes (`m_classes`, `m_class_defs`) and the index `m_idx`,
//    for annotations, type lists, static values, fields, methods,
//    instructions and string sizes;
//  - the map_list found at `dh->map_off`, for per-section item counts and
//    byte sizes.
//
// stats: destination of the accumulated counters; when null the function
//        returns immediately without doing any work.
// dh:    header of the dex file whose data `m_file` / `m_idx` expose.
void DexLoader::gather_input_stats(dex_stats_t* stats, const dex_header* dh) {
  if (!stats) {
    return;
  }
  stats->num_types += dh->type_ids_size;
  stats->num_classes += dh->class_defs_size;
  stats->num_method_refs += dh->method_ids_size;
  stats->num_field_refs += dh->field_ids_size;
  stats->num_strings += dh->string_ids_size;
  stats->num_protos += dh->proto_ids_size;
  stats->num_bytes += dh->file_size;
  // T58562665: TODO - actually update stats for callsites/methodhandles
  stats->num_callsites += 0;
  stats->num_methodhandles += 0;

  // Sets used to count each distinct encoded array, type list and annotation
  // offset only once across all classes.
  std::unordered_set<DexEncodedValueArray, boost::hash<DexEncodedValueArray>>
      enc_arrays;
  std::set<DexTypeList*, dextypelists_comparator> type_lists;
  std::unordered_set<uint32_t> anno_offsets;

  for (uint32_t cidx = 0; cidx < dh->class_defs_size; ++cidx) {
    auto* clz = m_classes->at(cidx);
    if (clz == nullptr) {
      // Skip nulls, they may have been introduced by benign duplicate classes
      continue;
    }
    auto* class_def = &m_class_defs[cidx];
    auto anno_off = class_def->annotations_off;
    if (anno_off) {
      const dex_annotations_directory_item* anno_dir =
          (const dex_annotations_directory_item*)m_idx->get_uint_data(anno_off);
      auto class_anno_off = anno_dir->class_annotations_off;
      if (class_anno_off) {
        // Leading word is the entry count; the offsets follow it.
        const uint32_t* anno_data = m_idx->get_uint_data(class_anno_off);
        uint32_t count = *anno_data++;
        for (uint32_t aidx = 0; aidx < count; ++aidx) {
          anno_offsets.insert(anno_data[aidx]);
        }
      }
      // The field, method and parameter entries are read as pairs of words
      // laid out directly after the directory header; in each pair the first
      // word is skipped and the second is the offset of interest.
      const uint32_t* anno_data = (const uint32_t*)(anno_dir + 1);
      for (uint32_t fidx = 0; fidx < anno_dir->fields_size; ++fidx) {
        anno_data++;
        anno_offsets.insert(*anno_data++);
      }
      for (uint32_t midx = 0; midx < anno_dir->methods_size; ++midx) {
        anno_data++;
        anno_offsets.insert(*anno_data++);
      }
      for (uint32_t pidx = 0; pidx < anno_dir->parameters_size; ++pidx) {
        anno_data++;
        uint32_t xrefoff = *anno_data++;
        if (xrefoff != 0) {
          // The parameter entry points at a counted list of offsets.
          const uint32_t* annoxref = m_idx->get_uint_data(xrefoff);
          uint32_t count = *annoxref++;
          for (uint32_t j = 0; j < count; j++) {
            uint32_t off = annoxref[j];
            anno_offsets.insert(off);
          }
        }
      }
    }
    auto* interfaces_type_list = clz->get_interfaces();
    type_lists.insert(interfaces_type_list);
    auto deva = clz->get_static_values();
    if (deva) {
      // Only the first occurrence of a given encoded array is counted.
      if (!enc_arrays.count(*deva)) {
        enc_arrays.emplace(std::move(*deva));
        stats->num_static_values++;
      }
    }
    stats->num_fields += clz->get_ifields().size() + clz->get_sfields().size();
    stats->num_methods +=
        clz->get_vmethods().size() + clz->get_dmethods().size();
    for (auto* meth : clz->get_vmethods()) {
      DexCode* code = meth->get_dex_code();
      if (code) {
        stats->num_instructions += code->get_instructions().size();
      }
    }
    for (auto* meth : clz->get_dmethods()) {
      DexCode* code = meth->get_dex_code();
      if (code) {
        stats->num_instructions += code->get_instructions().size();
      }
    }
  }
  // Argument lists of every method ref also contribute to the type lists.
  for (uint32_t meth_idx = 0; meth_idx < dh->method_ids_size; ++meth_idx) {
    auto* meth = m_idx->get_methodidx(meth_idx);
    DexProto* proto = meth->get_proto();
    type_lists.insert(proto->get_args());
  }
  stats->num_annotations += anno_offsets.size();
  stats->num_type_lists += type_lists.size();

  for (uint32_t sidx = 0; sidx < dh->string_ids_size; ++sidx) {
    auto str = m_idx->get_stringidx(sidx);
    stats->strings_total_size += str->get_entry_size();
  }

  // Sums the sizes of `list_count` consecutive lists beginning at `cursor`.
  // Each list is read at a 4-byte aligned address as a uint32_t entry count
  // followed by that many entries of `entry_size` bytes. The returned total
  // covers the count words and the entries only; alignment padding between
  // lists is not included.
  auto counted_lists_bytes = [](const uint8_t* cursor,
                                uint32_t list_count,
                                size_t entry_size) {
    size_t total = 0;
    for (uint32_t j = 0; j < list_count; j++) {
      cursor = align_ptr(cursor, 4);
      const uint32_t entries = *(const uint32_t*)(cursor);
      const size_t list_bytes = sizeof(uint32_t) + entries * entry_size;
      total += list_bytes;
      // Step past this list so the next iteration reads the next list's
      // count instead of re-reading the first one.
      cursor += list_bytes;
    }
    return total;
  };

  const dex_map_list* map_list =
      reinterpret_cast<const dex_map_list*>(m_file->const_data() + dh->map_off);
  bool header_seen = false;
  uint32_t header_index = 0;
  for (uint32_t i = 0; i < map_list->size; i++) {
    const auto& item = map_list->items[i];

    // `encdata` is the read cursor for this section; `initial_encdata` marks
    // its start so that sections measured by walking can report
    // (cursor - start) as their byte size.
    const uint8_t* encdata = m_idx->get_uleb_data(item.offset);
    const uint8_t* initial_encdata = encdata;

    switch (item.type) {
    case TYPE_HEADER_ITEM:
      always_assert_log(
          !header_seen,
          "Expected header_item to be unique in the map_list, "
          "but encountered one at index i=%u and another at index j=%u.",
          header_index,
          i);
      header_seen = true;
      header_index = i;
      always_assert_log(1 == item.size,
                        "Expected count of header_items in the map_list to be "
                        "exactly 1, but got ct=%u.",
                        item.size);
      stats->header_item_count += item.size;
      stats->header_item_bytes += item.size * sizeof(dex_header);
      break;
    // Fixed-size id sections: byte size is simply count * sizeof(item).
    case TYPE_STRING_ID_ITEM:
      stats->string_id_count += item.size;
      stats->string_id_bytes += item.size * sizeof(dex_string_id);
      break;
    case TYPE_TYPE_ID_ITEM:
      stats->type_id_count += item.size;
      stats->type_id_bytes += item.size * sizeof(dex_type_id);
      break;
    case TYPE_PROTO_ID_ITEM:
      stats->proto_id_count += item.size;
      stats->proto_id_bytes += item.size * sizeof(dex_proto_id);
      break;
    case TYPE_FIELD_ID_ITEM:
      stats->field_id_count += item.size;
      stats->field_id_bytes += item.size * sizeof(dex_field_id);
      break;
    case TYPE_METHOD_ID_ITEM:
      stats->method_id_count += item.size;
      stats->method_id_bytes += item.size * sizeof(dex_method_id);
      break;
    case TYPE_CLASS_DEF_ITEM:
      stats->class_def_count += item.size;
      stats->class_def_bytes += item.size * sizeof(dex_class_def);
      break;
    case TYPE_CALL_SITE_ID_ITEM:
      stats->call_site_id_count += item.size;
      stats->call_site_id_bytes += item.size * sizeof(dex_callsite_id);
      break;
    case TYPE_METHOD_HANDLE_ITEM:
      stats->method_handle_count += item.size;
      stats->method_handle_bytes += item.size * sizeof(dex_methodhandle_id);
      break;
    // Counted-list sections: each item is a uint32_t count plus entries.
    case TYPE_MAP_LIST:
      stats->map_list_count += item.size;
      stats->map_list_bytes +=
          counted_lists_bytes(encdata, item.size, sizeof(dex_map_item));
      break;
    case TYPE_TYPE_LIST:
      stats->type_list_count += item.size;
      stats->type_list_bytes +=
          counted_lists_bytes(encdata, item.size, sizeof(dex_type_item));
      break;
    case TYPE_ANNOTATION_SET_REF_LIST:
      stats->annotation_set_ref_list_count += item.size;
      stats->annotation_set_ref_list_bytes += counted_lists_bytes(
          encdata, item.size, sizeof(dex_annotation_set_ref_item));
      break;
    case TYPE_ANNOTATION_SET_ITEM:
      stats->annotation_set_count += item.size;
      stats->annotation_set_bytes += counted_lists_bytes(
          encdata, item.size, sizeof(dex_annotation_off_item));
      break;
    case TYPE_CLASS_DATA_ITEM:
      stats->class_data_count += item.size;

      for (uint32_t j = 0; j < item.size; j++) {
        // Read in field sizes.
        uint32_t static_fields_size = read_uleb128(&encdata);
        uint32_t instance_fields_size = read_uleb128(&encdata);
        uint32_t direct_methods_size = read_uleb128(&encdata);
        uint32_t virtual_methods_size = read_uleb128(&encdata);

        for (uint32_t k = 0; k < static_fields_size + instance_fields_size;
             ++k) {
          // Read and skip all of the encoded_field data.
          read_uleb128(&encdata);
          read_uleb128(&encdata);
        }

        for (uint32_t k = 0; k < direct_methods_size + virtual_methods_size;
             ++k) {
          // Read and skip all of the encoded_method data.
          read_uleb128(&encdata);
          read_uleb128(&encdata);
          read_uleb128(&encdata);
        }
      }

      stats->class_data_bytes += encdata - initial_encdata;
      break;
    case TYPE_CODE_ITEM:
      stats->code_count += item.size;

      for (uint32_t j = 0; j < item.size; j++) {
        encdata = align_ptr(encdata, 4);

        const dex_code_item* code_item = (const dex_code_item*)encdata;

        encdata += sizeof(dex_code_item);
        encdata += code_item->insns_size * sizeof(uint16_t);

        // When tries are present and the instruction count is odd, one extra
        // uint16_t of padding precedes the tries.
        if (code_item->tries_size != 0 && code_item->insns_size % 2 == 1) {
          encdata += sizeof(uint16_t);
        }

        encdata += code_item->tries_size * sizeof(dex_tries_item);

        if (code_item->tries_size != 0) {
          uint32_t catch_handler_list_size = read_uleb128(&encdata);
          for (uint32_t k = 0; k < catch_handler_list_size; ++k) {
            // The magnitude is the number of typed handlers; a non-positive
            // value means a catch_all_addr follows them.
            int32_t catch_handler_size = read_sleb128(&encdata);
            uint32_t abs_size = (uint32_t)std::abs(catch_handler_size);
            for (uint32_t l = 0; l < abs_size; ++l) {
              // Read encoded_type_addr_pair.
              read_uleb128(&encdata);
              read_uleb128(&encdata);
            }
            // Read catch_all_addr
            if (catch_handler_size <= 0) {
              read_uleb128(&encdata);
            }
          }
        }
      }
      stats->code_bytes += encdata - initial_encdata;
      break;
    case TYPE_STRING_DATA_ITEM:
      stats->string_data_count += item.size;

      for (uint32_t j = 0; j < item.size; j++) {
        // Skip data that encodes the number of UTF-16 code units.
        read_uleb128(&encdata);

        // Read up to and including the NULL-terminating byte.
        while (true) {
          const uint8_t byte = *encdata;
          encdata++;
          if (byte == 0) break;
        }
      }

      stats->string_data_bytes += encdata - initial_encdata;
      break;
    case TYPE_DEBUG_INFO_ITEM:
      stats->num_dbg_items += item.size;
      for (uint32_t j = 0; j < item.size; j++) {
        // line_start
        read_uleb128(&encdata);
        // param_count
        uint32_t param_count = read_uleb128(&encdata);
        while (param_count--) {
          // Each parameter is one uleb128p1
          read_uleb128p1(&encdata);
        }
        // Step through the debug opcodes until the end-of-sequence marker,
        // consuming each opcode's operands along the way.
        bool running = true;
        while (running) {
          uint8_t opcode = *encdata++;
          switch (opcode) {
          case DBG_END_SEQUENCE:
            running = false;
            break;
          case DBG_ADVANCE_PC:
          case DBG_END_LOCAL:
          case DBG_RESTART_LOCAL:
            // each of these opcodes has one uleb128 arg:
            // - addr_diff
            // - register_num
            // - register_num
            read_uleb128(&encdata);
            break;
          case DBG_ADVANCE_LINE:
            // line_diff
            read_sleb128(&encdata);
            break;
          case DBG_START_LOCAL:
            // register_num
            read_uleb128(&encdata);
            // name_idx
            read_uleb128p1(&encdata);
            // type_idx
            read_uleb128p1(&encdata);
            break;
          case DBG_START_LOCAL_EXTENDED:
            // register_num
            read_uleb128(&encdata);
            // name_idx
            read_uleb128p1(&encdata);
            // type_idx
            read_uleb128p1(&encdata);
            // sig_idx
            read_uleb128p1(&encdata);
            break;
          case DBG_SET_FILE:
            // name_idx
            read_uleb128p1(&encdata);
            break;
          case DBG_SET_PROLOGUE_END:
          case DBG_SET_EPILOGUE_BEGIN:
            // These cases have no args
            break;
          default:
            // These are special opcodes. We separate them out to the default
            // case to show we're properly interpreting this program.
            break;
          }
        }
      }
      stats->dbg_total_size += encdata - initial_encdata;
      break;
    case TYPE_ANNOTATION_ITEM:
      // TBD!
      break;
    case TYPE_ENCODED_ARRAY_ITEM:
      // TBD!
      break;
    case TYPE_ANNOTATIONS_DIR_ITEM:
      stats->annotations_directory_count += item.size;

      for (uint32_t j = 0; j < item.size; ++j) {
        encdata = align_ptr(encdata, 4);
        const dex_annotations_directory_item* annotations_directory_item =
            (const dex_annotations_directory_item*)encdata;

        // Fixed header followed by the three variable-length tables.
        encdata += sizeof(dex_annotations_directory_item);
        encdata += sizeof(dex_field_annotation) *
                   annotations_directory_item->fields_size;
        encdata += sizeof(dex_method_annotation) *
                   annotations_directory_item->methods_size;
        encdata += sizeof(dex_parameter_annotation) *
                   annotations_directory_item->parameters_size;
      }

      stats->annotations_directory_bytes += encdata - initial_encdata;
      break;
    case TYPE_HIDDENAPI_CLASS_DATA_ITEM:
      // No stats gathered.
      break;
    default:
      fprintf(
          stderr,
          "warning: map_list item at index i=%u is of unknown type T=0x%04hX\n",
          i,
          item.type);
    }
  }
}