uint32_t emit_instruction_offset_debug_info()

in libredex/DexOutput.cpp [1540:2217]


uint32_t emit_instruction_offset_debug_info(
    DexOutputIdx* dodx,
    PositionMapper* pos_mapper,
    std::vector<CodeItemEmit*>& code_items,
    IODIMetadata& iodi_metadata,
    size_t iodi_layer,
    uint32_t line_addin,
    size_t store_number,
    size_t dex_number,
    uint8_t* output,
    uint32_t offset,
    int* dbgcount,
    std::unordered_map<DexCode*, std::vector<DebugLineItem>>* code_debug_map) {
  // Algo is as follows:
  // 1) Collect method sizes for each method of N params
  // 2) For each arity:
  //   2.1) Determine the biggest methods that we will support (see below)
  //   2.2) Emit one debug program that will emit a position for each pc up to
  //        the size calculated in 2.1
  // 3) Tie all code items back to debug program emitted in (2) and emit
  //    any normal debug info for any methods that can't use IODI (either due
  //    to being too big or being unsupported)

  // 1)
  std::map<uint32_t, DebugMethodMap> param_to_sizes;
  std::unordered_map<const DexMethod*, DebugMetadata> method_to_debug_meta;
  // We need this to calculate the size of normal debug programs for each
  // method. Hopefully no debug program is > 128k. It's ok to increase this
  // in the future.
  constexpr int TMP_SIZE = 128 * 1024;
  uint8_t* tmp = (uint8_t*)malloc(TMP_SIZE);
  std::unordered_map<const DexMethod*, std::vector<const DexMethod*>>
      clustered_methods;
  // Returns whether this method is in any cluster at all, not just a "current"
  // cluster in this iteration.
  auto is_in_global_cluster = [&](const DexMethod* method) {
    return iodi_metadata.get_cluster(method).size() > 1;
  };
  for (auto& it : code_items) {
    DexCode* dc = it->code;
    const auto dbg_item = dc->get_debug_item();
    redex_assert(dbg_item);
    DexMethod* method = it->method;
    redex_assert(!iodi_metadata.is_huge(method));
    uint32_t param_size = method->get_proto()->get_args()->size();
    // We still want to fill in pos_mapper and code_debug_map, so run the
    // usual code to emit debug info. We cache this and use it later if
    // it turns out we want to emit normal debug info for a given method.
    DebugMetadata metadata =
        calculate_debug_metadata(dbg_item, dc, it->code_item, pos_mapper,
                                 param_size, code_debug_map, line_addin);

    int debug_size =
        emit_debug_info_for_metadata(dodx, metadata, tmp, 0, false);
    always_assert_log(debug_size < TMP_SIZE, "Tmp buffer overrun");
    metadata.size = debug_size;
    const auto dex_size = dc->size();
    metadata.dex_size = dex_size;
    method_to_debug_meta.emplace(method, std::move(metadata));
    if (iodi_metadata.is_huge(method)) {
      continue;
    }
    auto res = param_to_sizes[param_size].emplace(MethodKey{method, dex_size},
                                                  debug_size);
    always_assert_log(res.second, "Failed to insert %s, %d pair", SHOW(method),
                      dc->size());
    if (is_in_global_cluster(method)) {
      clustered_methods[iodi_metadata.get_canonical_method(method)].push_back(
          method);
    }
  }
  free((void*)tmp);

  std20::erase_if(clustered_methods,
                  [](auto& p) { return p.second.size() <= 1; });

  std::vector<const DexMethod*> cluster_induced_order;
  for (const auto& p : clustered_methods) {
    cluster_induced_order.insert(cluster_induced_order.end(), p.second.begin(),
                                 p.second.end());
  }
  std::sort(
      cluster_induced_order.begin(), cluster_induced_order.end(),
      [&method_to_debug_meta](const DexMethod* lhs, const DexMethod* rhs) {
        if (lhs == rhs) {
          return false;
        }
        const auto& lhs_dbg = method_to_debug_meta.at(lhs);
        const auto& rhs_dbg = method_to_debug_meta.at(rhs);

        // Larger debug programs first.
        if (lhs_dbg.size != rhs_dbg.size) {
          return lhs_dbg.size > rhs_dbg.size;
        }

        // More parameters next.
        if (lhs_dbg.num_params != rhs_dbg.num_params) {
          return lhs_dbg.num_params > rhs_dbg.num_params;
        }

        // Some stable order.
        return compare_dexmethods(lhs, rhs);
      });

  ParamSizeOrder pso{method_to_debug_meta, cluster_induced_order,
                     param_to_sizes.begin(), param_to_sizes.end()};

  // 2)
  bool requires_iodi_programs =
      iodi_layer > 0 ||
      (iodi_metadata.layer_mode == IODIMetadata::IODILayerMode::kFull) ||
      (iodi_metadata.layer_mode ==
           IODIMetadata::IODILayerMode::kSkipLayer0AtApi26 &&
       iodi_metadata.min_sdk < 26) ||
      (iodi_metadata.layer_mode ==
           IODIMetadata::IODILayerMode::kAlwaysSkipLayer0ExceptPrimary &&
       store_number == 0 && dex_number == 0);
  std::unordered_map<uint32_t, std::map<uint32_t, uint32_t>> param_size_to_oset;
  uint32_t initial_offset = offset;
  for (int32_t size = pso.next(); size != -1; size = pso.next()) {
    auto param_size = size;
    const auto& dbg_sizes = param_to_sizes.at(size);

    if (dbg_sizes.empty()) {
      // May happen through cluster removal.
      continue;
    }

    // Find clustered methods in this param size.
    std::unordered_map<const DexMethod*, std::vector<MethodKey>>
        clusters_in_sizes;
    for (const auto& p : dbg_sizes) {
      clusters_in_sizes[iodi_metadata.get_canonical_method(p.first.method)]
          .push_back(p.first);
    }
    std20::erase_if(clusters_in_sizes,
                    [](auto& p) { return p.second.size() == 1; });
    size_t combinations = 1;
    for (const auto& p : clusters_in_sizes) {
      combinations *= p.second.size();
    }
    TRACE(IODI, 4, "Cluster combinations=%zu size=%zu", combinations,
          clusters_in_sizes.size());

    // 2.1) To determine the methods that will use IODI we go through two
    // filtering phases:
    //   2.1.1) Filter out methods that will cause an OOM in dexlayout on
    //          Android 8+
    //   2.1.2) Filter out methods that increase uncompressed APK size

    // 2.1.1) In Android 8+ there's a background optimizer service that
    // automatically runs dex2oat with a profile collected by the runtime JIT.
    // This background optimizer includes a system called dexlayout that will
    // relocate data in order to improve locality. When relocating data it will
    // inflate debug information into an IR. This inflation currently doesn't
    // properly unique debug information that has already been inflated, and
    // instead reinflates debug information every time a method references it.
    // Internally, for a given debug program D, this vector is
    // ${number of position entries in D} * ${number of methods referencing D}
    // entries long. Without this filtering we've
    // found that dex2oat will OOM on most devices, resulting in no background
    // optimization (which regressed e.g. startup quite a bit).
    //
    // In order to prevent dex2oat from OOMing we set a hard limit on the
    // inflated size of a given debug program and instead of emitting one
    // single debug program for methods of arity A, we emit multiple debug
    // programs which are bucketed so that the inflated size of any single
    // debug program is smaller than what would be the inflated size of the
    // single mega-program shared by all methods.
    //
    // Max inflated count is 2^21 = 2M. Any bigger and the vector will grow to
    // 2^22 entries, any smaller and the vector will grow but not necessarily
    // be used. For now this value has been chosen arbitrarily.
    static constexpr size_t MAX_INFLATED_SIZE = 2 * 1024 * 1024;
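    // Illustrative arithmetic (hypothetical numbers): an IODI program with
    // 4,096 position entries shared by 512 methods would inflate to
    // 4,096 * 512 = 2,097,152 entries in dex2oat, i.e. right at this limit.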
    using Iter = DebugMethodMap::const_iterator;

    // Bucket the set of methods specified by begin, end into appropriately
    // sized buckets.
    // Returns a pair:
    // - A vector of {IODI size, method count} describing each bucket
    // - A size_t reflecting the total inflated footprint using the returned
    //   bucketing
    // If dry_run is specified then no allocations will be done and the vector
    // will be left empty (this is used to query for the total inflation size).
    auto create_buckets = [](Iter begin, Iter end, bool dry_run = false) {
      // In order to understand this algorithm let's first define what
      // the "inflated size" of an debug program is:
      //
      // The inflated size of a debug program D is the number of entries that
      // dex2oat will create in a vector when inflating debug info into IR. This
      // factor is computed as len(D) * ${number of methods using D}.
      //
      // Now, this function splits one large IODI program into multiple in order
      // to reduce the inflated size of each debug program. We must do this so
      // that dex2oat doesn't OOM. The algorithm proceeds as follows:
      //
      // - Define a max bucket size: MAX_BUCKET_INFLATED_SIZE. This is the limit
      //   on the inflated size of any given IODI debug program. We use this to
      //   determine how many buckets will be created.
      // - Since len(D) = max{ len(method) | method uses D } for a given debug
      //   program D, we can iterate from largest method to smallest, attempting
      //   to add the next smallest program into the current bucket and
      //   otherwise cutting the current bucket off. In pseudo code this is:
      //
      //   for method in methods, partially ordered from largest to smallest:
      //     if method can fit in current bucket:
      //       add method to current bucket
      //     else
      //       close up the current bucket and start a new one for method
      //
      //   There must be a precondition that the current bucket contains at
      //   least one method, otherwise we may run into empty buckets and
      //   silently ignored methods. We can prove this by induction. First
      //   some terminology:
      //
      //   bucket_n := The nth bucket that has been created, starting at 0
      //   method_i := The ith largest method that's iterated over
      //
      //   Additionally we know that:
      //
      //   inflated_size(bucket_n) = max{ len(M) | M \in bucket_n }
      //                                  * len(bucket_n)
      //   and inflated_size(bucket_n) < MAX_BUCKET_INFLATED_SIZE
      //
      //   To establish the base case let's filter our set of methods to
      //     filtered_methods = { M \in methods
      //                            | len(M) < MAX_BUCKET_INFLATED_SIZE }
      //   Now method_0 \in filtered_methods satisfies
      //    len(method_0) < MAX_BUCKET_INFLATED_SIZE
      //   so bucket_0 can at least contain method_0 and thus is non-empty.
      //
      //   For the inductive case fix N to be the index of the current bucket
      //   and I to be the index of a method that cannot fit in the current
      //   bucket, then we know bucket_N is non-empty (by our inductive
      //   hypothesis) and thus, by the above, there exists M \in bucket_N s.t.
      //   len(M) < MAX_BUCKET_INFLATED_SIZE. We know that
      //   len(method_I) <= len(M) because the methods are partially ordered
      //   from largest to smallest and method_I comes after M. Thus we
      //   determine that len(method_I) <= len(M) < MAX_BUCKET_INFLATED_SIZE
      //   and so method_I can fit into bucket_{N+1}.
      //
      // No logic here, just picking 2^{some power} so that vectors don't
      // unnecessarily expand when inflating debug info for the current bucket.
      static constexpr size_t MAX_BUCKET_INFLATED_SIZE = 2 * 2 * 2 * 1024;
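      // Worked example (hypothetical sizes, walking the loop below with
      // MAX_BUCKET_INFLATED_SIZE = 8192): method sizes [9000, 3000, 3000,
      // 2000, 1000], largest first.
      //  - 9000 > 8192, so it gets its own bucket: (9000, 1).
      //  - 3000: footprint 3000 * 1 <= 8192, bucket grows to (3000, 1).
      //  - 3000: footprint 3000 * 2 <= 8192, bucket grows to (3000, 2).
      //  - 2000: footprint 3000 * 3 > 8192, so (3000, 2) is closed; the 2000
      //    method itself isn't counted in any bucket, but it can later reuse
      //    the 3000-sized program when code items are tied back in step 3.
      //  - 1000: starts a fresh bucket (1000, 1), flushed at the end.
      // Result: buckets [(9000, 1), (3000, 2), (1000, 1)] with a total
      // inflated footprint of 9000 + 6000 + 1000 = 16000.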
      std::vector<std::pair<uint32_t, uint32_t>> result;
      size_t total_inflated_footprint = 0;
      if (begin == end) {
        return std::make_pair(result, total_inflated_footprint);
      }
      uint32_t bucket_size = 0;
      uint32_t bucket_count = 0;
      auto append_bucket = [&](uint32_t size, uint32_t count) {
        total_inflated_footprint += size * count;
        if (!dry_run) {
          result.emplace_back(size, count);
        }
      };
      // To start we need to bucket any method that's too big for its own good
      // into its own bucket (this ensures the buckets calculated below contain
      // at least one entry).
      while (begin != end && begin->first.size > MAX_BUCKET_INFLATED_SIZE) {
        append_bucket(begin->first.size, 1);
        begin++;
      }
      for (auto iter = begin; iter != end; iter++) {
        uint32_t next_size = std::max(bucket_size, iter->first.size);
        uint32_t next_count = bucket_count + 1;
        size_t inflated_footprint = next_size * next_count;
        if (inflated_footprint > MAX_BUCKET_INFLATED_SIZE) {
          always_assert(bucket_size != 0 && bucket_count != 0);
          append_bucket(bucket_size, bucket_count);
          bucket_size = 0;
          bucket_count = 0;
        } else {
          bucket_size = next_size;
          bucket_count = next_count;
        }
      }
      if (bucket_size > 0 && bucket_count > 0) {
        append_bucket(bucket_size, bucket_count);
      }
      return std::make_pair(result, total_inflated_footprint);
    };

    auto compute = [&](const auto& sizes, bool dry_run) -> size_t {
      // best_iter starts at the largest method and will end up at the cutoff:
      // every method from best_iter onward (this size and smaller) can use
      // IODI. If it points to end, then no method should use IODI.
      Iter best_iter = sizes.begin();
      Iter end = sizes.end();

      // Re-bucket, removing one method at a time, until we've found a set of
      // methods small enough for the given constraints.
      size_t total_inflated_size = 0;
      do {
        total_inflated_size = create_buckets(best_iter, end, true).second;
      } while (total_inflated_size > MAX_INFLATED_SIZE && ++best_iter != end);
      size_t total_ignored = std::distance(sizes.begin(), best_iter);
      if (!dry_run) {
        TRACE(IODI, 3,
              "[IODI] (%u) Ignored %zu methods because they inflated too much",
              param_size, total_ignored);
      }

      // 2.1.2) In order to filter out methods that increase uncompressed APK
      // size we need to understand how IODI gets its win:
      //
      // The win is calculated as the total usual debug info size minus the size
      // of debug info when IODI is enabled. Thus, given a set of methods for
      // which IODI is enabled we have the following formula:
      //
      // win(IODI_methods) = normal_debug_size(all methods)
      //        - (IODI_debug_size(IODI_methods)
      //            + normal_debug_size(all_methods - IODI_methods))
      // where
      //  normal_debug_size(M) = the size of usual debug programs for all m in M
      //  IODI_debug_size(M) =
      //                      -----
      //                      \
      //                       \     max(len(m) + padding | m in M,
      //                       /           arity(m) = i)
      //                      /
      //                      -----
      //                  i in arities(M)
      //   or, in plain English, add together the size of a debug program for
      //   each arity i. Fixing an arity i, the size is calculated as the max
      //   length of a method with arity i with some constant padding added
      //   (the header of the dbg program)
      //
      // Simplifying the above a bit we get that:
      //
      // win(IM) =
      //          -----
      //          \
      //           \     normal_debug_size({ m in IM | arity(m) = i})
      //           /       - max(len(m) + padding | m in IM, arity(m) = i)
      //          /
      //          -----
      //      i in arities(IM)
      //
      // In order to maximize win we need to determine the best set of methods
      // that should use IODI (i.e. this is a maximization problem of win over
      // IM above). Since the summand above only depends on methods with arity
      // i, we can focus on maximizing the summand alone after fixing i. Thus we
      // need to maximize:
      //
      // win(IM) = normal_debug_size({ m in IM | arity(m) = i})
      //            - max(len(m) + padding | m in IM, arity(m) = i)
      //
      // It's clear that removing any method m s.t. len(m) < max(len(m) ...)
      // will make the overall win smaller, so our only chance is to remove the
      // biggest method. After removing the biggest method, or m_1, we get
      // a win delta of:
      //
      // win_delta_1 = len(m_1) - len(m_2) - normal_debug_size(m_1)
      // where m_2 is the next biggest method.
      //
      // We can continue to calculate more win_deltas if we were to remove the
      // subsequent biggest methods:
      //
      // win_delta_i = len(m_1) - len(m_{i+1})
      //                        - sum(j = 1, j <= i, normal_debug_size(m_j))
      // or in other words, the delta of the iodi programs minus the cost of
      // incorporating all the normal debug programs up to i.
      //
      // Since there is no regularity condition on normal_debug_size(m) the
      // max of win_delta_i may occur for any i (indeed there may be an esoteric
      // case where all the debug programs are tiny but all the methods are
      // pretty large and thus it's best to not use any IODI programs).
      //
      // Note, the above assumes win(IM) > 0 at some point, but that may not be
      // true. In order to verify that using IODI is useful we need to verify
      // that win(IM) > 0 for whatever maximal IM was found above.
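      //
      // Worked example (hypothetical numbers): three methods of this arity
      // with (len, normal_debug_size) = m_1 (1000, 50), m_2 (400, 60),
      // m_3 (100, 40), largest first.
      //   win_delta for dropping m_1 (IODI program sized by m_2):
      //     (1000 - 400) - 50 = 550
      //   win_delta for dropping m_1 and m_2 (sized by m_3):
      //     (1000 - 100) - (50 + 60) = 790
      // Since 790 > 550 > 0, best_iter ends up at m_3 and m_1, m_2 are marked
      // huge below.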
      auto iter = best_iter;
      // This is len(m_1) from above
      uint64_t base_iodi_size = iter->first.size;
      // This is that final sum in win_delta_i. It starts with just the debug
      // cost of m_1.
      uint64_t total_normal_dbg_cost = iter->second;
      // This keeps track of the best win delta. By default the delta is 0 (we
      // can always make everything use iodi)
      int64_t max_win_delta = 0;

      if (requires_iodi_programs) {
        for (iter = std::next(iter); iter != end; iter++) {
          uint64_t iodi_size = iter->first.size;
          // This is calculated as:
          //   "how much do we save by using a smaller iodi program after
          //    removing the cost of not using an iodi program for the larger
          //    methods"
          int64_t win_delta =
              (base_iodi_size - iodi_size) - total_normal_dbg_cost;
          // If it's at least as good as the best win so far then we take it,
          // because we want debug programs to be as small as possible for
          // dex2oat's sake.
          if (win_delta >= max_win_delta) {
            max_win_delta = win_delta;
            best_iter = iter;
          }
          total_normal_dbg_cost += iter->second;
        }
      }

      size_t insns_size = best_iter != end ? best_iter->first.size : 0;
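      // The padding term estimates the fixed overhead of an IODI debug program
      // (presumably the debug_info_item header: line_start, parameters_size as
      // ULEB128 -- which needs an extra byte at 128 and again at 16384 -- one
      // byte per parameter name, plus the end marker).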
      size_t padding = 1 + 1 + param_size + 1;
      if (param_size >= 128) {
        padding += 1;
        if (param_size >= 16384) {
          padding += 1;
        }
      }
      auto iodi_size = insns_size + padding;

      if (requires_iodi_programs) {
        if (total_normal_dbg_cost < iodi_size) {
          // If using IODI period isn't valuable then don't use it!
          best_iter = end;
          if (!dry_run) {
            TRACE(IODI, 3,
                  "[IODI] Opting out of IODI for %u arity methods entirely",
                  param_size);
          }
        }
      }

      // Now we've found which methods are too large to be beneficial. Tell IODI
      // infra about these large methods
      size_t num_big = 0;
      assert(sizes.begin() == best_iter || requires_iodi_programs);
      for (auto big = sizes.begin(); big != best_iter; big++) {
        if (!dry_run) {
          iodi_metadata.mark_method_huge(big->first.method);
          TRACE(IODI, 3,
                "[IODI] %s is too large to benefit from IODI: %u vs %u",
                SHOW(big->first.method), big->first.size, big->second);
        }
        num_big += 1;
      }

      size_t num_small_enough = sizes.size() - num_big;
      if (dry_run) {
        size_t sum = 0;
        for (auto it = sizes.begin(); it != best_iter; ++it) {
          sum += it->second;
        }
        // Does not include bucketing, but good enough.
        sum += num_small_enough * iodi_size;
        return sum;
      }

      // 2.2) Emit IODI programs (other debug programs will be emitted below)
      if (requires_iodi_programs) {
        TRACE(IODI, 2,
              "[IODI] @%u(%u): Of %zu methods %zu were too big, %zu at biggest "
              "%zu",
              offset, param_size, sizes.size(), num_big, num_small_enough,
              insns_size);
        if (num_small_enough == 0) {
          return 0;
        }
        auto bucket_res = create_buckets(best_iter, end);
        auto& buckets = bucket_res.first;
        total_inflated_size = bucket_res.second;
        TRACE(IODI, 3,
              "[IODI][Buckets] Bucketed %u arity methods into %zu buckets with "
              "total"
              " inflated size %zu:\n",
              param_size, buckets.size(), total_inflated_size);
        auto& size_to_offset = param_size_to_oset[param_size];
        for (auto& bucket : buckets) {
          auto bucket_size = bucket.first;
          TRACE(IODI, 3, "  - %u methods in bucket size %u @ %u", bucket.second,
                bucket_size, offset);
          size_to_offset.emplace(bucket_size, offset);
          std::vector<std::unique_ptr<DexDebugInstruction>> dbgops;
          if (bucket_size > 0) {
            // First emit an entry for pc = 0 -> line = start
            dbgops.push_back(DexDebugInstruction::create_line_entry(0, 0));
            // Now emit an entry for each pc thereafter
            // (0x1e increments addr+line by 1)
            for (size_t i = 1; i < bucket_size; i++) {
              dbgops.push_back(DexDebugInstruction::create_line_entry(1, 1));
            }
          }
          offset += DexDebugItem::encode(nullptr, output + offset, line_addin,
                                         param_size, dbgops);
          *dbgcount += 1;
        }
      }

      if (traceEnabled(IODI, 4)) {
        double amortized_cost = 0;
        if (requires_iodi_programs) {
          amortized_cost = (double)iodi_size / (double)num_small_enough;
        }
        for (auto it = best_iter; it != end; it++) {
          TRACE(IODI, 4,
                "[IODI][savings] %s saved %u bytes (%u), cost of %f, net %f",
                SHOW(it->first.method), it->second, it->first.size,
                amortized_cost, (double)it->second - amortized_cost);
        }
      }

      return 0;
    };
    auto mark_clusters_as_skip = [&](const auto& sizes) {
      // Mark methods in clusters as skip and remove them from param_to_sizes.
      for (const auto& p : sizes) {
        const auto* emitted_method = p.first.method;
        const auto* canonical =
            iodi_metadata.get_canonical_method(emitted_method);
        auto cluster_it = clustered_methods.find(canonical);
        if (cluster_it == clustered_methods.end()) {
          continue;
        }

        for (const auto* m : cluster_it->second) {
          if (m != emitted_method) {
            pso.skip(m);
            TRACE(IODI, 4, "Skipping %s for %s", SHOW(m), SHOW(emitted_method));
            auto& m_dbg = method_to_debug_meta.at(m);
            auto& param_methods = param_to_sizes.at(m_dbg.num_params);
            auto param_it = param_methods.find(MethodKey{m, m_dbg.dex_size});
            if (param_it != param_methods.end()) {
              param_methods.erase(param_it);
            }
          }
        }
      }
    };
    if (combinations == 1) {
      compute(dbg_sizes, /*dry_run=*/false);
      mark_clusters_as_skip(dbg_sizes);
    } else {
      auto sizes_wo_clusters = dbg_sizes;
      size_t max_cluster_len{0};
      size_t sum_cluster_sizes{0};
      for (auto& p : clusters_in_sizes) {
        for (const auto& k : p.second) {
          sizes_wo_clusters.erase(k);
        }
        std::sort(p.second.begin(), p.second.end(), MethodKeyCompare());
        max_cluster_len = std::max(max_cluster_len, p.second.size());
        for (const auto& k : p.second) {
          sum_cluster_sizes += dbg_sizes.at(k);
        }
      }
      TRACE(IODI, 3, "max_cluster_len=%zu sum_cluster_sizes=%zu",
            max_cluster_len, sum_cluster_sizes);

      // Very simple heuristic, "walk" in lock-step, do not try all combinations
      // (too expensive).
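      // Illustration of the lock-step walk (hypothetical clusters): given
      // clusters A = [a0, a1, a2] and B = [b0, b1] (each sorted by
      // MethodKeyCompare) and max_cluster_len = 3, add_iteration below picks
      //   iter 0: {a0, b0}   iter 1: {a1, b0}   iter 2: {a2, b1}
      // i.e. shorter clusters repeat their first element until the walk
      // catches up; each iteration's total size is evaluated with compute() in
      // dry-run mode and the cheapest iteration wins.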
      size_t best_iter{0};
      size_t best_size{0};

      auto add_iteration = [&dbg_sizes, &clusters_in_sizes,
                            max_cluster_len](auto& cur_sizes, size_t iter) {
        size_t added_sizes{0};
        for (const auto& p : clusters_in_sizes) {
          size_t p_idx = p.second.size() -
                         std::min(p.second.size(), max_cluster_len - iter);
          const auto& k = p.second[p_idx];
          auto k_size = dbg_sizes.at(k);
          cur_sizes[k] = k_size;
          added_sizes += k_size;
        }
        return added_sizes;
      };

      for (size_t iter = 0; iter != max_cluster_len; ++iter) {
        auto cur_sizes = sizes_wo_clusters;
        auto added_sizes = add_iteration(cur_sizes, iter);

        auto out_size = compute(cur_sizes, /*dry_run=*/true) +
                        (sum_cluster_sizes - added_sizes);
        TRACE(IODI, 3,
              "Iteration %zu: added_sizes=%zu out_size=%zu extra_size=%zu",
              iter, added_sizes, out_size, sum_cluster_sizes - added_sizes);
        if (iter == 0) {
          best_size = out_size;
        } else if (out_size < best_size) {
          best_size = out_size;
          best_iter = iter;
        }
      }

      TRACE(IODI, 3, "Best iteration %zu (%zu)", best_iter, best_size);
      auto cur_sizes = sizes_wo_clusters;
      add_iteration(cur_sizes, best_iter);
      compute(cur_sizes, /*dry_run=*/false);
      mark_clusters_as_skip(cur_sizes);

      // Mark other cluster methods as skips.
      for (const auto& p : clusters_in_sizes) {
        size_t p_idx = p.second.size() -
                       std::min(p.second.size(), max_cluster_len - best_iter);
        for (size_t i = 0; i != p.second.size(); ++i) {
          if (i == p_idx) {
            continue;
          }
          pso.skip(p.second[i].method);
        }
      }
    }
  }

  auto post_iodi_offset = offset;
  TRACE(IODI, 2, "[IODI] IODI programs took up %d bytes\n",
        post_iodi_offset - initial_offset);
  // 3)
  auto size_offset_end = param_size_to_oset.end();
  std::unordered_set<const DexMethod*> to_remove;
  for (auto& it : code_items) {
    if (pso.skip_methods.count(it->method)) {
      continue;
    }

    DexCode* dc = it->code;
    const auto dbg = dc->get_debug_item();
    redex_assert(dbg != nullptr);
    auto code_size = dc->size();
    redex_assert(code_size != 0);
    // If a method is too big then it's been marked as such internally, so the
    // is_huge() check below will route it to the non-IODI path.
    DexMethod* method = it->method;
    if (!iodi_metadata.is_huge(method)) {
      iodi_metadata.set_iodi_layer(method, iodi_layer);
      TRACE(IODI, 3, "Emitting %s as IODI", SHOW(method));
      if (requires_iodi_programs) {
        // Here we sanity check to make sure that all IODI programs are at least
        // as long as they need to be.
        uint32_t param_size = it->method->get_proto()->get_args()->size();
        auto size_offset_it = param_size_to_oset.find(param_size);
        always_assert_log(size_offset_it != size_offset_end,
                          "Expected to find param to offset: %s", SHOW(method));
        auto& size_to_offset = size_offset_it->second;
        // Returns first key >= code_size or end if such an entry doesn't exist.
        // Aka first debug program long enough to represent a method of size
        // code_size.
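        // E.g. (hypothetical): if size_to_offset = {1000: o1, 3000: o2,
        // 9000: o3} and code_size = 2000, lower_bound finds the 3000-sized
        // program and the method reuses the IODI program at o2.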
        auto offset_it = size_to_offset.lower_bound(code_size);
        auto offset_end = size_to_offset.end();
        always_assert_log(offset_it != offset_end,
                          "Expected IODI program to be big enough for %s : %u",
                          SHOW(method), code_size);
        it->code_item->debug_info_off = offset_it->second;
      } else {
        it->code_item->debug_info_off = 0;
      }
    } else {
      TRACE(IODI, 3, "Emitting %s as non-IODI", SHOW(method));
      // Recompute the debug data with no line add-in if not in a cluster.
      // TODO: If a whole cluster does not have IODI, we should emit base
      //       versions for all of them.
      DebugMetadata no_line_addin_metadata;
      const DebugMetadata* metadata = &method_to_debug_meta.at(method);
      if (!is_in_global_cluster(method) && line_addin != 0) {
        no_line_addin_metadata =
            calculate_debug_metadata(dbg, dc, it->code_item, pos_mapper,
                                     metadata->num_params, code_debug_map,
                                     /*line_addin=*/0);
        metadata = &no_line_addin_metadata;
      }
      offset +=
          emit_debug_info_for_metadata(dodx, *metadata, output, offset, true);
      *dbgcount += 1;
    }
    to_remove.insert(method);
  }
  code_items.erase(std::remove_if(code_items.begin(), code_items.end(),
                                  [&to_remove](const CodeItemEmit* cie) {
                                    return to_remove.count(cie->method) > 0;
                                  }),
                   code_items.end());
  TRACE(IODI, 2, "[IODI] Non-IODI programs took up %d bytes\n",
        offset - post_iodi_offset);
  // Return how much data we've encoded
  return offset - initial_offset;
}