size_t compute_locations_closure()

in libredex/Purity.cpp [440:695]
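
compute_locations_closure seeds per-method location and dependency information
via init_func (evaluated in parallel over the scope), then runs a fixpoint that
folds the locations of each method's dependencies into the method's own
location set. Methods whose information is directly or indirectly unknown are
pruned, as they are equivalent to a general memory barrier. The surviving
location sets are written to result, and the number of fixpoint iterations is
returned.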


size_t compute_locations_closure(
    const Scope& scope,
    const method_override_graph::Graph* method_override_graph,
    std::function<boost::optional<LocationsAndDependencies>(DexMethod*)>
        init_func,
    std::unordered_map<const DexMethod*, CseUnorderedLocationSet>* result,
    const purity::CacheConfig& cache_config) {
  // 1. Let's initialize known method read locations and dependencies by
  //    scanning method bodies
  ConcurrentMap<const DexMethod*, LocationsAndDependencies>
      concurrent_method_lads;
  walk::parallel::methods(scope, [&](DexMethod* method) {
    auto lads = init_func(method);
    if (lads) {
      concurrent_method_lads.emplace(method, std::move(*lads));
    }
  });

  // The fixpoint computation below runs single-threaded and erases entries,
  // so copy the results into a plain (non-concurrent) map.
  std::unordered_map<const DexMethod*, LocationsAndDependencies> method_lads;
  for (auto& p : concurrent_method_lads) {
    method_lads.insert(std::move(p));
  }

  // 2. Compute inverse dependencies so that we know what needs to be
  //    recomputed during the fixpoint computation, and determine the set of
  //    methods that are initially "impacted" in the sense that they have
  //    dependencies.
  std::unordered_map<const DexMethod*, std::unordered_set<const DexMethod*>>
      inverse_dependencies;
  std::unordered_set<const DexMethod*> impacted_methods;
  for (const auto& p : method_lads) {
    auto method = p.first;
    auto& lads = p.second;
    if (!lads.dependencies.empty()) {
      for (auto d : lads.dependencies) {
        inverse_dependencies[d].insert(method);
      }
      impacted_methods.insert(method);
    }
  }

  // 3. Let's try to (semantically) inline locations, computing a fixed
  //    point. Methods for which information is directly or indirectly absent
  //    are equivalent to a general memory barrier, and are systematically
  //    pruned.

  // TODO: Instead of a custom fixpoint computation using WTO, consider using
  // the MonotonicFixpointIterator operating on a call graph, capturing the
  // dependencies, and using the locations as the abstract domain.

  size_t iterations = 0;
  while (!impacted_methods.empty()) {
    iterations++;
    Timer t{std::string("Iteration ") + std::to_string(iterations)};

    // We order the impacted methods in a deterministic way that is likely to
    // reduce the number of iterations needed.
    std::vector<const DexMethod*> ordered_impacted_methods;

    {
      // When enabled, cached successor lists are cross-checked against a
      // fresh recomputation.
      constexpr bool kCacheDebug = false;

      // Caches successor lists for the weak topological ordering below: the
      // successors callback may be invoked repeatedly for the same node, and
      // recomputing and sorting the lists is costly. An entry is only cached
      // once it has been requested cache_fill_entry_threshold times, is at
      // least cache_fill_size_threshold elements large, and fits within the
      // cache_max_entries budget.
      struct Cache {
        struct CacheData {
          size_t runs{0};
          size_t size_if_populated{0};
          std::vector<const DexMethod*> data;
        };
        std::unordered_map<const DexMethod*, CacheData> cache;
        size_t sum_entries{0};
        size_t pop_miss{0};

        const size_t cache_max_entries;
        const size_t cache_fill_entry_threshold;
        const size_t cache_fill_size_threshold;

        explicit Cache(const purity::CacheConfig& config)
            : cache_max_entries(config.max_entries),
              cache_fill_entry_threshold(config.fill_entry_threshold),
              cache_fill_size_threshold(config.fill_size_threshold) {}

        boost::optional<std::vector<const DexMethod*>> get(const DexMethod* m) {
          auto it = cache.find(m);
          if (it == cache.end()) {
            return boost::none;
          }
          redex_assert(it->second.runs > 0);
          if (it->second.runs < cache_fill_entry_threshold) {
            return boost::none;
          }
          ++it->second.runs;
          if (it->second.size_if_populated > 0) {
            // Not cached earlier: the list was too small or did not fit in
            // the cache budget.
            pop_miss++;
            return boost::none;
          }
          return it->second.data;
        }

        void new_data(const DexMethod* m,
                      const std::vector<const DexMethod*>& data) {
          CacheData& cd = cache[m];
          cd.runs++;
          if (cd.runs < cache_fill_entry_threshold) {
            return; // Do not cache, yet.
          }
          size_t size = data.size();
          if (size >= cache_fill_size_threshold &&
              sum_entries + size <= cache_max_entries) {
            redex_assert(cd.size_if_populated == 0);
            cd.data = data;
            sum_entries += size;
          } else {
            redex_assert(cd.size_if_populated == 0 ||
                         cd.size_if_populated == size);
            cd.size_if_populated = size;
          }
        }

        void print_stats() const {
          std::ostringstream oss;

          size_t sum_not_pop{0};
          size_t max_not_pop{0};
          size_t max_pop{0};
          size_t max_not_pop_runs{0};
          size_t max_pop_runs{0};
          size_t below_size_thresh{0};
          size_t runs_below_size_thresh{0};
          for (const auto& p : cache) {
            sum_not_pop += p.second.size_if_populated;
            max_not_pop = std::max(max_not_pop, p.second.size_if_populated);
            if (p.second.size_if_populated > 0) {
              max_not_pop_runs = std::max(max_not_pop_runs, p.second.runs);
              if (p.second.size_if_populated < cache_fill_size_threshold) {
                ++below_size_thresh;
                runs_below_size_thresh += p.second.runs;
              }
            } else {
              max_pop_runs = std::max(max_pop_runs, p.second.runs);
              max_pop = std::max(max_pop, p.second.data.size());
            }
          }
          oss << "  In=" << sum_entries << " Miss=" << pop_miss << std::endl;
          oss << "  CACHED: Max=" << max_pop << " Runs=" << max_pop_runs
              << std::endl;
          oss << " NCACHED: Sum=" << sum_not_pop << " Max=" << max_not_pop
              << " Runs=" << max_not_pop_runs << std::endl;
          oss << " NCACHED: #short=" << below_size_thresh
              << " Runs=" << runs_below_size_thresh;
          TRACE(PURITY, 1, "%s", oss.str().c_str());
        }
      };

      Cache cache(cache_config);
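      // Build a weak topological ordering over the impacted methods: the
      // nullptr node is a synthetic root whose successors are all impacted
      // methods, and the successors of a real method are the impacted methods
      // that depend on it (per inverse_dependencies).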
      sparta::WeakTopologicalOrdering<const DexMethod*> wto(
          nullptr,
          [&impacted_methods, &inverse_dependencies,
           &cache](const DexMethod* const& m) {
            auto cached = cache.get(m);
            if (cached && !kCacheDebug) {
              return std::move(*cached);
            }

            std::vector<const DexMethod*> successors;
            if (m == nullptr) {
              std::copy(impacted_methods.begin(), impacted_methods.end(),
                        std::back_inserter(successors));
            } else {
              auto it = inverse_dependencies.find(m);
              if (it != inverse_dependencies.end()) {
                for (auto n : it->second) {
                  if (impacted_methods.count(n)) {
                    successors.push_back(n);
                  }
                }
              }
            }
            // Make number of iterations deterministic
            std::sort(successors.begin(), successors.end(), compare_dexmethods);

            if (kCacheDebug && cached) {
              redex_assert(*cached == successors);
            }

            cache.new_data(m, successors);
            return successors;
          });

      if (traceEnabled(PURITY, 5)) {
        cache.print_stats();
      }

      // Visit the WTO depth-first so that, within this iteration, a method
      // tends to be processed after the methods it depends on.
      wto.visit_depth_first([&ordered_impacted_methods](const DexMethod* m) {
        if (m) {
          ordered_impacted_methods.push_back(m);
        }
      });
      impacted_methods.clear();
    }

    // Fold each known dependency's locations into the impacted method's own
    // set, in the order computed above; a method with an unknown (pruned)
    // dependency becomes unknown itself and is erased.
    std::vector<const DexMethod*> changed_methods;
    for (const DexMethod* method : ordered_impacted_methods) {
      auto& lads = method_lads.at(method);
      bool unknown = false;
      size_t lads_locations_size = lads.locations.size();
      for (const DexMethod* d : lads.dependencies) {
        if (d == method) {
          continue;
        }
        auto it = method_lads.find(d);
        if (it == method_lads.end()) {
          unknown = true;
          break;
        }
        const auto& other_locations = it->second.locations;
        lads.locations.insert(other_locations.begin(), other_locations.end());
      }
      if (unknown || lads_locations_size < lads.locations.size()) {
        // something changed
        changed_methods.push_back(method);
        if (unknown) {
          method_lads.erase(method);
        }
      }
    }

    // Given the set of changed methods, determine the set of dependents for
    // which we need to re-run the analysis in another iteration.
    for (auto changed_method : changed_methods) {
      auto idit = inverse_dependencies.find(changed_method);
      if (idit == inverse_dependencies.end()) {
        continue;
      }

      // remove inverse dependency entries as appropriate
      auto& entries = idit->second;
      std20::erase_if(entries, [&](auto* m) { return !method_lads.count(m); });

      if (entries.empty()) {
        // remove inverse dependency
        inverse_dependencies.erase(changed_method);
      } else {
        // add inverse dependencies entries to impacted methods
        impacted_methods.insert(entries.begin(), entries.end());
      }
    }
  }

  // For all methods that still have a known set of locations at this point,
  // persist that information.
  for (auto& p : method_lads) {
    result->emplace(p.first, std::move(p.second.locations));
  }

  return iterations;
}
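
Usage sketch (not from the Redex sources): the wrapper below shows how a caller
might drive compute_locations_closure, assuming the declarations from Purity.h
and MethodOverrideGraph.h are visible, that purity::CacheConfig is
default-constructible, and that LocationsAndDependencies exposes the locations
and dependencies members used above. The names compute_read_locations and
gather_read_locations are hypothetical placeholders.

#include "MethodOverrideGraph.h"
#include "Purity.h"

std::unordered_map<const DexMethod*, CseUnorderedLocationSet>
compute_read_locations(const Scope& scope) {
  // Assumption: method_override_graph::build_graph builds the override graph
  // expected by compute_locations_closure.
  auto graph = method_override_graph::build_graph(scope);
  std::unordered_map<const DexMethod*, CseUnorderedLocationSet> result;
  compute_locations_closure(
      scope, graph.get(),
      [](DexMethod* method) -> boost::optional<LocationsAndDependencies> {
        if (!method->get_code()) {
          // No known body: the method acts as a general memory barrier and
          // will be pruned, together with anything depending on it.
          return boost::none;
        }
        LocationsAndDependencies lads;
        // Hypothetical helper: fill lads.locations with the locations the
        // method reads directly, and lads.dependencies with the resolved
        // callees whose locations should be folded in.
        // gather_read_locations(method, &lads);
        return lads;
      },
      &result, purity::CacheConfig());
  return result;
}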