service/class-merging/Model.h (265 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <boost/optional.hpp>
#include <iosfwd>
#include <json/value.h>
#include "ApproximateShapeMerging.h"
#include "DexClass.h"
#include "MergerType.h"
#include "MergingStrategies.h"
#include "PassManager.h"
#include "Trace.h"
#include "TypeSystem.h"
struct ConfigFiles;
class PassManager;
class RefChecker;
using ConstTypeHashSet = std::unordered_set<const DexType*>;
namespace class_merging {
using TypeToTypeSet = std::unordered_map<const DexType*, TypeSet>;
using TypeGroupByDex = std::vector<std::pair<boost::optional<size_t>, TypeSet>>;
enum InterDexGroupingType {
DISABLED = 0, // No interdex grouping.
NON_HOT_SET = 1, // Exclude hot set.
NON_ORDERED_SET = 2, // Exclude all ordered set.
FULL = 3, // Apply interdex grouping on the entire input.
};
enum TypeTagConfig {
// No type tags exist in the input hierarchy. No type tags need to be
// generated by Redex.
// We don't support operations that require the original type identity in this
// option.
NONE = 0,
// No type tags in the input hierarchy. Redex generates the type tags and
// fully handles the logic around type tags.
GENERATE = 1,
// The input hierarchy has type tags emitted. Redex handles the type tag value
// passing for the merged ctors.
INPUT_PASS_TYPE_TAG_TO_CTOR = 2,
// The input hierarchy has type tags emitted. It also fully handles the type
// tag logic including ctor value passing.
INPUT_HANDLED = 3,
};
/**
* A class hierarchy specification to model for erasure.
* This is normally specified via config entries:
* // array of models
* "models" : [
* {
* // this field is really not needed as we could remove the whole entry
* // but it's here for simplicity
* "enabled" : true,
* // this only makes sense when enabled is 'false' and it's intended
* // to perform the analysis without the optmization.
* // Look at the print comment in the .cpp file to see how to read the
* // analysis results
* "analysis" : true,
* // model name for printing/tracing/debugging purposes
* "name" : "Generated Code",
* // prefix to every generated class name for this model.
* // It's also used for metrics.
* // Makes it easy to see what is what
* "class_name_prefix" : "GenCode",
* // the generated model needs a type tag
* "needs_type_tag" : true;
* // the model has a type tag predefined and usable
* "has_type_tag" : true;
* "needs_type_tag" : true;
* // build MergerType only for groups that have more than min_count
* // classes, ignore others (default to 1)
* min_group_count: 100,
* // root to the model, the base type to identify all classes
* // that are candidate for erasure
* "root" : "Lcom/facebook/gencode/BaseType;",
* // exclude classes, can be classes or interfaces
* "exclude" : [
* "Lcom/facebook/gencode/ExcludedBase;"
* ],
* // a specification for the generated set that is treated specially
* // for reference analysis
* "generated" : {
* // Treat types under the same namespace specially.
* // Skip type exclusion check under the same namespace.
* // Assuming cross referencing under the same namespace are safe.
* "namespace" : true,
* // other roots from which identify types that have
* // to be treated specially
* "other_roots" : [
* "Lcom/facebook/gencode/OtherBase;"
* ]
* }
* },
* ]
*/
struct ModelSpec {
// whether the spec is to be used
bool enabled{true};
// name of the spec for debug/printing
std::string name;
// set of roots from which to find all model types
TypeSet roots;
// A set of types to be merged, they should be subtypes of the roots.
ConstTypeHashSet merging_targets;
// types to exclude from the model
ConstTypeHashSet exclude_types;
// prefixes of types to exclude from the model
std::unordered_set<std::string> exclude_prefixes;
// prefix for class generation
std::string class_name_prefix;
// type tag config
TypeTagConfig type_tag_config{TypeTagConfig::GENERATE};
// minimum nuber of mergeables to make it into a MergerType
// (no optimization otherwise)
size_t min_count{2};
// set of generated types
std::unordered_set<DexType*> gen_types;
// set of annotations marking generated code
std::unordered_set<DexType*> gen_annos;
// set of types safe to consume the class obj of merged classes
std::unordered_set<DexType*> const_class_safe_types;
// The merging strategy of the model
strategy::Strategy strategy{strategy::BY_CLASS_COUNT};
// Group splitting. This is looser than the per dex split and takes into
// account the interdex order (if any provided).
InterDexGroupingType interdex_grouping{InterDexGroupingType::DISABLED};
// whether to perform class merging on the primary dex.
bool include_primary_dex{false};
// Process @MethodMeta annotations
bool process_method_meta{false};
// Max mergeable count per merger type
boost::optional<size_t> max_count{boost::none};
// Approximate shaping
Json::Value approximate_shape_merging;
// Allows merging classes with non-primitive static fields. Enabling this will
// change initialization order.
bool merge_types_with_static_fields{false};
// Preserve debug info like line numbers.
bool keep_debug_info{false};
// A flag for method deduplication. Deduplicating throw blocks for
// human-written code may make java stack trace confusing.
bool dedup_throw_blocks{true};
// Replace string literals matching a merged type.
bool replace_type_like_const_strings{true};
// Whether string literals matching class names disqualifies classes from
// being merged
bool type_like_const_strings_unsafe{false};
// Indicates if the merging should be performed per dex.
bool per_dex_grouping{false};
// The Model targets are generated code. If so, we consider merging_targets as
// a part of the generated set.
bool is_generated_code{false};
enum class InterDexGroupingInferringMode {
kAllTypeRefs,
kClassLoads,
kClassLoadsBasicBlockFiltering,
};
InterDexGroupingInferringMode interdex_grouping_inferring_mode{
InterDexGroupingInferringMode::kAllTypeRefs};
bool generate_type_tag() const {
return type_tag_config == TypeTagConfig::GENERATE;
}
bool no_type_tag() const { return type_tag_config == TypeTagConfig::NONE; }
bool has_type_tag() const { return type_tag_config != TypeTagConfig::NONE; }
bool input_has_type_tag() const {
return type_tag_config == TypeTagConfig::INPUT_PASS_TYPE_TAG_TO_CTOR ||
type_tag_config == TypeTagConfig::INPUT_HANDLED;
}
bool pass_type_tag_to_ctor() const {
return type_tag_config == TypeTagConfig::GENERATE ||
type_tag_config == TypeTagConfig::INPUT_PASS_TYPE_TAG_TO_CTOR;
}
boost::optional<size_t> max_num_dispatch_target{boost::none};
};
struct ModelStats {
// Model level stats
uint32_t m_all_types = 0;
uint32_t m_non_mergeables = 0;
uint32_t m_excluded = 0;
uint32_t m_dropped = 0;
// InterDex grouping stats
std::map<InterdexSubgroupIdx, size_t> m_interdex_groups{};
// Stats for approximate shape merging
ApproximateStats m_approx_stats{};
// Merging related stats
uint32_t m_num_classes_merged = 0;
uint32_t m_num_generated_classes = 0;
uint32_t m_num_ctor_dedupped = 0;
uint32_t m_num_static_non_virt_dedupped = 0;
uint32_t m_num_vmethods_dedupped = 0;
uint32_t m_num_const_lifted_methods = 0;
ModelStats& operator+=(const ModelStats& stats);
void update_redex_stats(const std::string& prefix, PassManager& mgr) const;
};
/**
* A Model is a revised hierarchy for the class set under analysis.
* The purpose is to define a small number of types that can be used to
* merge a set of other types. The mergeables types will be erased.
* The model takes into account interfaces and shapes of the types
* to merge in order to define proper aggregation.
* The Model retains all the class hierarchy and mergeable type information
* that can be use to generated proper code.
* Manipulation of the Model is done via calls to the Model public API.
*/
class Model {
public:
/**
* Build a Model given a scope and a specification.
*/
static Model build_model(const Scope& scope,
const DexStoresVector& stores,
const ConfigFiles& conf,
const ModelSpec& spec,
const TypeSystem& type_system,
const RefChecker& refchecker);
const std::string& get_name() const { return m_spec.name; }
std::vector<const DexType*> get_roots() const {
std::vector<const DexType*> res;
for (const auto root_merger : m_roots) {
res.push_back(root_merger->type);
}
return res;
}
template <class HierarchyWalkerFn = void(const MergerType&)>
void walk_hierarchy(HierarchyWalkerFn walker) {
for (const auto root_merger : m_roots) {
if (!root_merger->dummy) {
walker(*root_merger);
}
walk_hierarchy_helper(walker, root_merger->type);
}
}
const DexType* get_parent(const DexType* child) const {
auto it = m_parents.find(child);
if (it == m_parents.end()) {
return nullptr;
}
return it->second;
}
const TypeSet& get_interfaces(const DexType* type) const {
const auto& intfs = m_class_to_intfs.find(type);
return intfs != m_class_to_intfs.end() ? intfs->second : empty_set;
}
const std::string& get_class_name_prefix() const {
return m_spec.class_name_prefix;
}
bool is_interdex_grouping_enabled() const {
return m_spec.interdex_grouping != InterDexGroupingType::DISABLED;
}
const ModelSpec& get_model_spec() const { return m_spec; }
const ModelStats& get_model_stats() const { return m_stats; }
bool process_method_meta() const { return m_spec.process_method_meta; }
bool keep_debug_info() const { return m_spec.keep_debug_info; }
void update_redex_stats(PassManager& mgr) const;
static void build_interdex_groups(ConfigFiles& conf);
/**
* Print everything about the model.
* The printing has a format to allow grep to isolate specific parts.
* The format is the following:
* + TypeName type_info
* - ErasedTypeName type_info
* -* MergedType fields
* -# MergedType methods
* type_info gives info on children, interfaces and method count.
* '+' can be used to look at hierarchies of types
* (i.e. grep -e "^+* L.*;")
* + Base children(k), interfaces(n), Intf1, Intf2
* ++ Derived1
* +++ Derived11
* ++ Derived2
* +++ Derived21
* adding '-' would give the hierarchy and the merged/erasable types
* (i.e. grep -e "^+* L.*;\|^-* L.*;")
* + Base
* ++ Derived1
* +++ Derived11
* ++ Shape
* -- Erasable1
* -- Erasable2
* -- Erasable3
* you can view the hierarchy with the merged types and the fields
* and methods in the merger
* (i.e. grep -e "^+* L.*;\|^-.* L.*;")
* + Base
* ++ Derived1
* +++ Derived11
* ++ Shape
* -- Erasable1
* --* field
* --# method
*/
std::string print() const;
const TypeSystem& get_type_system() const { return m_type_system; }
private:
static const TypeSet empty_set;
// the spec for this model
ModelSpec m_spec;
// stats collection of this model
ModelStats m_stats;
// the roots (base types) for the model
std::vector<MergerType*> m_roots;
// the new generated class hierarchy during analysis.
// Types are not changed during analysis and m_hierarchy represents
// the class hierarchy as known to the analysis and what the final
// hierarchy will be
ClassHierarchy m_hierarchy;
// child to parent relationship of types in the model.
// Because nothing is changed during analysis DexClass::get_super_class()
// may not have the correct relationship
std::unordered_map<const DexType*, const DexType*> m_parents;
// class to interfaces map as known to the analysis
TypeToTypeSet m_class_to_intfs;
// interface to class relationship as known to the analysis
TypeToTypeSet m_intf_to_classes;
// type to merger map
std::unordered_map<const DexType*, MergerType> m_mergers;
// Types excluded by the ModelSpec.exclude_types
TypeSet m_excluded;
// The set of non mergeables types. Those are types that are not
// erasable for whatever reason
TypeSet m_non_mergeables;
const TypeSystem& m_type_system;
const RefChecker& m_ref_checker;
// Number of merger types created with the same shape per model.
std::map<MergerType::Shape, size_t, MergerType::ShapeComp> m_shape_to_count;
const Scope& m_scope;
const ConfigFiles& m_conf;
const XDexRefs m_x_dex;
static std::unordered_map<DexType*, size_t> s_cls_to_interdex_group;
static size_t s_num_interdex_groups;
private:
/**
* Build a Model given a set of roots and a set of types deriving from the
* roots.
*/
Model(const Scope& scope,
const DexStoresVector& stores,
const ConfigFiles& conf,
const ModelSpec& spec,
const TypeSystem& type_system,
const RefChecker& refchecker);
void init(const Scope& scope,
const ModelSpec& spec,
const TypeSystem& type_system);
void build_hierarchy(const TypeSet& roots);
void build_interface_map(const DexType* type, TypeSet implemented);
MergerType* build_mergers(const DexType* root);
void exclude_types(const ConstTypeHashSet& exclude_types);
bool is_excluded(const DexType* type) const;
// MergerType creator helpers
MergerType& create_dummy_merger(const DexType* type);
void create_dummy_mergers_if_children(const DexType* type);
MergerType& create_merger_shape(const DexType* shape_type,
const MergerType::Shape& shape,
const DexType* parent,
const TypeSet& intfs,
const std::vector<const DexType*>& classes);
MergerType& create_merger_helper(
const DexType* merger_type,
const MergerType::Shape& shape,
const TypeSet& intf_set,
const boost::optional<size_t>& dex_id,
const ConstTypeVector& group_values,
const boost::optional<InterdexSubgroupIdx>& interdex_subgroup_idx,
const InterdexSubgroupIdx subgroup_idx);
void create_mergers_helper(
const DexType* merger_type,
const MergerType::Shape& shape,
const TypeSet& intf_set,
const boost::optional<size_t>& dex_id,
const TypeSet& group_values,
const strategy::Strategy strategy,
const boost::optional<InterdexSubgroupIdx>& interdex_subgroup_idx,
const boost::optional<size_t>& max_mergeables_count,
size_t min_mergeables_count);
// make shapes out of the model classes
void shape_model();
void shape_merger(const MergerType& root, MergerType::ShapeCollector& shapes);
void approximate_shapes(MergerType::ShapeCollector& shapes);
void break_by_interface(const MergerType& merger,
const MergerType::Shape& shape,
MergerType::ShapeHierarchy& hier);
void flatten_shapes(const MergerType& merger,
MergerType::ShapeCollector& shapes);
TypeGroupByDex group_per_dex(bool per_dex_grouping, const TypeSet& types);
TypeSet get_types_in_current_interdex_group(
const TypeSet& types, const ConstTypeHashSet& interdex_group_types);
std::vector<ConstTypeHashSet> group_by_interdex_set(
const ConstTypeHashSet& types);
void map_fields(MergerType& merger,
const std::vector<const DexType*>& classes);
// collect and distribute methods across MergerTypes
void collect_methods();
void add_virtual_scope(MergerType& merger, const VirtualScope& virt_scope);
void add_interface_scope(MergerType& merger, const VirtualScope& intf_scope);
void distribute_virtual_methods(const DexType* type,
std::vector<const VirtualScope*> base_scopes);
// Model internal type system helpers
void set_parent_child(const DexType* parent, const DexType* child) {
m_hierarchy[parent].insert(child);
m_parents[child] = parent;
}
void remove_child(const DexType* child) {
const auto& prev_parent_hier = m_hierarchy.find(m_parents[child]);
always_assert(prev_parent_hier != m_hierarchy.end());
auto erased = prev_parent_hier->second.erase(child);
always_assert(erased > 0);
if (prev_parent_hier->second.empty()) {
m_hierarchy.erase(prev_parent_hier);
}
}
void move_child_to_mergeables(MergerType& merger, const DexType* child) {
TRACE(CLMG, 3, "Adding child %s to merger %s", show_type(child).c_str(),
print(&merger).c_str());
remove_child(child);
merger.mergeables.insert(child);
}
static std::string show_type(const DexType* type); // To avoid "Show.h" in the
// header.
// printers
std::string print(const MergerType* merger) const;
std::string print(const DexType* type) const;
std::string print(const DexType* type, int nest) const;
// walker helper
template <class HierarchyWalkerFn = void(const MergerType&)>
void walk_hierarchy_helper(HierarchyWalkerFn walker, const DexType* type) {
const auto& children = m_hierarchy.find(type);
if (children == m_hierarchy.end()) return;
for (const auto& child : children->second) {
const auto& merger = m_mergers.find(child);
if (merger != m_mergers.end() && !merger->second.dummy) {
walker(merger->second);
}
walk_hierarchy_helper(walker, child);
}
}
};
InterDexGroupingType get_merge_per_interdex_type(
const std::string& interdex_grouping);
std::ostream& operator<<(std::ostream& os,
ModelSpec::InterDexGroupingInferringMode mode);
} // namespace class_merging