libredex/DexAnnotation.h (522 lines of code) (raw):
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <boost/functional/hash.hpp>
#include <cstring>
#include <deque>
#include <map>
#include <memory>
#include <unordered_set>
#include <vector>
#include "Gatherable.h"
class DexFieldRef;
class DexIdx;
class DexMethod;
class DexMethodRef;
class DexOutputIdx;
class DexString;
class DexType;
// Type tags for DEX encoded values.
//
// Every enumerator fits in five bits, matching the 0x1f mask applied by
// DEVT_HDR_TYPE to an encoded-value header byte. The numbering is sparse
// (for example there is no 0x01 or 0x05), so do not iterate over the range
// or use the tag as a dense array index.
enum DexEncodedValueTypes : uint8_t {
DEVT_BYTE = 0x00,
DEVT_SHORT = 0x02,
DEVT_CHAR = 0x03,
DEVT_INT = 0x04,
DEVT_LONG = 0x06,
DEVT_FLOAT = 0x10,
DEVT_DOUBLE = 0x11,
DEVT_METHOD_TYPE = 0x15,
DEVT_METHOD_HANDLE = 0x16,
DEVT_STRING = 0x17,
DEVT_TYPE = 0x18,
DEVT_FIELD = 0x19,
DEVT_METHOD = 0x1a,
DEVT_ENUM = 0x1b,
DEVT_ARRAY = 0x1c,
DEVT_ANNOTATION = 0x1d,
DEVT_NULL = 0x1e,
DEVT_BOOLEAN = 0x1f
};
// Helpers for the header byte of an encoded value. The byte packs a type tag
// into its low five bits and a three-bit argument into its high three bits.
// All three are constexpr so they can be used in constant expressions
// (static_assert, case labels, table initializers).

/// Returns the type tag: the low five bits of header byte `x`.
constexpr uint8_t DEVT_HDR_TYPE(uint8_t x) {
  return static_cast<uint8_t>(x & 0x1f);
}

/// Returns the three-bit argument stored in the high bits of header byte `x`.
constexpr uint8_t DEVT_HDR_ARG(uint8_t x) {
  return static_cast<uint8_t>((x >> 5) & 0x7);
}

/// Moves argument `x` into header position (bits 5..7). Only the low three
/// bits of `x` are used; anything above them is discarded.
constexpr uint8_t TO_DEVT_HDR_ARG(uint8_t x) {
  return static_cast<uint8_t>((x & 0x7) << 5);
}
// Visibility tag stored on a DexAnnotation and returned by
// DexAnnotation::viz(). The build_visible(), runtime_visible() and
// system_visible() predicates on DexAnnotation each test for one of these.
enum DexAnnotationVisibility : uint8_t {
DAV_BUILD = 0,
DAV_RUNTIME = 1,
DAV_SYSTEM = 2,
};
class DexEncodedValue : public Gatherable {
protected:
union Val {
explicit Val(uint64_t v) { m_value = v; }
explicit Val(void* v) { m_value_ptr = v; }
explicit Val(const void* v) { m_value_ptr_const = v; }
uint64_t m_value;
void* m_value_ptr;
const void* m_value_ptr_const;
} m_val;
DexEncodedValueTypes m_evtype;
explicit DexEncodedValue(DexEncodedValueTypes type, uint64_t value = 0)
: m_val(value), m_evtype(type) {}
DexEncodedValue(DexEncodedValueTypes type, void* value_ptr)
: m_val(value_ptr), m_evtype(type) {}
DexEncodedValue(DexEncodedValueTypes type, const void* value_ptr)
: m_val(value_ptr), m_evtype(type) {}
public:
virtual ~DexEncodedValue() = default;
DexEncodedValueTypes evtype() const { return m_evtype; }
bool is_evtype_primitive() const;
void value(uint64_t value) { m_val = Val(value); }
uint64_t value() const { return m_val.m_value; }
static std::unique_ptr<DexEncodedValue> get_encoded_value(
DexIdx* idx, const uint8_t*& encdata);
DexEncodedValueTypes evtype() { return m_evtype; }
virtual void encode(DexOutputIdx* dodx, uint8_t*& encdata);
void vencode(DexOutputIdx* dodx, std::vector<uint8_t>& bytes);
// Deal with undefined behavior of reading the inactive member of a struct.
uint64_t as_value() const {
uint64_t tmp;
memcpy(&tmp, &m_val, sizeof(uint64_t));
return tmp;
}
virtual std::string show() const;
virtual std::string show_deobfuscated() const { return show(); }
virtual bool operator==(const DexEncodedValue& that) const {
return m_evtype == that.m_evtype && as_value() == that.as_value();
}
virtual bool operator!=(const DexEncodedValue& that) const {
return !(*this == that);
}
virtual size_t hash_value() const {
size_t seed = boost::hash<uint8_t>()(m_evtype);
boost::hash_combine(seed, as_value());
return seed;
}
virtual std::unique_ptr<DexEncodedValue> clone() const = 0;
bool is_zero() const;
bool is_wide() const;
static std::unique_ptr<DexEncodedValue> zero_for_type(DexType* type);
};
class DexEncodedValuePrimitive final : public DexEncodedValue {
public:
explicit DexEncodedValuePrimitive(DexEncodedValueTypes type,
uint64_t value = 0)
: DexEncodedValue(type, value) {}
std::unique_ptr<DexEncodedValue> clone() const override {
return std::make_unique<DexEncodedValuePrimitive>(*this);
}
};
// Free-function hashing hook that forwards to the virtual member, so that a
// DexEncodedValue can be passed directly to boost::hash_combine (as
// DexEncodedValueArray::hash_value does with its elements).
inline size_t hash_value(const DexEncodedValue& v) { return v.hash_value(); }
class DexEncodedValueBit : public DexEncodedValue {
public:
DexEncodedValueBit(DexEncodedValueTypes type, bool bit)
: DexEncodedValue(type, bit) {}
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(new DexEncodedValueBit(*this));
}
};
class DexEncodedValueString : public DexEncodedValue {
public:
explicit DexEncodedValueString(const DexString* string)
: DexEncodedValue(DEVT_STRING, string) {}
const DexString* string() const {
return (const DexString*)m_val.m_value_ptr_const;
}
void string(const DexString* string) { m_val.m_value_ptr_const = string; }
void gather_strings(std::vector<const DexString*>& lstring) const override;
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
std::string show() const override;
bool operator==(const DexEncodedValue& that) const override {
if (m_evtype != that.evtype()) {
return false;
}
return m_val.m_value_ptr_const ==
static_cast<const DexEncodedValueString*>(&that)
->m_val.m_value_ptr_const;
}
size_t hash_value() const override {
size_t seed = boost::hash<uint8_t>()(m_evtype);
boost::hash_combine(seed, (uintptr_t)m_val.m_value_ptr_const);
return seed;
}
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(new DexEncodedValueString(*this));
}
};
class DexEncodedValuePtr : public DexEncodedValue {
protected:
DexEncodedValuePtr(DexEncodedValueTypes type, void* data)
: DexEncodedValue(type, data) {}
public:
bool operator==(const DexEncodedValue& that) const override {
if (m_evtype != that.evtype()) {
return false;
}
return m_val.m_value_ptr_const ==
static_cast<const DexEncodedValuePtr*>(&that)->m_val.m_value_ptr;
}
size_t hash_value() const override {
size_t seed = boost::hash<uint8_t>()(m_evtype);
boost::hash_combine(seed, (uintptr_t)m_val.m_value_ptr);
return seed;
}
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(new DexEncodedValuePtr(*this));
}
};
class DexEncodedValueType : public DexEncodedValuePtr {
public:
explicit DexEncodedValueType(DexType* type)
: DexEncodedValuePtr(DEVT_TYPE, type) {}
void gather_types(std::vector<DexType*>& ltype) const override;
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
DexType* type() const { return (DexType*)m_val.m_value_ptr; }
void set_type(DexType* type) { m_val.m_value_ptr = type; }
std::string show() const override;
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(new DexEncodedValueType(*this));
}
};
class DexEncodedValueField : public DexEncodedValuePtr {
public:
DexEncodedValueField(DexEncodedValueTypes type, DexFieldRef* field)
: DexEncodedValuePtr(type, field) {}
void gather_fields(std::vector<DexFieldRef*>& lfield) const override;
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
DexFieldRef* field() const { return (DexFieldRef*)m_val.m_value_ptr; }
void set_field(DexFieldRef* field) { m_val.m_value_ptr = field; }
std::string show() const override;
std::string show_deobfuscated() const override;
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(new DexEncodedValueField(*this));
}
};
class DexEncodedValueMethod : public DexEncodedValuePtr {
public:
explicit DexEncodedValueMethod(DexMethodRef* method)
: DexEncodedValuePtr(DEVT_METHOD, method) {}
void gather_methods(std::vector<DexMethodRef*>& lmethod) const override;
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
DexMethodRef* method() const { return (DexMethodRef*)m_val.m_value_ptr; }
void set_method(DexMethodRef* method) { m_val.m_value_ptr = method; }
std::string show() const override;
std::string show_deobfuscated() const override;
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(new DexEncodedValueMethod(*this));
}
};
class DexEncodedValueMethodType : public DexEncodedValuePtr {
public:
explicit DexEncodedValueMethodType(DexProto* proto)
: DexEncodedValuePtr(DEVT_METHOD_TYPE, proto) {}
void gather_strings(std::vector<const DexString*>& lstring) const override;
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
DexProto* proto() const { return (DexProto*)m_val.m_value_ptr; }
void set_proto(DexProto* proto) { m_val.m_value_ptr = proto; }
std::string show() const override;
std::string show_deobfuscated() const override;
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(
new DexEncodedValueMethodType(*this));
}
};
class DexEncodedValueMethodHandle : public DexEncodedValuePtr {
public:
explicit DexEncodedValueMethodHandle(DexMethodHandle* methodhandle)
: DexEncodedValuePtr(DEVT_METHOD_HANDLE, methodhandle) {}
void gather_fields(std::vector<DexFieldRef*>& lfield) const override;
void gather_methods(std::vector<DexMethodRef*>& lmethod) const override;
void gather_methodhandles(
std::vector<DexMethodHandle*>& lhandles) const override;
void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
DexMethodHandle* methodhandle() const {
return (DexMethodHandle*)m_val.m_value_ptr;
}
void set_methodhandle(DexMethodHandle* methodhandle) {
m_val.m_value_ptr = methodhandle;
}
std::string show() const override;
std::string show_deobfuscated() const override;
std::unique_ptr<DexEncodedValue> clone() const override {
return std::unique_ptr<DexEncodedValue>(
new DexEncodedValueMethodHandle(*this));
}
};
// NOTE: Unlike the other encoded values, this one OWNS the vector it is
// given and deletes it on destruction.
class DexEncodedValueArray : public DexEncodedValue {
  bool m_static_val;

  // Deep, order-sensitive comparison shared by both operator== overloads.
  // Two arrays whose storage pointer is null (e.g. moved-from) compare equal
  // to each other and unequal to any array that still has storage.
  bool equals_array(const DexEncodedValueArray& that) const {
    if (m_evtype != that.m_evtype || m_static_val != that.m_static_val) {
      return false;
    }
    const Storage* mine = evalues();
    const Storage* theirs = that.evalues();
    if (mine == nullptr || theirs == nullptr) {
      return mine == theirs;
    }
    if (mine->size() != theirs->size()) {
      return false;
    }
    auto it = theirs->begin();
    for (const auto& elem : *mine) {
      // Dispatches virtually, so nested arrays are compared deeply as well.
      if (*elem != **it) {
        return false;
      }
      ++it;
    }
    return true;
  }

 public:
  using Storage = std::vector<std::unique_ptr<DexEncodedValue>>;

  /*
   * Static values are encoded without a DEVT_ARRAY header byte
   * so we differentiate that here.
   *
   * Takes ownership of `evalues`.
   */
  explicit DexEncodedValueArray(Storage* evalues, bool static_val = false)
      : DexEncodedValue(DEVT_ARRAY, evalues), m_static_val(static_val) {}

  ~DexEncodedValueArray() override { delete evalues(); }

  // May not copy or assign, as the element storage is owned.
  DexEncodedValueArray(const DexEncodedValueArray&) = delete;
  DexEncodedValueArray& operator=(const DexEncodedValueArray&) = delete;

  // May move; the source is left with a null storage pointer.
  DexEncodedValueArray(DexEncodedValueArray&& rhs) noexcept
      : DexEncodedValue(DEVT_ARRAY, rhs.m_val.m_value_ptr),
        m_static_val(rhs.m_static_val) {
    rhs.m_val.m_value_ptr = nullptr;
  }
  DexEncodedValueArray& operator=(DexEncodedValueArray&& rhs) noexcept {
    if (this != &rhs) {
      delete evalues();
      m_val.m_value_ptr = rhs.m_val.m_value_ptr;
      m_static_val = rhs.m_static_val;
      rhs.m_val.m_value_ptr = nullptr;
    }
    return *this;
  }

  // The owned element vector; null only for a moved-from array or one that
  // was constructed with a null pointer.
  std::vector<std::unique_ptr<DexEncodedValue>>* evalues() const {
    return static_cast<Storage*>(m_val.m_value_ptr);
  }
  bool is_static_val() const { return m_static_val; }

  void gather_types(std::vector<DexType*>& ltype) const override;
  void gather_fields(std::vector<DexFieldRef*>& lfield) const override;
  void gather_methods(std::vector<DexMethodRef*>& lmethod) const override;
  void gather_strings(std::vector<const DexString*>& lstring) const override;
  void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
  std::string show() const override;
  std::string show_deobfuscated() const override;

  // Overrides the base comparison so that arrays reached through a
  // DexEncodedValue reference (nested arrays, or the inherited operator!=)
  // are compared by content instead of by storage-pointer identity. Without
  // this override `a != b` and `!(a == b)` could disagree for two arrays.
  bool operator==(const DexEncodedValue& that) const override {
    if (m_evtype != that.evtype()) {
      return false;
    }
    return equals_array(static_cast<const DexEncodedValueArray&>(that));
  }

  // Deep comparison for callers that already hold two arrays.
  bool operator==(const DexEncodedValueArray& that) const {
    return equals_array(that);
  }

  // Content-based hash: type tag, static flag, then every element in order.
  size_t hash_value() const override {
    size_t seed = boost::hash<uint8_t>()(m_evtype);
    boost::hash_combine(seed, m_static_val);
    if (const Storage* elems = evalues()) {
      for (const auto& elem : *elems) {
        boost::hash_combine(seed, *elem);
      }
    }
    return seed;
  }

  // Deep copy: clones every element into a freshly allocated vector. A null
  // storage pointer is propagated as null.
  std::unique_ptr<DexEncodedValue> clone() const override {
    // Held in a unique_ptr while it is being filled so that a throwing
    // element clone() does not leak the partially built vector.
    std::unique_ptr<Storage> copied;
    if (const Storage* src = evalues()) {
      copied = std::make_unique<Storage>();
      copied->reserve(src->size());
      for (const auto& elem : *src) {
        copied->emplace_back(elem->clone());
      }
    }
    std::unique_ptr<DexEncodedValue> result(
        new DexEncodedValueArray(copied.get(), m_static_val));
    // `result` owns the vector from here on.
    static_cast<void>(copied.release());
    return result;
  }
};
/*
 * For loading static values.
 *
 * Takes the read position `encdata` by reference and returns a newly
 * allocated array. Presumably the position is advanced past the bytes
 * consumed; confirm against the definition.
 */
std::unique_ptr<DexEncodedValueArray> get_encoded_value_array(
DexIdx* idx, const uint8_t*& encdata);
/*
* These are not "full blown" annotations, they are
* key/value pairs of encoded values. They inherit
* visibility from the referrer. Preserving the odd
* naming from the spec. In practice, these are the
* InnerClass annotations things like access flags
* or defining method/class.
*/
class DexAnnotationElement final {
public:
DexAnnotationElement(const DexString* s, std::unique_ptr<DexEncodedValue> ev)
: string(s), encoded_value(std::move(ev)) {}
DexAnnotationElement(const DexAnnotationElement&) = delete;
DexAnnotationElement& operator=(const DexAnnotationElement&) = delete;
DexAnnotationElement(DexAnnotationElement&& other) noexcept
: string(other.string), encoded_value(std::move(other.encoded_value)) {}
DexAnnotationElement& operator=(DexAnnotationElement&& other) noexcept {
string = other.string;
encoded_value = std::move(other.encoded_value);
return *this;
}
DexAnnotationElement clone() const {
return DexAnnotationElement(string, encoded_value->clone());
}
const DexString* string;
std::unique_ptr<DexEncodedValue> encoded_value;
};
// Ordered list of name/value elements making up one annotation's contents.
using EncodedAnnotations = std::vector<DexAnnotationElement>;
// String renderings of an element list, taken by pointer or by reference.
// The *_deobfuscated variants presumably use deobfuscated names; confirm
// against the definitions.
std::string show(const EncodedAnnotations*);
std::string show_deobfuscated(const EncodedAnnotations*);
std::string show(const EncodedAnnotations&);
std::string show_deobfuscated(const EncodedAnnotations&);
/**
 * Encoded value (DEVT_ANNOTATION) holding a nested annotation: a type plus
 * an owned list of name/value elements.
 *
 * The base-class payload is unused (always zero) for this class, so equality
 * and hashing are overridden here to look at the actual contents. Relying on
 * the base implementations would make every two annotation values compare
 * equal and hash identically.
 */
class DexEncodedValueAnnotation : public DexEncodedValue {
  DexType* m_type;
  EncodedAnnotations m_annotations;

 public:
  DexEncodedValueAnnotation(DexType* type, EncodedAnnotations annotations)
      : DexEncodedValue(DEVT_ANNOTATION),
        m_type(type),
        m_annotations(std::move(annotations)) {}

  DexType* type() const { return m_type; }
  void set_type(DexType* type) { m_type = type; }
  const EncodedAnnotations& annotations() const { return m_annotations; }

  void gather_types(std::vector<DexType*>& ltype) const override;
  void gather_fields(std::vector<DexFieldRef*>& lfield) const override;
  void gather_methods(std::vector<DexMethodRef*>& lmethod) const override;
  void gather_strings(std::vector<const DexString*>& lstring) const override;
  void encode(DexOutputIdx* dodx, uint8_t*& encdata) override;
  std::string show() const override;
  std::string show_deobfuscated() const override;

  // Equal when `that` is an annotation value with the same type pointer and
  // the same elements in the same order (names by pointer identity, values
  // by their own operator==).
  bool operator==(const DexEncodedValue& that) const override {
    if (m_evtype != that.evtype()) {
      return false;
    }
    const auto& other = static_cast<const DexEncodedValueAnnotation&>(that);
    if (m_type != other.m_type ||
        m_annotations.size() != other.m_annotations.size()) {
      return false;
    }
    for (size_t i = 0; i < m_annotations.size(); ++i) {
      const DexAnnotationElement& lhs = m_annotations[i];
      const DexAnnotationElement& rhs = other.m_annotations[i];
      if (lhs.string != rhs.string) {
        return false;
      }
      // A moved-from element has no value; two such elements match.
      if (lhs.encoded_value == nullptr || rhs.encoded_value == nullptr) {
        if (lhs.encoded_value != rhs.encoded_value) {
          return false;
        }
      } else if (*lhs.encoded_value != *rhs.encoded_value) {
        return false;
      }
    }
    return true;
  }

  // Content-based hash, consistent with operator== above.
  size_t hash_value() const override {
    size_t seed = boost::hash<uint8_t>()(m_evtype);
    boost::hash_combine(seed, reinterpret_cast<uintptr_t>(m_type));
    for (const DexAnnotationElement& elem : m_annotations) {
      boost::hash_combine(seed, reinterpret_cast<uintptr_t>(elem.string));
      if (elem.encoded_value != nullptr) {
        boost::hash_combine(seed, *elem.encoded_value);
      }
    }
    return seed;
  }

  // Deep copy: same type pointer, every element cloned.
  std::unique_ptr<DexEncodedValue> clone() const override {
    EncodedAnnotations copy;
    copy.reserve(m_annotations.size());
    for (const DexAnnotationElement& elem : m_annotations) {
      copy.push_back(elem.clone());
    }
    return std::make_unique<DexEncodedValueAnnotation>(m_type, std::move(copy));
  }
};
/**
 * A complete annotation: a type, a visibility and an owned list of
 * name/value elements. Not copyable; movable; clone() makes a deep copy.
 */
class DexAnnotation : public Gatherable {
  EncodedAnnotations m_anno_elems;
  DexType* m_type;
  DexAnnotationVisibility m_viz;

 public:
  // Creates an annotation with no elements.
  DexAnnotation(DexType* type, DexAnnotationVisibility viz)
      : m_type(type), m_viz(viz) {}

  DexAnnotation(const DexAnnotation&) = delete;
  DexAnnotation(DexAnnotation&&) = default;

  static std::unique_ptr<DexAnnotation> get_annotation(DexIdx* idx,
                                                       uint32_t anno_off);

  void gather_types(std::vector<DexType*>& ltype) const override;
  void gather_fields(std::vector<DexFieldRef*>& lfield) const override;
  void gather_methods(std::vector<DexMethodRef*>& lmethod) const override;
  void gather_strings(std::vector<const DexString*>& lstring) const override;

  const EncodedAnnotations& anno_elems() const { return m_anno_elems; }
  void set_type(DexType* type) { m_type = type; }
  DexType* type() const { return m_type; }
  DexAnnotationVisibility viz() const { return m_viz; }

  // Visibility predicates, one per DexAnnotationVisibility enumerator.
  bool runtime_visible() const { return DAV_RUNTIME == m_viz; }
  bool build_visible() const { return DAV_BUILD == m_viz; }
  bool system_visible() const { return DAV_SYSTEM == m_viz; }

  void vencode(DexOutputIdx* dodx, std::vector<uint8_t>& bytes);

  void add_element(const char* key, std::unique_ptr<DexEncodedValue> value);
  void add_element(DexAnnotationElement elem);

  // Deep copy: every element is cloned; type and visibility are carried over.
  DexAnnotation clone() const {
    EncodedAnnotations cloned_elems;
    for (const DexAnnotationElement& elem : m_anno_elems) {
      cloned_elems.push_back(elem.clone());
    }
    return DexAnnotation(std::move(cloned_elems), m_type, m_viz);
  }

 private:
  // Used by clone() to build an annotation around an existing element list.
  DexAnnotation(EncodedAnnotations anno_elems,
                DexType* type,
                DexAnnotationVisibility viz)
      : m_anno_elems(std::move(anno_elems)), m_type(type), m_viz(viz) {}
};
/**
 * An owned collection of annotations. Copy construction deep-copies every
 * annotation.
 */
class DexAnnotationSet : public Gatherable {
  std::vector<std::unique_ptr<DexAnnotation>> m_annotations;

 public:
  DexAnnotationSet() = default;

  // Deep copy. The Gatherable base is default-constructed, as it was when
  // the base initializer was left implicit.
  DexAnnotationSet(const DexAnnotationSet& that) : Gatherable() {
    m_annotations.reserve(that.m_annotations.size());
    for (const auto& anno : that.m_annotations) {
      m_annotations.push_back(std::make_unique<DexAnnotation>(anno->clone()));
    }
  }

  void gather_types(std::vector<DexType*>& ltype) const override;
  void gather_fields(std::vector<DexFieldRef*>& lfield) const override;
  void gather_methods(std::vector<DexMethodRef*>& lmethod) const override;
  void gather_strings(std::vector<const DexString*>& lstring) const override;

  static std::unique_ptr<DexAnnotationSet> get_annotation_set(
      DexIdx* idx, uint32_t aset_off);

  // Number of annotations in the set.
  unsigned long size() const { return m_annotations.size(); }

  // Reports the total number of annotations in `cntanno` and the number of
  // runtime-visible ones in `cntviz`. Both outputs are overwritten. This is
  // a pure query, so it is callable on a const set.
  void viz_counts(unsigned long& cntanno, unsigned long& cntviz) const {
    cntanno = m_annotations.size();
    cntviz = 0;
    for (const auto& anno : m_annotations) {
      if (anno->runtime_visible()) {
        ++cntviz;
      }
    }
  }

  /**
   * Adds a clone of every annotation in `other` whose type is not already
   * present in this set. Only the types present before the call are
   * considered; annotations already in this set are left untouched.
   */
  void combine_with(const DexAnnotationSet& other) {
    std::unordered_set<DexType*> existing_annos_type;
    for (const auto& existing_anno : m_annotations) {
      existing_annos_type.emplace(existing_anno->type());
    }
    for (const auto& anno : other.m_annotations) {
      if (existing_annos_type.count(anno->type()) == 0) {
        m_annotations.push_back(
            std::make_unique<DexAnnotation>(anno->clone()));
      }
    }
  }

  const std::vector<std::unique_ptr<DexAnnotation>>& get_annotations() const {
    return m_annotations;
  }
  std::vector<std::unique_ptr<DexAnnotation>>& get_annotations() {
    return m_annotations;
  }

  // Appends `anno`, taking ownership of it.
  void add_annotation(std::unique_ptr<DexAnnotation> anno) {
    m_annotations.emplace_back(std::move(anno));
  }

  void vencode(DexOutputIdx* dodx,
               std::vector<uint32_t>& asetout,
               std::map<DexAnnotation*, uint32_t>& annoout);
  void gather_annotations(std::vector<DexAnnotation*>& alist);
};
// Annotation sets keyed by an int (presumably the parameter index; confirm
// against callers). The map owns its sets through unique_ptr.
using ParamAnnotations = std::map<int, std::unique_ptr<DexAnnotationSet>>;
// The three list aliases below pair a field or method with its annotations
// through raw pointers; ownership is not expressed in these types.
using DexFieldAnnotations =
std::vector<std::pair<DexFieldRef*, DexAnnotationSet*>>;
using DexMethodAnnotations =
std::vector<std::pair<DexMethod*, DexAnnotationSet*>>;
using DexMethodParamAnnotations =
std::vector<std::pair<DexMethod*, ParamAnnotations*>>;
class DexAnnotationDirectory {
double m_viz;
DexAnnotationSet* m_class;
std::unique_ptr<DexFieldAnnotations> m_field;
std::unique_ptr<DexMethodAnnotations> m_method;
std::unique_ptr<DexMethodParamAnnotations> m_method_param;
int m_aset_size;
int m_xref_size;
int m_anno_count;
int m_aset_count;
int m_xref_count;
void calc_internals();
public:
DexAnnotationDirectory(DexAnnotationSet* c,
std::unique_ptr<DexFieldAnnotations> f,
std::unique_ptr<DexMethodAnnotations> m,
std::unique_ptr<DexMethodParamAnnotations> mp)
: m_class(c),
m_field(std::move(f)),
m_method(std::move(m)),
m_method_param(std::move(mp)),
m_aset_size(0),
m_xref_size(0),
m_anno_count(0),
m_aset_count(0),
m_xref_count(0) {
calc_internals();
}
double viz_score() const { return m_viz; }
/* Encoded sizes */
int aset_size() { return m_aset_size; }
int xref_size() { return m_xref_size; }
int annodir_size() {
int size = 4 * sizeof(uint32_t);
if (m_field) {
size += m_field->size() * 2 * sizeof(uint32_t);
}
if (m_method) {
size += m_method->size() * 2 * sizeof(uint32_t);
}
if (m_method_param) {
size += m_method_param->size() * 2 * sizeof(uint32_t);
}
return size;
}
int aset_count() { return m_aset_count; }
int anno_count() { return m_anno_count; }
int xref_count() { return m_xref_count; }
void gather_annotations(std::vector<DexAnnotation*>& alist);
void gather_asets(std::vector<DexAnnotationSet*>& aset);
void gather_xrefs(std::vector<ParamAnnotations*>& xrefs);
void vencode(DexOutputIdx* dodx,
std::vector<uint32_t>& annodirout,
std::map<ParamAnnotations*, uint32_t>& xrefmap,
std::map<DexAnnotationSet*, uint32_t>& asetmap);
friend std::string show(const DexAnnotationDirectory*);
};
// Low-level encoded-value codec helpers, defined elsewhere.
//
// Each takes its byte cursor (`encdata`) by reference; presumably the cursor
// is advanced past the bytes read or written. Confirm against the
// definitions.

// Reads an integer payload; `evarg` is presumably the header argument
// extracted with DEVT_HDR_ARG. Sign extension is off by default.
uint64_t read_evarg(const uint8_t*& encdata,
uint8_t evarg,
bool sign_extend = false);
// Writers for a value of the given `type` and payload `val`. Judging by the
// names, the variants differ in how the payload is treated (plain,
// sign-extended, floating point). TODO confirm.
void type_encoder(uint8_t*& encdata, uint8_t type, uint64_t val);
void type_encoder_signext(uint8_t*& encdata, uint8_t type, uint64_t val);
void type_encoder_fp(uint8_t*& encdata, uint8_t type, uint64_t val);