bool DumpImageDiffMap()

in imgdiag/imgdiag.cc [283:831]


  // Diffs the boot image as seen by this process against the copy mapped in another process
  // and writes a human-readable report to *os_.
  //
  // The remote copy is read from /proc/<image_diff_pid>/mem over [boot_map.start, boot_map.end).
  // Page state is taken from /proc/<image_diff_pid>/pagemap, /proc/self/pagemap,
  // /proc/kpageflags and /proc/kpagecount. The report contains:
  //   - byte / int32 / page level difference counts and dirty / private page counts,
  //   - per-object differences against the remote image (and against the zygote, if given),
  //   - per-class statistics for dirty, false-dirty (clean object on a dirty page) and
  //     clean objects.
  //
  // Parameters:
  //   image_diff_pid   pid of the process whose boot image mapping is compared.
  //   zygote_diff_pid  pid of a second process used to classify dirty objects, or -1 to skip
  //                    that comparison. Failing to open or read it is not fatal.
  //   boot_map         address range of the boot image mapping in the remote process.
  //
  // Returns false, after writing a message to *os_, if a required /proc file cannot be opened
  // or read, if the remote range lies outside the local image range, or if IsPageDirty
  // reports an error. Returns true once the report has been written.
  bool DumpImageDiffMap(pid_t image_diff_pid,
                        pid_t zygote_diff_pid,
                        const backtrace_map_t& boot_map)
    SHARED_REQUIRES(Locks::mutator_lock_) {
    std::ostream& os = *os_;
    const size_t pointer_size = InstructionSetPointerSize(
        Runtime::Current()->GetInstructionSet());

    // Validate the range before deriving a size from it, so that an inverted range cannot
    // wrap around to a huge unsigned size.
    CHECK(boot_map.end >= boot_map.start);
    const size_t boot_map_size = boot_map.end - boot_map.start;

    std::string file_name =
        StringPrintf("/proc/%ld/mem", static_cast<long>(image_diff_pid));  // NOLINT [runtime/int]

    // Open /proc/$pid/mem as a file
    auto map_file = std::unique_ptr<File>(OS::OpenFileForReading(file_name.c_str()));
    if (map_file == nullptr) {
      os << "Failed to open " << file_name << " for reading";
      return false;
    }

    std::string error_msg;

    // Walk the bytes and diff against our boot image
    const ImageHeader& boot_image_header = image_header_;

    os << "\nObserving boot image header at address "
       << reinterpret_cast<const void*>(&boot_image_header)
       << "\n\n";

    const uint8_t* image_begin_unaligned = boot_image_header.GetImageBegin();
    // End of the objects section; only this part is walked object-by-object below.
    const uint8_t* image_mirror_end_unaligned = image_begin_unaligned +
        boot_image_header.GetImageSection(ImageHeader::kSectionObjects).Size();
    const uint8_t* image_end_unaligned = image_begin_unaligned + boot_image_header.GetImageSize();

    // Adjust range to nearest page
    const uint8_t* image_begin = AlignDown(image_begin_unaligned, kPageSize);
    const uint8_t* image_end = AlignUp(image_end_unaligned, kPageSize);

    // Distance of the (unaligned) image start from the start of its page.
    ptrdiff_t page_off_begin = boot_image_header.GetImageBegin() - image_begin;

    if (reinterpret_cast<uintptr_t>(image_begin) > boot_map.start ||
        reinterpret_cast<uintptr_t>(image_end) < boot_map.end) {
      // Sanity check that we aren't trying to read a completely different boot image
      os << "Remote boot map is out of range of local boot map: " <<
        "local begin " << reinterpret_cast<const void*>(image_begin) <<
        ", local end " << reinterpret_cast<const void*>(image_end) <<
        ", remote begin " << reinterpret_cast<const void*>(boot_map.start) <<
        ", remote end " << reinterpret_cast<const void*>(boot_map.end);
      return false;
      // If we wanted even more validation we could map the ImageHeader from the file
    }

    // Snapshot of the remote process' boot image mapping. data() is used instead of &v[0]
    // so that an empty mapping does not index into an empty vector.
    std::vector<uint8_t> remote_contents(boot_map_size);
    if (!map_file->PreadFully(remote_contents.data(), boot_map_size, boot_map.start)) {
      os << "Could not fully read file " << file_name;
      return false;
    }

    // Optional snapshot of the zygote's mapping. It stays empty when no zygote pid was given
    // or when it could not be read; every later use checks for emptiness.
    std::vector<uint8_t> zygote_contents;
    std::unique_ptr<File> zygote_map_file;
    if (zygote_diff_pid != -1) {
      std::string zygote_file_name =
          StringPrintf("/proc/%ld/mem", static_cast<long>(zygote_diff_pid));  // NOLINT [runtime/int]
      zygote_map_file.reset(OS::OpenFileForReading(zygote_file_name.c_str()));
      if (zygote_map_file == nullptr) {
        // Treated like a failed read: warn and carry on without the zygote comparison
        // rather than dereferencing a null file.
        LOG(WARNING) << "Failed to open zygote file " << zygote_file_name
                     << " for reading: " << strerror(errno);
      } else {
        // The boot map should be at the same address.
        zygote_contents.resize(boot_map_size);
        if (!zygote_map_file->PreadFully(zygote_contents.data(),
                                         boot_map_size,
                                         boot_map.start)) {
          LOG(WARNING) << "Could not fully read zygote file " << zygote_file_name;
          zygote_contents.clear();
        }
      }
    }

    std::string page_map_file_name = StringPrintf(
        "/proc/%ld/pagemap", static_cast<long>(image_diff_pid));  // NOLINT [runtime/int]
    auto page_map_file = std::unique_ptr<File>(OS::OpenFileForReading(page_map_file_name.c_str()));
    if (page_map_file == nullptr) {
      os << "Failed to open " << page_map_file_name << " for reading: " << strerror(errno);
      return false;
    }

    // Not truly clean, mmap-ing boot.art again would be more pristine, but close enough
    const char* clean_page_map_file_name = "/proc/self/pagemap";
    auto clean_page_map_file = std::unique_ptr<File>(
        OS::OpenFileForReading(clean_page_map_file_name));
    if (clean_page_map_file == nullptr) {
      os << "Failed to open " << clean_page_map_file_name << " for reading: " << strerror(errno);
      return false;
    }

    auto kpage_flags_file = std::unique_ptr<File>(OS::OpenFileForReading("/proc/kpageflags"));
    if (kpage_flags_file == nullptr) {
      os << "Failed to open /proc/kpageflags for reading: " << strerror(errno);
      return false;
    }

    auto kpage_count_file = std::unique_ptr<File>(OS::OpenFileForReading("/proc/kpagecount"));
    if (kpage_count_file == nullptr) {
      os << "Failed to open /proc/kpagecount for reading: " << strerror(errno);
      return false;
    }

    // Set of the remote virtual page indices that are dirty
    std::set<size_t> dirty_page_set_remote;
    // Set of the local virtual page indices that are dirty
    std::set<size_t> dirty_page_set_local;

    // Sentinel meaning "no page visited yet". Starting from a real index (0) would make the
    // first page look already visited, so it would never be checked for dirtiness.
    constexpr size_t kNoPageIdx = static_cast<size_t>(-1);

    size_t different_int32s = 0;
    size_t different_bytes = 0;
    size_t different_pages = 0;
    size_t virtual_page_idx = 0;            // Virtual page number (for an absolute memory address)
    size_t page_idx = kNoPageIdx;           // Page index relative to 0
    size_t previous_page_idx = kNoPageIdx;  // Previous page index relative to 0
    size_t dirty_pages = 0;
    size_t private_pages = 0;
    size_t private_dirty_pages = 0;

    // Iterate through one page at a time. Boot map begin/end already implicitly aligned.
    for (uintptr_t begin = boot_map.start; begin != boot_map.end; begin += kPageSize) {
      ptrdiff_t offset = begin - boot_map.start;

      // We treat the image header as part of the memory map for now
      // If we wanted to change this, we could pass base=start+sizeof(ImageHeader)
      // But it might still be interesting to see if any of the ImageHeader data mutated
      const uint8_t* local_ptr = reinterpret_cast<const uint8_t*>(&boot_image_header) + offset;
      uint8_t* remote_ptr = &remote_contents[offset];

      if (memcmp(local_ptr, remote_ptr, kPageSize) != 0) {
        different_pages++;

        // Count the number of 32-bit integers that are different.
        const uint32_t* remote_ptr_int32 = reinterpret_cast<const uint32_t*>(remote_ptr);
        const uint32_t* local_ptr_int32 = reinterpret_cast<const uint32_t*>(local_ptr);
        for (size_t i = 0; i < kPageSize / sizeof(uint32_t); ++i) {
          if (remote_ptr_int32[i] != local_ptr_int32[i]) {
            different_int32s++;
          }
        }
      }
    }

    // Iterate through one byte at a time.
    for (uintptr_t begin = boot_map.start; begin != boot_map.end; ++begin) {
      previous_page_idx = page_idx;
      ptrdiff_t offset = begin - boot_map.start;

      // We treat the image header as part of the memory map for now
      // If we wanted to change this, we could pass base=start+sizeof(ImageHeader)
      // But it might still be interesting to see if any of the ImageHeader data mutated
      const uint8_t* local_ptr = reinterpret_cast<const uint8_t*>(&boot_image_header) + offset;
      uint8_t* remote_ptr = &remote_contents[offset];

      virtual_page_idx = reinterpret_cast<uintptr_t>(local_ptr) / kPageSize;

      // Calculate the page index, relative to the 0th page where the image begins
      page_idx = (offset + page_off_begin) / kPageSize;
      if (*local_ptr != *remote_ptr) {
        // Track number of bytes that are different
        different_bytes++;
      }

      // Independently count the # of dirty pages on the remote side.
      // This runs once per page: on the first byte seen for each new page index.
      size_t remote_virtual_page_idx = begin / kPageSize;
      if (previous_page_idx != page_idx) {
        // Placeholder value; IsPageDirty receives a pointer to it as an out-parameter.
        uint64_t page_count = 0xC0FFEE;
        // TODO: virtual_page_idx needs to be from the same process
        int dirtiness = (IsPageDirty(page_map_file.get(),        // Image-diff-pid procmap
                                     clean_page_map_file.get(),  // Self procmap
                                     kpage_flags_file.get(),
                                     kpage_count_file.get(),
                                     remote_virtual_page_idx,    // potentially "dirty" page
                                     virtual_page_idx,           // true "clean" page
                                     &page_count,
                                     &error_msg));
        if (dirtiness < 0) {
          os << error_msg;
          return false;
        } else if (dirtiness > 0) {
          dirty_pages++;
          // Pages are visited in increasing order, so end() is the right insertion hint.
          dirty_page_set_remote.insert(dirty_page_set_remote.end(), remote_virtual_page_idx);
          dirty_page_set_local.insert(dirty_page_set_local.end(), virtual_page_idx);
        }

        bool is_dirty = dirtiness > 0;
        bool is_private = page_count == 1;

        if (is_private) {
          private_pages++;
        }

        if (is_dirty && is_private) {
          private_dirty_pages++;
        }
      }
    }

    std::map<mirror::Class*, ClassData> class_data;

    // Walk each object in the remote image space and compare it against ours
    size_t different_objects = 0;

    std::map<off_t /* field offset */, int /* count */> art_method_field_dirty_count;
    std::vector<ArtMethod*> art_method_dirty_objects;

    std::map<off_t /* field offset */, int /* count */> class_field_dirty_count;
    std::vector<mirror::Class*> class_dirty_objects;

    // List of local objects that are clean, but located on dirty pages.
    std::vector<mirror::Object*> false_dirty_objects;
    size_t false_dirty_object_bytes = 0;

    // Objects that are dirty against the image (possibly shared or private dirty).
    std::set<mirror::Object*> image_dirty_objects;

    // Objects that are dirty against the zygote (probably private dirty).
    std::set<mirror::Object*> zygote_dirty_objects;

    size_t dirty_object_bytes = 0;
    const uint8_t* begin_image_ptr = image_begin_unaligned;
    const uint8_t* end_image_ptr = image_mirror_end_unaligned;

    // NOTE(review): the walk below indexes remote_contents / zygote_contents by the object's
    // offset from the local image begin. That presumes the remote mapping starts at the image
    // begin and covers the whole objects section; the range check above only establishes
    // that the remote map lies within the local image range. Confirm against the caller.
    const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
    while (reinterpret_cast<uintptr_t>(current) < reinterpret_cast<uintptr_t>(end_image_ptr)) {
      CHECK_ALIGNED(current, kObjectAlignment);
      mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));

      // Sanity check that we are reading a real object
      CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
      if (kUseBakerOrBrooksReadBarrier) {
        obj->AssertReadBarrierPointer();
      }

      // The size is needed many times below; query it once per object.
      const size_t obj_size = obj->SizeOf();

      // Iterate every page this object belongs to
      bool on_dirty_page = false;
      const uintptr_t object_address = reinterpret_cast<uintptr_t>(current);
      const uintptr_t object_end = RoundUp(object_address + obj_size, kObjectAlignment);
      size_t page_off = 0;
      size_t current_page_idx;
      do {
        current_page_idx = object_address / kPageSize + page_off;

        if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
          // This object is on a dirty page
          on_dirty_page = true;
        }

        page_off++;
      } while ((current_page_idx * kPageSize) < object_end);

      mirror::Class* klass = obj->GetClass();
      // std::map references stay valid across later insertions, so look the entry up once.
      ClassData& klass_data = class_data[klass];

      // Check against the other object and see if they are different
      ptrdiff_t offset = current - begin_image_ptr;
      const uint8_t* current_remote = &remote_contents[offset];
      mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
          const_cast<uint8_t*>(current_remote));

      const std::string descriptor = GetClassDescriptor(klass);

      bool different_image_object = memcmp(current, current_remote, obj_size) != 0;
      if (different_image_object) {
        bool different_zygote_object = false;
        if (!zygote_contents.empty()) {
          const uint8_t* zygote_ptr = &zygote_contents[offset];
          different_zygote_object = memcmp(current, zygote_ptr, obj_size) != 0;
        }
        if (different_zygote_object) {
          // Different from zygote.
          zygote_dirty_objects.insert(obj);
        } else {
          // Just different from image.
          image_dirty_objects.insert(obj);
        }

        different_objects++;
        dirty_object_bytes += obj_size;

        ++klass_data.dirty_object_count;

        // For Class and ArtMethod objects we additionally record which byte offsets changed.
        std::map<off_t, int>* field_dirty_count = nullptr;
        if (klass->IsClassClass()) {
          // this is a "Class"
          field_dirty_count = &class_field_dirty_count;
          class_dirty_objects.push_back(reinterpret_cast<mirror::Class*>(remote_obj));
        } else if (descriptor == "Ljava/lang/reflect/ArtMethod;") {
          // this is an ArtMethod
          field_dirty_count = &art_method_field_dirty_count;
          art_method_dirty_objects.push_back(reinterpret_cast<ArtMethod*>(remote_obj));
        }

        // Go byte-by-byte and figure out what exactly got dirtied
        size_t dirty_byte_count_per_object = 0;
        for (size_t i = 0; i < obj_size; ++i) {
          if (current[i] != current_remote[i]) {
            dirty_byte_count_per_object++;
            if (field_dirty_count != nullptr) {
              (*field_dirty_count)[i]++;
            }
          }
        }
        klass_data.dirty_object_byte_count += dirty_byte_count_per_object;
        klass_data.dirty_object_size_in_bytes += obj_size;
        klass_data.dirty_objects.push_back(remote_obj);
      } else {
        ++klass_data.clean_object_count;

        if (on_dirty_page) {
          // This object was either never mutated or got mutated back to the same value.
          // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
          false_dirty_objects.push_back(obj);
          klass_data.false_dirty_objects.push_back(obj);
          false_dirty_object_bytes += obj_size;
          klass_data.false_dirty_byte_count += obj_size;
          klass_data.false_dirty_object_count += 1;
        }
      }

      // Unconditionally store the class descriptor in case we need it later
      klass_data.descriptor = descriptor;
      current += RoundUp(obj_size, kObjectAlignment);
    }

    // Looking at only dirty pages, figure out how many of those bytes belong to dirty objects.
    // Report 0 instead of dividing by zero when there are no dirty pages at all.
    const float true_dirtied_percent = (dirty_pages == 0)
        ? 0.0f
        : dirty_object_bytes * 1.0f / (dirty_pages * kPageSize);
    // The counters are unsigned; clamp so that "more different than dirty" pages cannot wrap
    // around to a huge bogus value.
    const size_t false_dirty_pages =
        (dirty_pages > different_pages) ? dirty_pages - different_pages : 0;

    os << "Mapping at [" << reinterpret_cast<void*>(boot_map.start) << ", "
       << reinterpret_cast<void*>(boot_map.end) << ") had: \n  "
       << different_bytes << " differing bytes, \n  "
       << different_int32s << " differing int32s, \n  "
       << different_objects << " different objects, \n  "
       << dirty_object_bytes << " different object [bytes], \n  "
       << false_dirty_objects.size() << " false dirty objects,\n  "
       << false_dirty_object_bytes << " false dirty object [bytes], \n  "
       << true_dirtied_percent << " different objects-vs-total in a dirty page;\n  "
       << different_pages << " different pages; \n  "
       << dirty_pages << " pages are dirty; \n  "
       << false_dirty_pages << " pages are false dirty; \n  "
       << private_pages << " pages are private; \n  "
       << private_dirty_pages << " pages are Private_Dirty\n  ";

    // vector of pairs (int count, Class*)
    auto dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
        class_data, [](const ClassData& d) { return d.dirty_object_count; });
    auto clean_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
        class_data, [](const ClassData& d) { return d.clean_object_count; });

    if (!zygote_dirty_objects.empty()) {
      os << "\n" << "  Dirty objects compared to zygote (probably private dirty): "
         << zygote_dirty_objects.size() << "\n";
      for (mirror::Object* obj : zygote_dirty_objects) {
        const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
        ptrdiff_t offset = obj_bytes - begin_image_ptr;
        uint8_t* remote_bytes = &zygote_contents[offset];
        DiffObjectContents(obj, remote_bytes, os);
      }
    }
    os << "\n" << "  Dirty objects compared to image (private or shared dirty): "
       << image_dirty_objects.size() << "\n";
    for (mirror::Object* obj : image_dirty_objects) {
      const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
      ptrdiff_t offset = obj_bytes - begin_image_ptr;
      uint8_t* remote_bytes = &remote_contents[offset];
      DiffObjectContents(obj, remote_bytes, os);
    }

    os << "\n" << "  Dirty object count by class:\n";
    for (const auto& vk_pair : dirty_object_class_values) {
      int dirty_object_count = vk_pair.first;
      mirror::Class* klass = vk_pair.second;
      const ClassData& klass_data = class_data[klass];
      int object_sizes = klass_data.dirty_object_size_in_bytes;
      // Every class is listed, including those without dirty objects; print 0 for their
      // averages instead of the result of a division by zero.
      float avg_dirty_bytes_per_class = (object_sizes == 0)
          ? 0.0f
          : klass_data.dirty_object_byte_count * 1.0f / object_sizes;
      float avg_object_size = (dirty_object_count == 0)
          ? 0.0f
          : object_sizes * 1.0f / dirty_object_count;
      const std::string& descriptor = klass_data.descriptor;
      os << "    " << PrettyClass(klass) << " ("
         << "objects: " << dirty_object_count << ", "
         << "avg dirty bytes: " << avg_dirty_bytes_per_class << ", "
         << "avg object size: " << avg_object_size << ", "
         << "class descriptor: '" << descriptor << "'"
         << ")\n";

      constexpr size_t kMaxAddressPrint = 5;
      if (descriptor == "Ljava/lang/reflect/ArtMethod;") {
        os << "      sample object addresses: ";
        for (size_t i = 0; i < art_method_dirty_objects.size() && i < kMaxAddressPrint; ++i) {
          auto art_method = art_method_dirty_objects[i];

          os << reinterpret_cast<void*>(art_method) << ", ";
        }
        os << "\n";

        os << "      dirty byte +offset:count list = ";
        auto art_method_field_dirty_count_sorted =
            SortByValueDesc<off_t, int, int>(art_method_field_dirty_count);
        for (const auto& pair : art_method_field_dirty_count_sorted) {
          off_t offset = pair.second;
          int count = pair.first;

          os << "+" << offset << ":" << count << ", ";
        }

        os << "\n";

        os << "      field contents:\n";
        const auto& dirty_objects_list = klass_data.dirty_objects;
        for (mirror::Object* obj : dirty_objects_list) {
          // remote method (points into our snapshot of the remote image)
          auto art_method = reinterpret_cast<ArtMethod*>(obj);

          // remote class
          mirror::Class* remote_declaring_class =
            FixUpRemotePointer(art_method->GetDeclaringClass(), remote_contents, boot_map);

          // local class
          mirror::Class* declaring_class =
            RemoteContentsPointerToLocal(remote_declaring_class,
                                         remote_contents,
                                         boot_image_header);

          os << "        " << reinterpret_cast<void*>(obj) << " ";
          os << "  entryPointFromJni: "
             << reinterpret_cast<const void*>(
                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
          os << "  entryPointFromQuickCompiledCode: "
             << reinterpret_cast<const void*>(
                    art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
             << ", ";
          os << "  isNative? " << (art_method->IsNative() ? "yes" : "no") << ", ";
          os << "  class_status (local): " << declaring_class->GetStatus();
          os << "  class_status (remote): " << remote_declaring_class->GetStatus();
          os << "\n";
        }
      }
      if (descriptor == "Ljava/lang/Class;") {
        os << "       sample object addresses: ";
        for (size_t i = 0; i < class_dirty_objects.size() && i < kMaxAddressPrint; ++i) {
          auto class_ptr = class_dirty_objects[i];

          os << reinterpret_cast<void*>(class_ptr) << ", ";
        }
        os << "\n";

        os << "       dirty byte +offset:count list = ";
        auto class_field_dirty_count_sorted =
            SortByValueDesc<off_t, int, int>(class_field_dirty_count);
        for (const auto& pair : class_field_dirty_count_sorted) {
          off_t offset = pair.second;
          int count = pair.first;

          os << "+" << offset << ":" << count << ", ";
        }
        os << "\n";

        os << "      field contents:\n";
        const auto& dirty_objects_list = klass_data.dirty_objects;
        for (mirror::Object* obj : dirty_objects_list) {
          // remote class object
          auto remote_klass = reinterpret_cast<mirror::Class*>(obj);

          // local class object
          auto local_klass = RemoteContentsPointerToLocal(remote_klass,
                                                          remote_contents,
                                                          boot_image_header);

          os << "        " << reinterpret_cast<void*>(obj) << " ";
          os << "  class_status (remote): " << remote_klass->GetStatus() << ", ";
          os << "  class_status (local): " << local_klass->GetStatus();
          os << "\n";
        }
      }
    }

    auto false_dirty_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
        class_data, [](const ClassData& d) { return d.false_dirty_object_count; });

    os << "\n" << "  False-dirty object count by class:\n";
    for (const auto& vk_pair : false_dirty_object_class_values) {
      int object_count = vk_pair.first;
      mirror::Class* klass = vk_pair.second;
      const ClassData& klass_data = class_data[klass];
      int object_sizes = klass_data.false_dirty_byte_count;
      // Classes without false-dirty objects are listed too; avoid dividing by zero for them.
      float avg_object_size = (object_count == 0) ? 0.0f : object_sizes * 1.0f / object_count;
      const std::string& descriptor = klass_data.descriptor;
      os << "    " << PrettyClass(klass) << " ("
         << "objects: " << object_count << ", "
         << "avg object size: " << avg_object_size << ", "
         << "total bytes: " << object_sizes << ", "
         << "class descriptor: '" << descriptor << "'"
         << ")\n";

      if (descriptor == "Ljava/lang/reflect/ArtMethod;") {
        const auto& art_method_false_dirty_objects = klass_data.false_dirty_objects;

        os << "      field contents:\n";
        for (mirror::Object* obj : art_method_false_dirty_objects) {
          // local method
          auto art_method = reinterpret_cast<ArtMethod*>(obj);

          // local class
          mirror::Class* declaring_class = art_method->GetDeclaringClass();

          os << "        " << reinterpret_cast<void*>(obj) << " ";
          os << "  entryPointFromJni: "
             << reinterpret_cast<const void*>(
                    art_method->GetEntryPointFromJniPtrSize(pointer_size)) << ", ";
          os << "  entryPointFromQuickCompiledCode: "
             << reinterpret_cast<const void*>(
                    art_method->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size))
             << ", ";
          os << "  isNative? " << (art_method->IsNative() ? "yes" : "no") << ", ";
          os << "  class_status (local): " << declaring_class->GetStatus();
          os << "\n";
        }
      }
    }

    os << "\n" << "  Clean object count by class:\n";
    for (const auto& vk_pair : clean_object_class_values) {
      os << "    " << PrettyClass(vk_pair.second) << " (" << vk_pair.first << ")\n";
    }

    return true;
  }