memory_analyzer/analysis_utils.py (163 lines of code) (raw):
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import errno
import io
import os
import pickle
import select
import stat
import subprocess
import sys
from contextlib import contextmanager
from shutil import copyfile
from typing import List
from attr import dataclass
from jinja2 import Environment, FileSystemLoader
from pympler import summary
from .frontend import frontend_utils
@dataclass
class RetrievedObjects:
    """
    The data collected for one process, together with a display title.

    Built by GDBObject.run_analysis for a live analysis and by snapshot_diff
    for a comparison against a previously saved snapshot.
    """

    # pid the data belongs to; snapshot_diff uses 0 for its fallback entry
    # that is not tied to a matching pid.
    pid: int
    # Heading for this result, e.g. "Analysis for <pid>".
    title: str
    # Payload: whatever was unpickled from the analysis pipe, or the result of
    # pympler's summary.get_diff when built by snapshot_diff.
    # NOTE(review): presumably pympler summary rows -- confirm the annotation.
    data: List[List[int]]
class GDBObject:
    """
    Runs gdb against a target process and collects the pickled results that
    are written to a named pipe while gdb executes gdb_commands.py.
    """

    def __init__(self, pid, current_path, executable, template_out_path):
        """
        Args:
            pid: numeric pid of the target
            current_path: the directory containing gdb_commands.py
            executable: the binary passed as the first arg to gdb
            template_out_path: the location that analysis.py rendered templates
                end up in.
        """
        self.pid = pid
        self.fifo = f"/tmp/memanz_pipe_{self.pid}"
        self.current_path = current_path
        # These should all be the same, so safe for threads.
        os.putenv("MEMORY_ANALYZER_TEMPLATES_PATH", template_out_path)
        self.executable = executable

    def run_analysis(self, debug=False):
        """
        Attach gdb to the target pid, run gdb_commands.py inside it and return
        whatever was read back from the named pipe.

        Args:
            debug: when true gdb runs with `-batch` and its stderr is forwarded
                to our stderr; otherwise it runs with `-batch-silent` and its
                stderr is discarded.

        Returns:
            A RetrievedObjects for this pid. Its `data` is the value returned
            by unpickle_pipe, which may be None when nothing was written.
        """
        self.create_pipe()
        frontend_utils.echo_info(f"Analyzing pid {self.pid}")
        command_file = f"{self.current_path}/gdb_commands.py"
        command = [
            "gdb",
            "-q",
            # Activates python for GDB.
            self.executable,
            "-p",
            f"{self.pid}",
            "-ex",
            "set trace-commands on",
            "-batch" if debug else "-batch-silent",
            # This shouldn't be required since we specify absolute path, but
            # TODO this gives us a way to inject a path with objgraph on it.
            "-ex",
            # Lets gdb find the correct gdb_commands script.
            f"set directories {self.current_path}",
            "-ex",
            # Sets the correct path for gdb_commands, else C-API commands fail.
            f'py sys.path.append("{self.current_path}")',
            "-x",
            command_file,
        ]
        frontend_utils.echo_info(f"Setting up GDB for pid {self.pid}")
        proc = subprocess.Popen(
            command, stderr=sys.stderr if debug else subprocess.DEVNULL
        )
        with self.drain_pipe(proc) as data:
            retrieved_objs = RetrievedObjects(
                pid=self.pid,
                title=f"Analysis for {self.pid}",
                data=self.unpickle_pipe(data),
            )
        self._end_subprocess(proc)
        return retrieved_objs

    @contextmanager
    def drain_pipe(self, process):
        """
        We need this because by default, `open`s on named pipes block. If GDB or
        the injected GDB extension in Python crash, the process will never write
        to the pipe and we will block opening and `memory_analyzer` won't exit.

        The pipe is read without blocking until `process` has exited and the
        last read came back empty; the collected bytes are then yielded as a
        BytesIO positioned at offset 0.

        Any Exception raised while draining, or inside the caller's `with`
        body, is reported, the subprocess is ended and the program exits with
        status 1.
        """
        # Bound before the try so the `finally` clause stays safe when os.open
        # itself fails; otherwise an UnboundLocalError would replace the exit.
        pipe = None
        try:
            pipe = os.open(self.fifo, os.O_RDONLY | os.O_NONBLOCK)
            result = io.BytesIO()
            timeout = 0.1  # seconds
            partial_read = None
            # Keep going while data is still arriving or gdb is still running.
            while partial_read or process.poll() is None:
                ready_fds, _, _ = select.select([pipe], [], [], timeout)
                if ready_fds:
                    try:
                        partial_read = os.read(ready_fds[0], select.PIPE_BUF)
                    except BlockingIOError:
                        partial_read = None
                    if partial_read:
                        result.write(partial_read)
            result.seek(0)
            yield result
            result.close()
        except Exception as e:
            frontend_utils.echo_error(f"Failed with {e}")
            self._end_subprocess(process)
            sys.exit(1)
        finally:
            if pipe is not None:
                os.close(pipe)

    def _end_subprocess(self, proc):
        """
        Give `proc` five seconds to exit on its own, then kill it.
        """
        try:
            proc.wait(5)
        except subprocess.TimeoutExpired:
            # Popen.wait signals a timeout with subprocess.TimeoutExpired, not
            # the builtin TimeoutError.
            proc.kill()
            # Reap the killed child so it does not linger as a zombie.
            proc.wait()

    def create_pipe(self):
        """
        Create the named pipe for this pid and make it readable and writable by
        everyone. An already existing pipe is left as it is.

        Raises:
            OSError: for any failure other than the pipe already existing.
        """
        try:
            os.mkfifo(self.fifo)
            os.chmod(str(self.fifo), 0o666)
        except OSError as oe:
            if oe.errno != errno.EEXIST:
                raise

    def unpickle_pipe(self, fifo):
        """
        Unpickle a single object from `fifo`, the file-like object holding the
        bytes drained from the pipe.

        Returns:
            The unpickled object, or None when the stream is empty or the
            object is falsy.

        Raises:
            Exception: the unpickled object itself when it is an exception
                instance, or whatever unpickling raised. Both are reported
                before being re-raised.
        """
        frontend_utils.echo_info("Gathering data...")
        try:
            # NOTE(review): pickle.load can execute arbitrary code from the
            # stream, and the fifo sits at a predictable path with mode 0o666.
            # Confirm that only trusted processes can write to it.
            items = pickle.load(fifo)
            if items:
                if isinstance(items, Exception):
                    raise items
                return items
        except EOFError:
            # Nothing was written to the pipe.
            return None
        except pickle.UnpicklingError as e:
            frontend_utils.echo_error(f"Error retrieving data from process: {e}")
            raise
        except Exception as e:
            frontend_utils.echo_error(
                f"{type(e).__name__} occurred during analysis: {e}"
            )
            raise
        return None
def load_template(name, templates_path):
    """
    Fetch the jinja2 template called `name` from the `templates_path`
    directory. Autoescaping is explicitly turned off.
    """
    loader = FileSystemLoader(templates_path)
    jinja_env = Environment(loader=loader, autoescape=False)
    return jinja_env.get_template(name)
def render_template(
    template_name,
    templates_path,
    num_refs,
    pid,
    specific_refs,
    output_path,
    template_out_dir,
):
    """
    Render `template_name` (looked up in `templates_path`) with the given
    values and return the resulting text.

    When `template_out_dir` is truthy the rendered text is additionally written
    to `rendered_template-<pid>.py.out` inside that directory.
    """
    context = {
        "num_refs": num_refs,
        "pid": pid,
        "specific_refs": specific_refs,
        "output_path": output_path,
    }
    rendered = load_template(template_name, templates_path).render(**context)
    if not template_out_dir:
        return rendered
    # This path has to match the end of gdb_commands.py; the env var is set in
    # GDBObject constructor above.
    out_file = os.path.join(template_out_dir, f"rendered_template-{pid}.py.out")
    with open(out_file, "w") as handle:
        handle.write(rendered)
    return rendered
def snapshot_diff(cur_items, snapshot_file):
    """
    Attempts to compare like PIDs. If like PIDS can't be found it will just compare
    the first PID listed to the first PID in the file. Any unmatched or non-first
    PIDs will be ignored because we don't know what to compare them to.

    Args:
        cur_items: sequence of objects exposing `pid` and `data` for the
            current run.
        snapshot_file: the previously saved snapshot, passed to
            frontend_utils.get_pages to load the earlier items.

    Returns:
        A list of RetrievedObjects holding the differences, or None when the
        snapshot file cannot be unpickled. The list is empty when no PIDs match
        and either side has no items to fall back on.
    """
    try:
        prev_items = list(frontend_utils.get_pages(snapshot_file))
    except pickle.UnpicklingError as e:
        frontend_utils.echo_error(
            f"Error unpickling the data from {snapshot_file}: {e}"
        )
        return None

    differences = []
    for cur_item in cur_items:
        for prev_item in prev_items:
            if cur_item.pid == prev_item.pid:
                diff = summary.get_diff(cur_item.data, prev_item.data)
                differences.append(
                    RetrievedObjects(
                        pid=cur_item.pid,
                        title=f"Snapshot Differences for {cur_item.pid}",
                        data=diff,
                    )
                )

    # Fall back to first-vs-first only when both sides have a first item;
    # indexing an empty side would raise IndexError.
    if not differences and cur_items and prev_items:
        diff = summary.get_diff(cur_items[0].data, prev_items[0].data)
        differences.append(
            RetrievedObjects(pid=0, title="Snapshot Differences", data=diff)
        )
    return differences