profiler/profile.cpp
/* SPDX-License-Identifier: BSD-2-Clause */
/*
* Copyright (c) 2022 LG Electronics
*
* Based on profile(8) from BCC by Brendan Gregg.
* 28-Dec-2021 Eunseon Lee Created this,
* 17-Jul-2022 Yusheng Zheng modified this.
*/
#include <argp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <unistd.h>
#include <time.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "profile.h"
#include "lua_stacks_map.h"
#include "profile.skel.h"
#include "stack_printer.h"
extern "C"
{
#include "trace_helpers.h"
#include "uprobe_helpers.h"
}
bool exiting = false;
class lua_stack_map lua_bt_map;
#define warn(...) fprintf(stderr, __VA_ARGS__)
struct profile_env env = {
.pid = -1,
.tid = -1,
.stack_storage_size = 8192,
.perf_max_stack_depth = 127,
.duration = 3,
.freq = 1,
.sample_freq = 49,
.cpu = -1,
.frame_depth = 15
};
#define UPROBE_SIZE 3
const char *argp_program_version = "profile 0.1";
const char *argp_program_bug_address =
"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
const char argp_program_doc[] =
"Profile CPU usage by sampling stack traces at a timed interval.\n"
"\n"
"USAGE: profile [OPTIONS...] [duration]\n"
"EXAMPLES:\n"
" profile # profile stack traces at 49 Hertz until Ctrl-C\n"
" profile -F 99 # profile stack traces at 99 Hertz\n"
" profile -c 1000000 # profile stack traces every 1 in a million events\n"
" profile 5 # profile at 49 Hertz for 5 seconds only\n"
" profile -f # output in folded format for flame graphs\n"
" profile -p 185 # only profile process with PID 185\n"
" profile -U # only show user space stacks (no kernel)\n"
" profile -K # only show kernel space stacks (no user)\n";
#define OPT_PERF_MAX_STACK_DEPTH 1 /* --perf-max-stack-depth */
#define OPT_STACK_STORAGE_SIZE 2 /* --stack-storage-size */
#define OPT_LUA_USER_STACK_ONLY 3 /* --lua-user-stacks-only */
#define OPT_DISABLE_LUA_USER_TRACE 4 /* --disable-lua-user-trace */
#define PERF_BUFFER_PAGES 16
#define PERF_POLL_TIMEOUT_MS 100
static const struct argp_option opts[] = {
{"pid", 'p', "PID", 0, "profile process with this PID only"},
{"tid", 'L', "TID", 0, "profile thread with this TID only"},
{"user-stacks-only", 'U', NULL, 0,
"show stacks from user space only (no kernel space stacks)"},
{"kernel-stacks-only", 'K', NULL, 0,
"show stacks from kernel space only (no user space stacks)"},
{"lua-user-stacks-only", OPT_LUA_USER_STACK_ONLY, NULL, 0,
"replace user stacks with lua stack traces (no other user space stacks)"},
{"disable-lua-user-trace", OPT_DISABLE_LUA_USER_TRACE, NULL, 0,
"disable lua user space stack trace"},
{"frequency", 'F', "FREQUENCY", 0, "sample frequency, Hertz"},
{"delimited", 'd', NULL, 0, "insert delimiter between kernel/user stacks"},
{"include-idle ", 'I', NULL, 0, "include CPU idle stacks"},
{"folded", 'f', NULL, 0, "output folded format, one line per stack (for flame graphs)"},
{"stack-storage-size", OPT_STACK_STORAGE_SIZE, "STACK-STORAGE-SIZE", 0,
"the number of unique stack traces that can be stored and displayed (default 1024)"},
{"cpu", 'C', "CPU", 0, "cpu number to run profile on"},
{"perf-max-stack-depth", OPT_PERF_MAX_STACK_DEPTH,
"PERF-MAX-STACK-DEPTH", 0, "the limit for both kernel and user stack traces (default 127)"},
{"max-frame-depth", 'D', "DEPTH", 0, "max frame depth for eBPF to travel in the stack (default 15)"},
{"verbose", 'v', NULL, 0, "Verbose debug output"},
{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
{},
};
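/*
 * argp callback: handles one option at a time and records it in the global
 * env; the single positional argument is the sampling duration in seconds.
 */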
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
static int pos_args;
switch (key)
{
case 'h':
argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
break;
case 'v':
env.verbose = true;
break;
case 'p':
errno = 0;
env.pid = strtol(arg, NULL, 10);
if (errno)
{
fprintf(stderr, "invalid PID: %s\n", arg);
argp_usage(state);
}
break;
case 'L':
errno = 0;
env.tid = strtol(arg, NULL, 10);
if (errno || env.tid <= 0)
{
fprintf(stderr, "Invalid TID: %s\n", arg);
argp_usage(state);
}
break;
case 'U':
env.user_stacks_only = true;
break;
case 'K':
env.kernel_stacks_only = true;
break;
case 'F':
errno = 0;
env.sample_freq = strtol(arg, NULL, 10);
if (errno || env.sample_freq <= 0)
{
fprintf(stderr, "invalid FREQUENCY: %s\n", arg);
argp_usage(state);
}
break;
case 'D':
errno = 0;
env.frame_depth = strtol(arg, NULL, 10);
if (errno || env.frame_depth <= 0)
{
fprintf(stderr, "invalid FREQUENCY: %s\n", arg);
argp_usage(state);
}
break;
case 'd':
env.delimiter = true;
break;
case 'I':
env.include_idle = true;
break;
case 'f':
env.folded = true;
break;
case 'C':
errno = 0;
env.cpu = strtol(arg, NULL, 10);
if (errno)
{
fprintf(stderr, "invalid CPU: %s\n", arg);
argp_usage(state);
}
break;
case OPT_PERF_MAX_STACK_DEPTH:
errno = 0;
env.perf_max_stack_depth = strtol(arg, NULL, 10);
if (errno)
{
fprintf(stderr, "invalid perf max stack depth: %s\n", arg);
argp_usage(state);
}
break;
case OPT_STACK_STORAGE_SIZE:
errno = 0;
env.stack_storage_size = strtol(arg, NULL, 10);
if (errno)
{
fprintf(stderr, "invalid stack storage size: %s\n", arg);
argp_usage(state);
}
break;
case OPT_LUA_USER_STACK_ONLY:
env.lua_user_stacks_only = true;
break;
case OPT_DISABLE_LUA_USER_TRACE:
env.disable_lua_user_trace = true;
break;
case ARGP_KEY_ARG:
if (pos_args++)
{
fprintf(stderr,
"Unrecognized positional argument: %s\n", arg);
argp_usage(state);
}
errno = 0;
env.duration = strtol(arg, NULL, 10);
if (errno || env.duration <= 0)
{
fprintf(stderr, "Invalid duration (in s): %s\n", arg);
argp_usage(state);
}
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
static int nr_cpus;
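/*
 * Open a software CPU-clock perf event on each possible CPU (or only on
 * env.cpu when -C is given) at the requested sampling frequency and attach
 * the sampling BPF program to it. Offline CPUs (ENODEV) are skipped.
 */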
static int open_and_attach_perf_event(int freq, struct bpf_program *prog,
struct bpf_link *links[])
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
.sample_freq = env.sample_freq,
.freq = env.freq,
};
int i, fd;
for (i = 0; i < nr_cpus; i++)
{
if (env.cpu != -1 && env.cpu != i)
continue;
fd = syscall(__NR_perf_event_open, &attr, -1, i, -1, 0);
if (fd < 0)
{
/* Ignore CPU that is offline */
if (errno == ENODEV)
continue;
fprintf(stderr, "failed to init perf sampling: %s\n",
strerror(errno));
return -1;
}
links[i] = bpf_program__attach_perf_event(prog, fd);
if (!links[i])
{
fprintf(stderr, "failed to attach perf event on cpu: "
"%d\n",
i);
links[i] = NULL;
close(fd);
return -1;
}
}
return 0;
}
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
if (level == LIBBPF_DEBUG && !env.verbose)
return 0;
return vfprintf(stderr, format, args);
}
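/* SIGINT handler: request a clean exit from the polling loop in main(). */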
static void sig_handler(int sig)
{
exiting = true;
}
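/*
 * Perf-buffer callback: each lua stack event emitted by the uprobe programs
 * is stored in the global lua stack map, to be used when the collected
 * stacks are printed.
 */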
static void handle_lua_stack_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
const struct lua_stack_event *e = static_cast<const struct lua_stack_event *>(data);
lua_bt_map.insert_lua_stack_map(e);
}
static void handle_lua_stack_lost_events(void *ctx, int cpu, __u64 lost_cnt)
{
warn("lost %llu events on CPU #%d\n", lost_cnt, cpu);
}
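/*
 * Resolve func_name to its ELF offset inside the library at lua_path and
 * attach the given BPF program to it as a uprobe (pid -1 = any process).
 */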
static struct bpf_link *
attach_lua_func(const char *lua_path, const char *func_name, const bpf_program *prog)
{
off_t func_off = get_elf_func_offset(lua_path, func_name);
if (func_off < 0)
{
warn("could not find %s in %s\n", func_name, lua_path);
return NULL;
}
struct bpf_link *link = bpf_program__attach_uprobe(prog, false,
-1, lua_path, func_off);
if (!link)
{
warn("failed to attach %s: %d\n", func_name, -errno);
return NULL;
}
return link;
}
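/*
 * Attach uprobes on the target's LuaJIT library: lua_resume and lua_pcall
 * trigger handle_entry_lua, lua_yield triggers handle_entry_lua_cancel.
 * The library path is resolved from the PID given with -p.
 */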
static int attach_lua_uprobes(struct profile_bpf *obj, struct bpf_link *links[])
{
	char lua_path[128];
	int res;
	/* lua stack tracing needs a target process: the luajit library path is
	 * looked up from the PID given with -p */
	if (env.pid <= 0)
	{
		fprintf(stderr, "warning: no target PID (-p), skipping lua uprobes\n");
		return -1;
	}
	res = get_pid_lib_path(env.pid, "luajit-5.1.so", lua_path, sizeof(lua_path));
	if (res < 0)
	{
		fprintf(stderr, "warning: failed to get lib path for pid %d\n", env.pid);
		return -1;
	}
links[0] = attach_lua_func(lua_path, "lua_resume", obj->progs.handle_entry_lua);
if (!links[0])
{
return -1;
}
links[1] = attach_lua_func(lua_path, "lua_pcall", obj->progs.handle_entry_lua);
if (!links[1])
{
return -1;
}
links[2] = attach_lua_func(lua_path, "lua_yield", obj->progs.handle_entry_lua_cancel);
if (!links[2])
{
return -1;
}
return 0;
}
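/*
 * Overall flow: parse arguments, configure and load the BPF object, attach
 * the lua uprobes (if a luajit library is found) and the per-CPU perf
 * events, poll lua stack events while sampling runs, then symbolize and
 * print the collected stack traces.
 */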
int main(int argc, char **argv)
{
static const struct argp argp = {
.options = opts,
.parser = parse_arg,
.doc = argp_program_doc,
};
struct syms_cache *syms_cache = NULL;
struct ksyms *ksyms = NULL;
struct bpf_link *cpu_links[MAX_CPU_NR] = {};
struct bpf_link *uprobe_links[UPROBE_SIZE] = {};
struct profile_bpf *obj = nullptr;
struct perf_buffer *pb = nullptr;
int err, i;
const char *stack_context = "user + kernel";
char thread_context[64];
char sample_context[64];
err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
if (err)
return err;
if (env.user_stacks_only && env.kernel_stacks_only)
{
fprintf(stderr, "user_stacks_only and kernel_stacks_only cannot be used together.\n");
return 1;
}
libbpf_set_print(libbpf_print_fn);
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
nr_cpus = libbpf_num_possible_cpus();
if (nr_cpus < 0)
{
printf("failed to get # of possible cpus: '%s'!\n",
strerror(-nr_cpus));
return 1;
}
if (nr_cpus > MAX_CPU_NR)
{
fprintf(stderr, "the number of cpu cores is too big, please "
"increase MAX_CPU_NR's value and recompile");
return 1;
}
obj = profile_bpf__open();
if (!obj)
{
fprintf(stderr, "failed to open BPF object\n");
return 1;
}
/* initialize global data (filtering options) */
obj->rodata->targ_pid = env.pid;
obj->rodata->targ_tid = env.tid;
obj->rodata->user_stacks_only = env.user_stacks_only;
obj->rodata->kernel_stacks_only = env.kernel_stacks_only;
obj->rodata->include_idle = env.include_idle;
obj->rodata->frame_depth = env.frame_depth;
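	/*
	 * Size the stack map before loading: each entry can hold up to
	 * perf_max_stack_depth addresses, and at most stack_storage_size
	 * unique stack traces are kept.
	 */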
bpf_map__set_value_size(obj->maps.stackmap,
env.perf_max_stack_depth * sizeof(unsigned long));
bpf_map__set_max_entries(obj->maps.stackmap, env.stack_storage_size);
err = profile_bpf__load(obj);
if (err)
{
fprintf(stderr, "failed to load BPF programs\n");
fprintf(stderr, "try decrease the max frame depth with -D and rerun with sudo?\n");
goto cleanup;
}
ksyms = ksyms__load();
if (!ksyms)
{
fprintf(stderr, "failed to load kallsyms\n");
goto cleanup;
}
syms_cache = syms_cache__new(0);
if (!syms_cache)
{
fprintf(stderr, "failed to create syms_cache\n");
goto cleanup;
}
err = attach_lua_uprobes(obj, uprobe_links);
if (err < 0)
{
		// could not find the lua library, so skip the lua user-space stack trace
env.disable_lua_user_trace = true;
}
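	/* lua stack events are delivered through a perf buffer backed by the
	 * lua_event_output map and handled by handle_lua_stack_event() */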
pb = perf_buffer__new(bpf_map__fd(obj->maps.lua_event_output), PERF_BUFFER_PAGES,
handle_lua_stack_event, handle_lua_stack_lost_events, NULL, NULL);
if (!pb)
{
err = -errno;
warn("failed to open perf buffer: %d\n", err);
goto cleanup;
}
err = open_and_attach_perf_event(env.freq, obj->progs.do_perf_event, cpu_links);
if (err)
goto cleanup;
signal(SIGINT, sig_handler);
if (env.pid != -1)
snprintf(thread_context, sizeof(thread_context), "PID %d", env.pid);
else if (env.tid != -1)
snprintf(thread_context, sizeof(thread_context), "TID %d", env.tid);
else
snprintf(thread_context, sizeof(thread_context), "all threads");
snprintf(sample_context, sizeof(sample_context), "%d Hertz", env.sample_freq);
if (env.user_stacks_only)
stack_context = "user";
else if (env.kernel_stacks_only)
stack_context = "kernel";
if (!env.folded)
{
printf("Sampling at %s of %s by %s stack", sample_context, thread_context, stack_context);
if (env.cpu != -1)
printf(" on CPU#%d", env.cpu);
if (env.duration < 99999999)
printf(" for %d secs.\n", env.duration);
else
printf("... Hit Ctrl-C to end.\n");
}
	/*
	 * perf_buffer__poll() below returns -EINTR when someone presses Ctrl-C;
	 * sig_handler only sets the exit flag, so the loop terminates cleanly.
	 */
	time_t start_time;
	start_time = time(NULL);
	while (!exiting)
	{
		/* drain lua stack events from the perf buffer while sampling runs */
		err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
		if (err < 0 && err != -EINTR)
		{
			warn("error polling perf buffer: %s\n", strerror(-err));
			goto cleanup;
		}
		/* reset err to return 0 if exiting */
		err = 0;
		/* honor the requested sampling duration */
		if (env.duration > 0 && time(NULL) - start_time >= env.duration)
			break;
	}
print_stack_trace(ksyms, syms_cache, obj);
cleanup:
if (env.cpu != -1)
bpf_link__destroy(cpu_links[env.cpu]);
else
{
for (i = 0; i < nr_cpus; i++)
bpf_link__destroy(cpu_links[i]);
}
for (i = 0; i < UPROBE_SIZE; i++)
bpf_link__destroy(uprobe_links[i]);
profile_bpf__destroy(obj);
perf_buffer__free(pb);
syms_cache__free(syms_cache);
ksyms__free(ksyms);
return err != 0;
}