mirror of
https://github.com/torvalds/linux.git
synced 2025-04-09 14:45:27 +00:00
perf trace: Reorganize syscalls
Identify struct syscall information in the syscalls table by a machine type and syscall number, not just system call number. Having the machine type means that 32-bit system calls can be differentiated from 64-bit ones on a machine capable of both. Having a table for all machine types and all system call numbers would be too large, so maintain a sorted array of system calls as they are encountered. Signed-off-by: Ian Rogers <irogers@google.com> Reviewed-by: Howard Chu <howardchu95@gmail.com> Reviewed-by: Charlie Jenkins <charlie@rivosinc.com> Reviewed-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Arnaldo Carvalho de Melo <acme@kernel.org> Link: https://lore.kernel.org/r/20250319050741.269828-5-irogers@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
This commit is contained in:
parent
af472d3c44
commit
3d94b8441c
@ -66,6 +66,7 @@
|
||||
#include "syscalltbl.h"
|
||||
#include "../perf.h"
|
||||
#include "trace_augment.h"
|
||||
#include "dwarf-regs.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
@ -86,6 +87,7 @@
|
||||
|
||||
#include <linux/ctype.h>
|
||||
#include <perf/mmap.h>
|
||||
#include <tools/libc_compat.h>
|
||||
|
||||
#ifdef HAVE_LIBTRACEEVENT
|
||||
#include <event-parse.h>
|
||||
@ -149,7 +151,10 @@ struct trace {
|
||||
struct perf_tool tool;
|
||||
struct syscalltbl *sctbl;
|
||||
struct {
|
||||
/** Sorted syscall numbers used by the trace. */
|
||||
struct syscall *table;
|
||||
/** Size of table. */
|
||||
size_t table_size;
|
||||
struct {
|
||||
struct evsel *sys_enter,
|
||||
*sys_exit,
|
||||
@ -1454,22 +1459,37 @@ static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
|
||||
return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias);
|
||||
}
|
||||
|
||||
/*
|
||||
* is_exit: is this "exit" or "exit_group"?
|
||||
* is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
|
||||
* args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
|
||||
* nonexistent: Just a hole in the syscall table, syscall id not allocated
|
||||
/**
|
||||
* struct syscall
|
||||
*/
|
||||
struct syscall {
|
||||
/** @e_machine: The ELF machine associated with the entry. */
|
||||
int e_machine;
|
||||
/** @id: id value from the tracepoint, the system call number. */
|
||||
int id;
|
||||
struct tep_event *tp_format;
|
||||
int nr_args;
|
||||
/**
|
||||
* @args_size: sum of the sizes of the syscall arguments, anything
|
||||
* after that is augmented stuff: pathname for openat, etc.
|
||||
*/
|
||||
|
||||
int args_size;
|
||||
struct {
|
||||
struct bpf_program *sys_enter,
|
||||
*sys_exit;
|
||||
} bpf_prog;
|
||||
/** @is_exit: is this "exit" or "exit_group"? */
|
||||
bool is_exit;
|
||||
/**
|
||||
* @is_open: is this "open" or "openat"? To associate the fd returned in
|
||||
* sys_exit with the pathname in sys_enter.
|
||||
*/
|
||||
bool is_open;
|
||||
/**
|
||||
* @nonexistent: Name lookup failed. Just a hole in the syscall table,
|
||||
* syscall id not allocated.
|
||||
*/
|
||||
bool nonexistent;
|
||||
bool use_btf;
|
||||
struct tep_format_field *args;
|
||||
@ -2107,22 +2127,21 @@ static int syscall__set_arg_fmts(struct syscall *sc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int trace__read_syscall_info(struct trace *trace, int id)
|
||||
static int syscall__read_info(struct syscall *sc, struct trace *trace)
|
||||
{
|
||||
char tp_name[128];
|
||||
struct syscall *sc;
|
||||
const char *name = syscalltbl__name(trace->sctbl, id);
|
||||
const char *name;
|
||||
int err;
|
||||
|
||||
if (trace->syscalls.table == NULL) {
|
||||
trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
|
||||
if (trace->syscalls.table == NULL)
|
||||
return -ENOMEM;
|
||||
}
|
||||
sc = trace->syscalls.table + id;
|
||||
if (sc->nonexistent)
|
||||
return -EEXIST;
|
||||
|
||||
if (sc->name) {
|
||||
/* Info already read. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
name = syscalltbl__name(trace->sctbl, sc->id);
|
||||
if (name == NULL) {
|
||||
sc->nonexistent = true;
|
||||
return -EEXIST;
|
||||
@ -2145,15 +2164,16 @@ static int trace__read_syscall_info(struct trace *trace, int id)
|
||||
*/
|
||||
if (IS_ERR(sc->tp_format)) {
|
||||
sc->nonexistent = true;
|
||||
return PTR_ERR(sc->tp_format);
|
||||
err = PTR_ERR(sc->tp_format);
|
||||
sc->tp_format = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* The tracepoint format contains __syscall_nr field, so it's one more
|
||||
* than the actual number of syscall arguments.
|
||||
*/
|
||||
if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
|
||||
RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
|
||||
if (syscall__alloc_arg_fmts(sc, sc->tp_format->format.nr_fields - 1))
|
||||
return -ENOMEM;
|
||||
|
||||
sc->args = sc->tp_format->format.fields;
|
||||
@ -2442,13 +2462,69 @@ next_arg:
|
||||
return printed;
|
||||
}
|
||||
|
||||
static void syscall__init(struct syscall *sc, int e_machine, int id)
|
||||
{
|
||||
memset(sc, 0, sizeof(*sc));
|
||||
sc->e_machine = e_machine;
|
||||
sc->id = id;
|
||||
}
|
||||
|
||||
static void syscall__exit(struct syscall *sc)
|
||||
{
|
||||
if (!sc)
|
||||
return;
|
||||
|
||||
zfree(&sc->arg_fmt);
|
||||
}
|
||||
|
||||
static int syscall__cmp(const void *va, const void *vb)
|
||||
{
|
||||
const struct syscall *a = va, *b = vb;
|
||||
|
||||
if (a->e_machine != b->e_machine)
|
||||
return a->e_machine - b->e_machine;
|
||||
|
||||
return a->id - b->id;
|
||||
}
|
||||
|
||||
static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, int id)
|
||||
{
|
||||
struct syscall key = {
|
||||
.e_machine = e_machine,
|
||||
.id = id,
|
||||
};
|
||||
struct syscall *sc, *tmp;
|
||||
|
||||
if (trace->syscalls.table) {
|
||||
sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
|
||||
sizeof(struct syscall), syscall__cmp);
|
||||
if (sc)
|
||||
return sc;
|
||||
}
|
||||
|
||||
tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1,
|
||||
sizeof(struct syscall));
|
||||
if (!tmp)
|
||||
return NULL;
|
||||
|
||||
trace->syscalls.table = tmp;
|
||||
sc = &trace->syscalls.table[trace->syscalls.table_size++];
|
||||
syscall__init(sc, e_machine, id);
|
||||
qsort(trace->syscalls.table, trace->syscalls.table_size, sizeof(struct syscall),
|
||||
syscall__cmp);
|
||||
sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
|
||||
sizeof(struct syscall), syscall__cmp);
|
||||
return sc;
|
||||
}
|
||||
|
||||
typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel,
|
||||
union perf_event *event,
|
||||
struct perf_sample *sample);
|
||||
|
||||
static struct syscall *trace__syscall_info(struct trace *trace,
|
||||
struct evsel *evsel, int id)
|
||||
static struct syscall *trace__syscall_info(struct trace *trace, struct evsel *evsel,
|
||||
int e_machine, int id)
|
||||
{
|
||||
struct syscall *sc;
|
||||
int err = 0;
|
||||
|
||||
if (id < 0) {
|
||||
@ -2473,28 +2549,20 @@ static struct syscall *trace__syscall_info(struct trace *trace,
|
||||
|
||||
err = -EINVAL;
|
||||
|
||||
if (id > trace->sctbl->syscalls.max_id) {
|
||||
goto out_cant_read;
|
||||
}
|
||||
sc = trace__find_syscall(trace, e_machine, id);
|
||||
if (sc)
|
||||
err = syscall__read_info(sc, trace);
|
||||
|
||||
if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
|
||||
(err = trace__read_syscall_info(trace, id)) != 0)
|
||||
goto out_cant_read;
|
||||
|
||||
if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
|
||||
goto out_cant_read;
|
||||
|
||||
return &trace->syscalls.table[id];
|
||||
|
||||
out_cant_read:
|
||||
if (verbose > 0) {
|
||||
if (err && verbose > 0) {
|
||||
char sbuf[STRERR_BUFSIZE];
|
||||
fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf)));
|
||||
if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL)
|
||||
fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
|
||||
|
||||
fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err,
|
||||
str_error_r(-err, sbuf, sizeof(sbuf)));
|
||||
if (sc && sc->name)
|
||||
fprintf(trace->output, "(%s)", sc->name);
|
||||
fputs(" information\n", trace->output);
|
||||
}
|
||||
return NULL;
|
||||
return err ? NULL : sc;
|
||||
}
|
||||
|
||||
struct syscall_stats {
|
||||
@ -2643,14 +2711,6 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void syscall__exit(struct syscall *sc)
|
||||
{
|
||||
if (!sc)
|
||||
return;
|
||||
|
||||
zfree(&sc->arg_fmt);
|
||||
}
|
||||
|
||||
static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
|
||||
union perf_event *event __maybe_unused,
|
||||
struct perf_sample *sample)
|
||||
@ -2662,7 +2722,7 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
|
||||
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
|
||||
int augmented_args_size = 0;
|
||||
void *augmented_args = NULL;
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
|
||||
struct thread_trace *ttrace;
|
||||
|
||||
if (sc == NULL)
|
||||
@ -2736,7 +2796,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
|
||||
struct thread_trace *ttrace;
|
||||
struct thread *thread;
|
||||
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
|
||||
char msg[1024];
|
||||
void *args, *augmented_args = NULL;
|
||||
int augmented_args_size;
|
||||
@ -2811,7 +2871,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
|
||||
struct thread *thread;
|
||||
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
|
||||
int alignment = trace->args_alignment;
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
|
||||
struct thread_trace *ttrace;
|
||||
|
||||
if (sc == NULL)
|
||||
@ -3164,7 +3224,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
|
||||
|
||||
if (evsel == trace->syscalls.events.bpf_output) {
|
||||
int id = perf_evsel__sc_tp_uint(evsel, id, sample);
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
|
||||
|
||||
if (sc) {
|
||||
fprintf(trace->output, "%s(", sc->name);
|
||||
@ -3673,7 +3733,7 @@ out_unaugmented:
|
||||
|
||||
static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
|
||||
{
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
|
||||
|
||||
if (sc == NULL)
|
||||
return;
|
||||
@ -3684,20 +3744,20 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
|
||||
|
||||
static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
|
||||
{
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
|
||||
return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
|
||||
}
|
||||
|
||||
static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
|
||||
{
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, id);
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
|
||||
return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
|
||||
}
|
||||
|
||||
static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array)
|
||||
{
|
||||
struct tep_format_field *field;
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, key);
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key);
|
||||
const struct btf_type *bt;
|
||||
char *struct_offset, *tmp, name[32];
|
||||
bool can_augment = false;
|
||||
@ -3779,13 +3839,14 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigne
|
||||
return -1;
|
||||
}
|
||||
|
||||
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
|
||||
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *_sc)
|
||||
{
|
||||
struct syscall sc = *_sc; /* Copy as trace__syscall_info may invalidate pointer. */
|
||||
struct tep_format_field *field, *candidate_field;
|
||||
/*
|
||||
* We're only interested in syscalls that have a pointer:
|
||||
*/
|
||||
for (field = sc->args; field; field = field->next) {
|
||||
for (field = sc.args; field; field = field->next) {
|
||||
if (field->flags & TEP_FIELD_IS_POINTER)
|
||||
goto try_to_find_pair;
|
||||
}
|
||||
@ -3795,15 +3856,16 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
|
||||
try_to_find_pair:
|
||||
for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
|
||||
int id = syscalltbl__id_at_idx(trace->sctbl, i);
|
||||
struct syscall *pair = trace__syscall_info(trace, NULL, id);
|
||||
/* calling trace__syscall_info() may invalidate '_sc' */
|
||||
struct syscall *pair = trace__syscall_info(trace, NULL, sc.e_machine, id);
|
||||
struct bpf_program *pair_prog;
|
||||
bool is_candidate = false;
|
||||
|
||||
if (pair == NULL || pair == sc ||
|
||||
if (pair == NULL || pair->id == sc.id ||
|
||||
pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
|
||||
continue;
|
||||
|
||||
for (field = sc->args, candidate_field = pair->args;
|
||||
for (field = sc.args, candidate_field = pair->args;
|
||||
field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
|
||||
bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
|
||||
candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
|
||||
@ -3870,7 +3932,7 @@ try_to_find_pair:
|
||||
goto next_candidate;
|
||||
}
|
||||
|
||||
pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
|
||||
pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc.name);
|
||||
return pair_prog;
|
||||
next_candidate:
|
||||
continue;
|
||||
@ -3945,7 +4007,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
|
||||
*/
|
||||
for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
|
||||
int key = syscalltbl__id_at_idx(trace->sctbl, i);
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, key);
|
||||
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key);
|
||||
struct bpf_program *pair_prog;
|
||||
int prog_fd;
|
||||
|
||||
@ -3966,7 +4028,11 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
|
||||
pair_prog = trace__find_usable_bpf_prog_entry(trace, sc);
|
||||
if (pair_prog == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Get syscall info again as find usable entry above might
|
||||
* modify the syscall table and shuffle it.
|
||||
*/
|
||||
sc = trace__syscall_info(trace, NULL, EM_HOST, key);
|
||||
sc->bpf_prog.sys_enter = pair_prog;
|
||||
|
||||
/*
|
||||
@ -4761,7 +4827,10 @@ static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
|
||||
pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
|
||||
avg /= NSEC_PER_MSEC;
|
||||
|
||||
sc = &trace->syscalls.table[entry->syscall];
|
||||
sc = trace__syscall_info(trace, /*evsel=*/NULL, EM_HOST, entry->syscall);
|
||||
if (!sc)
|
||||
continue;
|
||||
|
||||
printed += fprintf(fp, " %-15s", sc->name);
|
||||
printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f",
|
||||
n, stats->nr_failures, entry->msecs, min, avg);
|
||||
@ -5218,12 +5287,10 @@ out:
|
||||
|
||||
static void trace__exit(struct trace *trace)
|
||||
{
|
||||
int i;
|
||||
|
||||
strlist__delete(trace->ev_qualifier);
|
||||
zfree(&trace->ev_qualifier_ids.entries);
|
||||
if (trace->syscalls.table) {
|
||||
for (i = 0; i <= trace->sctbl->syscalls.max_id; i++)
|
||||
for (size_t i = 0; i < trace->syscalls.table_size; i++)
|
||||
syscall__exit(&trace->syscalls.table[i]);
|
||||
zfree(&trace->syscalls.table);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user