* [PATCH 8/9] perf, bpf: enable annotation of bpf program
From: Song Liu @ 2019-02-09 1:21 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209012142.2177204-1-songliubraving@fb.com>
This patch enables the annotation of bpf programs.
A new dso type DSO_BINARY_TYPE__BPF_PROG_INFO is introduced for BPF
programs. In symbol__disassemble(), a DSO_BINARY_TYPE__BPF_PROG_INFO dso
calls into a new function symbol__disassemble_bpf(), where annotation
line information is filled in based on the bpf_prog_info and btf saved in
the given perf_env.
symbol__disassemble_bpf() uses libbfd to disassemble bpf programs.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/Makefile.config | 2 +-
tools/perf/util/annotate.c | 149 +++++++++++++++++++++++++++++++++++-
tools/perf/util/bpf-event.c | 48 ++++++++++++
tools/perf/util/bpf-event.h | 4 +
tools/perf/util/dso.c | 1 +
tools/perf/util/dso.h | 33 +++++---
tools/perf/util/symbol.c | 1 +
7 files changed, 225 insertions(+), 13 deletions(-)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b441c88cafa1..e0bafbc273af 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -701,7 +701,7 @@ else
endif
ifeq ($(feature-libbfd), 1)
- EXTLIBS += -lbfd
+ EXTLIBS += -lbfd -lopcodes
else
# we are on a system that requires -liberty and (maybe) -lz
# to link against -lbfd; test each case individually here
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 70de8f6b3aee..078017d31ca9 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -22,6 +22,7 @@
#include "annotate.h"
#include "evsel.h"
#include "evlist.h"
+#include "bpf-event.h"
#include "block-range.h"
#include "string2.h"
#include "arch/common.h"
@@ -29,6 +30,9 @@
#include <pthread.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <bpf/libbpf.h>
/* FIXME: For the HE_COLORSET */
#include "ui/browser.h"
@@ -1672,6 +1676,147 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
return 0;
}
+static void get_exec_path(char *tpath, size_t size)
+{
+ const char *path = "/proc/self/exe";
+ ssize_t len;
+
+ len = readlink(path, tpath, size - 1);
+ assert(len > 0);
+ tpath[len] = 0;
+}
+
+static int symbol__disassemble_bpf(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotation_options *opts = args->options;
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_linfo *prog_linfo = NULL;
+ struct bpf_prog_info_node *info_node;
+ int len = sym->end - sym->start;
+ disassembler_ftype disassemble;
+ struct map *map = args->ms.map;
+ struct disassemble_info info;
+ struct dso *dso = map->dso;
+ int pc = 0, count, sub_id;
+ struct btf *btf = NULL;
+ char tpath[PATH_MAX];
+ size_t buf_size;
+ int nr_skip = 0;
+ __u64 arrays;
+ char *buf;
+ bfd *bfdf;
+ FILE *s;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
+ return -1;
+
+ pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__,
+ sym->name, sym->start, sym->end - sym->start);
+
+ memset(tpath, 0, sizeof(tpath));
+ get_exec_path(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ assert(bfdf);
+ assert(bfd_check_format(bfdf, bfd_object));
+
+ s = open_memstream(&buf, &buf_size);
+ init_disassemble_info(&info, s,
+ (fprintf_ftype) fprintf);
+
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+
+ arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
+ arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+
+ info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
+ dso->bpf_prog.id);
+ if (!info_node)
+ return -1;
+ info_linear = info_node->info_linear;
+ sub_id = dso->bpf_prog.sub_id;
+
+ info.buffer = (void *)(info_linear->info.jited_prog_insns);
+ info.buffer_length = info_linear->info.jited_prog_len;
+
+ if (info_linear->info.nr_line_info)
+ prog_linfo = bpf_prog_linfo__new(&info_linear->info);
+ prog_linfo = prog_linfo;
+
+ if (info_linear->info.btf_id) {
+ struct btf_node *node;
+
+ node = perf_env__find_btf(dso->bpf_prog.env,
+ info_linear->info.btf_id);
+ if (node)
+ btf = btf__new((__u8 *)(node->data),
+ node->data_size);
+ }
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ assert(disassemble);
+
+ fflush(s);
+ do {
+ const struct bpf_line_info *linfo = NULL;
+ struct disasm_line *dl;
+ size_t prev_buf_size;
+ const char *srcline;
+ u64 addr;
+
+ addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id];
+ count = disassemble(pc, &info);
+
+ linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, addr, sub_id,
+ nr_skip);
+
+ if (linfo) {
+ srcline = btf__name_by_offset(btf, linfo->line_off);
+ nr_skip++;
+ } else
+ srcline = NULL;
+
+ fprintf(s, "\n");
+ prev_buf_size = buf_size;
+ fflush(s);
+
+ if (!opts->hide_src_code && srcline) {
+ args->offset = -1;
+ args->line = strdup(srcline);
+ args->line_nr = 0;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ annotation_line__add(&dl->al, ¬es->src->source);
+ }
+
+ args->offset = pc;
+ args->line = buf + prev_buf_size;
+ args->line_nr = 0;
+ args->ms.sym = sym;
+ dl = disasm_line__new(args);
+ annotation_line__add(&dl->al, ¬es->src->source);
+
+ pc += count;
+ } while (count > 0 && pc < len);
+
+ bfd_close(bfdf);
+ return 0;
+}
+
static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
{
struct annotation_options *opts = args->options;
@@ -1699,7 +1844,9 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
pr_debug("annotating [%p] %30s : [%p] %30s\n",
dso, dso->long_name, sym, sym->name);
- if (dso__is_kcore(dso)) {
+ if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
+ return symbol__disassemble_bpf(sym, args);
+ } else if (dso__is_kcore(dso)) {
kce.kcore_filename = symfs_filename;
kce.addr = map__rip_2objdump(map, sym->start);
kce.offs = sym->start;
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 37a5b8134e00..4f347d61ed96 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -21,12 +21,60 @@ static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
return ret;
}
+static int machine__process_bpf_event_load(struct machine *machine __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info_node *info_node;
+ struct perf_env *env = machine->env;
+ int id = event->bpf_event.id;
+ unsigned int i;
+
+ /* perf-record, no need to handle bpf-event */
+ if (env == NULL)
+ return 0;
+
+ info_node = perf_env__find_bpf_prog_info(env, id);
+ if (!info_node)
+ return 0;
+ info_linear = info_node->info_linear;
+
+ for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) {
+ u64 *addrs = (u64 *)(info_linear->info.jited_ksyms);
+ u64 addr = addrs[i];
+ struct map *map;
+
+ map = map_groups__find(&machine->kmaps, addr);
+
+ if (map) {
+ map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO;
+ map->dso->bpf_prog.id = id;
+ map->dso->bpf_prog.sub_id = i;
+ map->dso->bpf_prog.env = env;
+ }
+ }
+ return 0;
+}
+
int machine__process_bpf_event(struct machine *machine __maybe_unused,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
{
if (dump_trace)
perf_event__fprintf_bpf_event(event, stdout);
+
+ switch (event->bpf_event.type) {
+ case PERF_BPF_EVENT_PROG_LOAD:
+ return machine__process_bpf_event_load(machine, event, sample);
+
+ case PERF_BPF_EVENT_PROG_UNLOAD:
+ break;
+ default:
+ pr_debug("unexpected bpf_event type of %d\n",
+ event->bpf_event.type);
+ break;
+ }
return 0;
}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 60ce24e4e5c6..c4f0f1395ea5 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -2,6 +2,10 @@
#ifndef __PERF_BPF_EVENT_H
#define __PERF_BPF_EVENT_H
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
#include <linux/compiler.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 62c8cf622607..1798192bf0f9 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -181,6 +181,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__KALLSYMS:
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 8c8a7abe809d..f20d319463f1 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -14,6 +14,8 @@
#include "namespaces.h"
#include "build-id.h"
+struct perf_env;
+
enum dso_binary_type {
DSO_BINARY_TYPE__KALLSYMS = 0,
DSO_BINARY_TYPE__GUEST_KALLSYMS,
@@ -34,6 +36,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__KCORE,
DSO_BINARY_TYPE__GUEST_KCORE,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__BPF_PROG_INFO,
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -177,17 +180,25 @@ struct dso {
struct auxtrace_cache *auxtrace_cache;
int comp;
- /* dso data file */
- struct {
- struct rb_root cache;
- int fd;
- int status;
- u32 status_seen;
- size_t file_size;
- struct list_head open_entry;
- u64 debug_frame_offset;
- u64 eh_frame_hdr_offset;
- } data;
+ union {
+ /* dso data file */
+ struct {
+ struct rb_root cache;
+ int fd;
+ int status;
+ u32 status_seen;
+ size_t file_size;
+ struct list_head open_entry;
+ u64 debug_frame_offset;
+ u64 eh_frame_hdr_offset;
+ } data;
+ /* bpf prog information */
+ struct {
+ u32 id;
+ u32 sub_id;
+ struct perf_env *env;
+ } bpf_prog;
+ };
union { /* Tool specific area */
void *priv;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 48efad6d0f90..33ae59e89da2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1441,6 +1441,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
return true;
+ case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
--
2.17.1
^ permalink raw reply related
* [PATCH 7/9] perf-top: add option --bpf-event
From: Song Liu @ 2019-02-09 1:21 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209012142.2177204-1-songliubraving@fb.com>
bpf events are only tracked when opts->bpf_event is enabled. This patch
adds a command line flag to enable this for perf-top.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/builtin-top.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 27d8d42e0a4d..5271d7211b9c 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1492,6 +1492,7 @@ int cmd_top(int argc, const char **argv)
"Display raw encoding of assembly instructions (default)"),
OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
"Enable kernel symbol demangling"),
+ OPT_BOOLEAN(0, "bpf-event", &opts->bpf_event, "record bpf events"),
OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
"objdump binary to use for disassembly and annotations"),
OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
--
2.17.1
^ permalink raw reply related
* [PATCH 6/9] perf, bpf: save btf in a rbtree in perf_env
From: Song Liu @ 2019-02-09 1:21 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
btf contains information necessary to annotate bpf programs. This patch
saves btf for bpf programs loaded in the system.
perf-record saves btf information as headers to perf.data. A new header
type HEADER_BTF is introduced for this data.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/util/bpf-event.c | 22 +++++++++
tools/perf/util/bpf-event.h | 7 +++
tools/perf/util/env.c | 65 ++++++++++++++++++++++++
tools/perf/util/env.h | 3 ++
| 99 ++++++++++++++++++++++++++++++++++++-
| 1 +
6 files changed, 196 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index ead599bc4f4e..37a5b8134e00 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -30,6 +30,27 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
return 0;
}
+static int perf_fetch_btf(struct perf_env *env, u32 btf_id, struct btf *btf)
+{
+ struct btf_node *node;
+ u32 data_size;
+ const void *data;
+
+ data = btf__get_raw_data(btf, &data_size);
+
+ node = malloc(data_size + sizeof(struct btf_node));
+
+ if (!node)
+ return -1;
+
+ node->id = btf_id;
+ node->data_size = data_size;
+ memcpy(node->data, data, data_size);
+
+ perf_env__insert_btf(env, node);
+ return 0;
+}
+
/*
* Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
* program. One PERF_RECORD_BPF_EVENT is generated for the program. And
@@ -109,6 +130,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
goto out;
}
has_btf = true;
+ perf_fetch_btf(env, info->btf_id, btf);
}
/* Synthesize PERF_RECORD_KSYMBOL */
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 11e6730b6105..60ce24e4e5c6 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -20,6 +20,13 @@ struct bpf_prog_info_node {
struct rb_node rb_node;
};
+struct btf_node {
+ struct rb_node rb_node;
+ u32 id;
+ u32 data_size;
+ char data[];
+};
+
#ifdef HAVE_LIBBPF_SUPPORT
int machine__process_bpf_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 665b6fe3c7b2..6f9e3d4b94bc 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -61,6 +61,57 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
return node;
}
+void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+{
+ struct rb_node *parent = NULL;
+ __u32 btf_id = btf_node->id;
+ struct btf_node *node;
+ struct rb_node **p;
+
+ down_write(&env->bpf_info_lock);
+ p = &env->btfs.rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ node = rb_entry(parent, struct btf_node, rb_node);
+ if (btf_id < node->id) {
+ p = &(*p)->rb_left;
+ } else if (btf_id > node->id) {
+ p = &(*p)->rb_right;
+ } else {
+ pr_debug("duplicated btf %u\n", btf_id);
+ up_write(&env->bpf_info_lock);
+ return;
+ }
+ }
+
+ rb_link_node(&btf_node->rb_node, parent, p);
+ rb_insert_color(&btf_node->rb_node, &env->btfs);
+ up_write(&env->bpf_info_lock);
+}
+
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
+{
+ struct btf_node *node = NULL;
+ struct rb_node *n;
+
+ down_read(&env->bpf_info_lock);
+ n = env->btfs.rb_node;
+
+ while (n) {
+ node = rb_entry(n, struct btf_node, rb_node);
+ if (btf_id < node->id)
+ n = n->rb_left;
+ else if (btf_id > node->id)
+ n = n->rb_right;
+ else
+ break;
+ }
+
+ up_read(&env->bpf_info_lock);
+ return node;
+}
+
/* purge data in bpf_prog_infos tree */
static void purge_bpf_info(struct perf_env *env)
{
@@ -80,6 +131,19 @@ static void purge_bpf_info(struct perf_env *env)
rb_erase_init(&node->rb_node, root);
free(node);
}
+
+ root = &env->btfs;
+ next = rb_first(root);
+
+ while (next) {
+ struct btf_node *node;
+
+ node = rb_entry(next, struct btf_node, rb_node);
+ next = rb_next(&node->rb_node);
+ rb_erase_init(&node->rb_node, root);
+ free(node);
+ }
+
up_write(&env->bpf_info_lock);
}
@@ -117,6 +181,7 @@ void perf_env__exit(struct perf_env *env)
static void init_bpf_rb_trees(struct perf_env *env)
{
env->bpf_prog_infos = RB_ROOT;
+ env->btfs = RB_ROOT;
init_rwsem(&env->bpf_info_lock);
}
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 5894a177b7cf..40b6e9413c22 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -69,6 +69,7 @@ struct perf_env {
u64 clockid_res_ns;
struct rw_semaphore bpf_info_lock;
struct rb_root bpf_prog_infos;
+ struct rb_root btfs;
};
extern struct perf_env perf_env;
@@ -89,4 +90,6 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
__u32 prog_id);
+void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
#endif /* __PERF_ENV_H */
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 2ae76a9d06f6..3f1562afe8e5 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1125,6 +1125,45 @@ static int write_bpf_prog_info(struct feat_fd *ff,
return ret;
}
+static int write_btf(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+ u32 count = 0;
+ int ret;
+
+ down_read(&env->bpf_info_lock);
+
+ root = &env->btfs;
+ next = rb_first(root);
+ while (next) {
+ ++count;
+ next = rb_next(next);
+ }
+
+ ret = do_write(ff, &count, sizeof(count));
+
+ if (ret < 0)
+ goto out;
+
+ next = rb_first(root);
+ while (next) {
+ struct btf_node *node;
+
+ node = rb_entry(next, struct btf_node, rb_node);
+ next = rb_next(&node->rb_node);
+ ret = do_write(ff, node,
+ sizeof(struct btf_node) + node->data_size);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ up_read(&env->bpf_info_lock);
+ return ret;
+}
+
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1628,6 +1667,28 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
up_read(&env->bpf_info_lock);
}
+static void print_btf(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+
+ down_read(&env->bpf_info_lock);
+
+ root = &env->btfs;
+ next = rb_first(root);
+
+ while (next) {
+ struct btf_node *node;
+
+ node = rb_entry(next, struct btf_node, rb_node);
+ next = rb_next(&node->rb_node);
+ fprintf(fp, "# bpf_prog_info of id %u\n", node->id);
+ }
+
+ up_read(&env->bpf_info_lock);
+}
+
static void free_event_desc(struct perf_evsel *events)
{
struct perf_evsel *evsel;
@@ -2723,6 +2784,41 @@ static int process_bpf_prog_info(struct feat_fd *ff,
return err;
}
+static int process_btf(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+ u32 count, i;
+
+ if (do_read_u32(ff, &count))
+ return -1;
+
+ down_write(&env->bpf_info_lock);
+
+ for (i = 0; i < count; ++i) {
+ struct btf_node btf_node;
+ struct btf_node *node;
+
+ if (__do_read(ff, &btf_node, sizeof(struct btf_node)))
+ return -1;
+
+ node = malloc(sizeof(struct btf_node) + btf_node.data_size);
+ if (!node)
+ return -1;
+
+ node->id = btf_node.id;
+ node->data_size = btf_node.data_size;
+
+ if (__do_read(ff, node->data, btf_node.data_size)) {
+ free(node);
+ return -1;
+ }
+ perf_env__insert_btf(env, node);
+ }
+
+ up_write(&env->bpf_info_lock);
+ return 0;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2783,7 +2879,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
FEAT_OPR(CLOCKID, clockid, false),
- FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false)
+ FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
+ FEAT_OPR(BTF, btf, false)
};
struct header_print_data {
--git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0785c91b4c3a..ba51d8e43c53 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -40,6 +40,7 @@ enum {
HEADER_MEM_TOPOLOGY,
HEADER_CLOCKID,
HEADER_BPF_PROG_INFO,
+ HEADER_BTF,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
--
2.17.1
^ permalink raw reply related
* [PATCH 9/9] perf, bpf: save information about short living bpf programs
From: Song Liu @ 2019-02-09 1:21 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209012142.2177204-1-songliubraving@fb.com>
To annotate bpf programs in perf, it is necessary to save the information in
bpf_prog_info and btf. For a short-lived bpf program, it is necessary to
save this information before the program is unloaded.
This patch saves this information in a separate thread. This thread
creates its own evlist, which only tracks bpf events. This evlist uses a
ring buffer with a very low watermark for lower latency. When bpf load
events are received, this thread tries to gather information via sys_bpf
and save it in perf_env.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/builtin-record.c | 13 ++++
tools/perf/builtin-top.c | 12 ++++
tools/perf/util/bpf-event.c | 126 ++++++++++++++++++++++++++++++++++++
tools/perf/util/bpf-event.h | 22 +++++++
tools/perf/util/evlist.c | 20 ++++++
tools/perf/util/evlist.h | 2 +
6 files changed, 195 insertions(+)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 2355e0a9eda0..46abb44aaaab 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1106,6 +1106,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct perf_data *data = &rec->data;
struct perf_session *session;
bool disabled = false, draining = false;
+ struct bpf_event_poll_args poll_args;
+ bool bpf_thread_running = false;
int fd;
atexit(record__sig_exit);
@@ -1206,6 +1208,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}
+ if (rec->opts.bpf_event) {
+ poll_args.env = &session->header.env;
+ poll_args.target = &rec->opts.target;
+ poll_args.done = &done;
+ if (bpf_event__start_polling_thread(&poll_args) == 0)
+ bpf_thread_running = true;
+ }
+
err = record__synthesize(rec, false);
if (err < 0)
goto out_child;
@@ -1456,6 +1466,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
out_delete_session:
perf_session__delete(session);
+
+ if (bpf_thread_running)
+ bpf_event__stop_polling_thread(&poll_args);
return status;
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5271d7211b9c..2586ee081967 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1524,10 +1524,12 @@ int cmd_top(int argc, const char **argv)
"number of thread to run event synthesize"),
OPT_END()
};
+ struct bpf_event_poll_args poll_args;
const char * const top_usage[] = {
"perf top [<options>]",
NULL
};
+ bool bpf_thread_running = false;
int status = hists__init();
if (status < 0)
@@ -1652,8 +1654,18 @@ int cmd_top(int argc, const char **argv)
signal(SIGWINCH, winch_sig);
}
+ if (top.record_opts.bpf_event) {
+ poll_args.env = &perf_env;
+ poll_args.target = target;
+ poll_args.done = &done;
+ if (bpf_event__start_polling_thread(&poll_args) == 0)
+ bpf_thread_running = true;
+ }
status = __cmd_top(&top);
+ if (bpf_thread_running)
+ bpf_event__stop_polling_thread(&poll_args);
+
out_delete_evlist:
perf_evlist__delete(top.evlist);
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 4f347d61ed96..23a3b0605de7 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -8,6 +8,7 @@
#include "machine.h"
#include "env.h"
#include "session.h"
+#include "evlist.h"
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
@@ -316,3 +317,128 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
free(event);
return err;
}
+
+static void perf_env_add_bpf_info(struct perf_env *env, u32 id)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info_node *info_node;
+ struct btf *btf;
+ u64 arrays;
+ u32 btf_id;
+ int fd;
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0)
+ return;
+
+ arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
+ arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
+ arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
+
+ info_linear = bpf_program__get_prog_info_linear(fd, arrays);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+ close(fd);
+ return;
+ }
+
+ btf_id = info_linear->info.btf_id;
+
+ info_node = malloc(sizeof(struct bpf_prog_info_node));
+ if (info_node) {
+ info_node->info_linear = info_linear;
+ perf_env__insert_bpf_prog_info(env, info_node);
+ } else
+ free(info_linear);
+
+ if (btf__get_from_id(btf_id, &btf)) {
+ pr_debug("%s: failed to get BTF of id %u, aborting\n",
+ __func__, btf_id);
+ close(fd);
+ return;
+ }
+ perf_fetch_btf(env, btf_id, btf);
+
+ close(fd);
+}
+
+static void *bpf_poll_thread(void *arg)
+{
+ struct bpf_event_poll_args *args = arg;
+ int i;
+
+ while (!*(args->done)) {
+ perf_evlist__poll(args->evlist, 1000);
+
+ for (i = 0; i < args->evlist->nr_mmaps; i++) {
+ struct perf_mmap *map = &args->evlist->mmap[i];
+ union perf_event *event;
+
+ if (perf_mmap__read_init(map))
+ continue;
+ while ((event = perf_mmap__read_event(map)) != NULL) {
+ pr_debug("processing vip event of type %d\n",
+ event->header.type);
+ switch (event->header.type) {
+ case PERF_RECORD_BPF_EVENT:
+ if (event->bpf_event.type != PERF_BPF_EVENT_PROG_LOAD)
+ break;
+ perf_env_add_bpf_info(args->env, event->bpf_event.id);
+ break;
+ default:
+ break;
+ }
+ perf_mmap__consume(map);
+ }
+ perf_mmap__read_done(map);
+ }
+ }
+ return NULL;
+}
+
+pthread_t poll_thread;
+
+int bpf_event__start_polling_thread(struct bpf_event_poll_args *args)
+{
+ struct perf_evsel *counter;
+
+ args->evlist = perf_evlist__new();
+
+ if (args->evlist == NULL)
+ return -1;
+
+ if (perf_evlist__create_maps(args->evlist, args->target))
+ return -1;
+
+ if (perf_evlist__add_bpf_tracker(args->evlist))
+ return -1;
+
+ evlist__for_each_entry(args->evlist, counter) {
+ if (perf_evsel__open(counter, args->evlist->cpus,
+ args->evlist->threads) < 0)
+ return -1;
+ }
+
+ if (perf_evlist__mmap(args->evlist, UINT_MAX))
+ return -1;
+
+ evlist__for_each_entry(args->evlist, counter) {
+ if (perf_evsel__enable(counter))
+ return -1;
+ }
+
+ if (pthread_create(&poll_thread, NULL, bpf_poll_thread, args))
+ return -1;
+
+ return 0;
+}
+
+void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args)
+{
+ pthread_join(poll_thread, NULL);
+ perf_evlist__exit(args->evlist);
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index c4f0f1395ea5..61914827c1e3 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -12,12 +12,17 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/rbtree.h>
+#include <pthread.h>
+#include <api/fd/array.h>
#include "event.h"
struct machine;
union perf_event;
+struct perf_env;
struct perf_sample;
struct record_opts;
+struct evlist;
+struct target;
struct bpf_prog_info_node {
struct bpf_prog_info_linear *info_linear;
@@ -31,6 +36,13 @@ struct btf_node {
char data[];
};
+struct bpf_event_poll_args {
+ struct perf_env *env;
+ struct perf_evlist *evlist;
+ struct target *target;
+ volatile int *done;
+};
+
#ifdef HAVE_LIBBPF_SUPPORT
int machine__process_bpf_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
@@ -39,6 +51,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
struct record_opts *opts);
+int bpf_event__start_polling_thread(struct bpf_event_poll_args *args);
+void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args);
#else
static inline int machine__process_bpf_event(struct machine *machine __maybe_unused,
union perf_event *event __maybe_unused,
@@ -54,5 +68,13 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
{
return 0;
}
+
+static inline int bpf_event__start_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
+{
+ return 0;
+}
+void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
+{
+}
#endif // HAVE_LIBBPF_SUPPORT
#endif
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8c902276d4b4..612c079579ce 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -271,6 +271,26 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist)
return 0;
}
+int perf_evlist__add_bpf_tracker(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ .watermark = 1,
+ .bpf_event = 1,
+ .wakeup_watermark = 1,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ struct perf_evsel *evsel = perf_evsel__new_idx(&attr,
+ evlist->nr_entries);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
static int perf_evlist__add_attrs(struct perf_evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs)
{
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 868294491194..a2d22715188e 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -84,6 +84,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
int perf_evlist__add_dummy(struct perf_evlist *evlist);
+int perf_evlist__add_bpf_tracker(struct perf_evlist *evlist);
+
int perf_evlist__add_newtp(struct perf_evlist *evlist,
const char *sys, const char *name, void *handler);
--
2.17.1
^ permalink raw reply related
* [PATCH 2/9] bpf: libbpf: introduce bpf_program__get_prog_info_linear()
From: Song Liu @ 2019-02-09 1:16 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209011705.2160185-1-songliubraving@fb.com>
Currently, bpf_prog_info includes 9 arrays. The user has the option to
fetch any combination of these arrays. However, this requires a lot of
handling of these arrays. This work becomes more tricky when we need to
store bpf_prog_info to a file, because these arrays are allocated
independently.
This patch introduces struct bpf_prog_info_linear, which stores arrays
of bpf_prog_info in contiguous memory. Helper functions are introduced
to unify the work to get different information of bpf_prog_info.
Specifically, bpf_program__get_prog_info_linear() allows the user to
select which arrays to fetch, and handles details for the user.
Please see the comments before enum bpf_prog_info_array for more details
and examples.
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/lib/bpf/libbpf.c | 251 +++++++++++++++++++++++++++++++++++++++
tools/lib/bpf/libbpf.h | 63 ++++++++++
tools/lib/bpf/libbpf.map | 3 +
3 files changed, 317 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e3c39edfb9d3..1f808c555747 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -113,6 +113,11 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
#endif
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
struct bpf_capabilities {
/* v4.14: kernel support for program & map names. */
__u32 name:1;
@@ -2979,3 +2984,249 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
ring_buffer_write_tail(header, data_tail);
return ret;
}
+
+struct bpf_prog_info_array_desc {
+ int array_offset; /* e.g. offset of jited_prog_insns */
+ int count_offset; /* e.g. offset of jited_prog_len */
+ int size_offset; /* > 0: offset of rec size,
+ * < 0: fix size of -size_offset
+ */
+};
+
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
+ [BPF_PROG_INFO_JITED_INSNS] = {
+ offsetof(struct bpf_prog_info, jited_prog_insns),
+ offsetof(struct bpf_prog_info, jited_prog_len),
+ -1,
+ },
+ [BPF_PROG_INFO_XLATED_INSNS] = {
+ offsetof(struct bpf_prog_info, xlated_prog_insns),
+ offsetof(struct bpf_prog_info, xlated_prog_len),
+ -1,
+ },
+ [BPF_PROG_INFO_MAP_IDS] = {
+ offsetof(struct bpf_prog_info, map_ids),
+ offsetof(struct bpf_prog_info, nr_map_ids),
+ -(int)sizeof(__u32),
+ },
+ [BPF_PROG_INFO_JITED_KSYMS] = {
+ offsetof(struct bpf_prog_info, jited_ksyms),
+ offsetof(struct bpf_prog_info, nr_jited_ksyms),
+ -(int)sizeof(__u64),
+ },
+ [BPF_PROG_INFO_JITED_FUNC_LENS] = {
+ offsetof(struct bpf_prog_info, jited_func_lens),
+ offsetof(struct bpf_prog_info, nr_jited_func_lens),
+ -(int)sizeof(__u32),
+ },
+ [BPF_PROG_INFO_FUNC_INFO] = {
+ offsetof(struct bpf_prog_info, func_info),
+ offsetof(struct bpf_prog_info, nr_func_info),
+ offsetof(struct bpf_prog_info, func_info_rec_size),
+ },
+ [BPF_PROG_INFO_LINE_INFO] = {
+ offsetof(struct bpf_prog_info, line_info),
+ offsetof(struct bpf_prog_info, nr_line_info),
+ offsetof(struct bpf_prog_info, line_info_rec_size),
+ },
+ [BPF_PROG_INFO_JITED_LINE_INFO] = {
+ offsetof(struct bpf_prog_info, jited_line_info),
+ offsetof(struct bpf_prog_info, nr_jited_line_info),
+ offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+ },
+ [BPF_PROG_INFO_PROG_TAGS] = {
+ offsetof(struct bpf_prog_info, prog_tags),
+ offsetof(struct bpf_prog_info, nr_prog_tags),
+ -(int)sizeof(__u8) * BPF_TAG_SIZE,
+ },
+
+};
+
+/*
+ * Read the __u32 located @offset bytes into @info.
+ *
+ * A negative @offset does not name a location: it encodes a fixed value,
+ * and -offset is returned instead.
+ */
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset)
+{
+	__u32 *words = (__u32 *)info;
+
+	if (offset < 0)
+		return -(int)offset;
+
+	return words[offset / sizeof(__u32)];
+}
+
+/*
+ * Read the __u64 located @offset bytes into @info.
+ *
+ * A negative @offset does not name a location: it encodes a fixed value,
+ * and -offset is returned instead.
+ */
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset)
+{
+	__u64 *quads = (__u64 *)info;
+
+	if (offset < 0)
+		return -(int)offset;
+
+	return quads[offset / sizeof(__u64)];
+}
+
+/*
+ * Store @val as the __u32 located @offset bytes into @info.
+ * A negative @offset names no field, so nothing is written.
+ */
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+					 __u32 val)
+{
+	__u32 *words = (__u32 *)info;
+
+	if (offset < 0)
+		return;
+
+	words[offset / sizeof(__u32)] = val;
+}
+
+/*
+ * Store @val as the __u64 located @offset bytes into @info.
+ * A negative @offset names no field, so nothing is written.
+ */
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+					 __u64 val)
+{
+	__u64 *quads = (__u64 *)info;
+
+	if (offset < 0)
+		return;
+
+	quads[offset / sizeof(__u64)] = val;
+}
+
+/*
+ * Fetch struct bpf_prog_info for @fd together with the arrays selected in
+ * @arrays (bit i selects enum bpf_prog_info_array value i), all stored in
+ * one malloc()ed struct bpf_prog_info_linear.
+ *
+ * Bits for arrays that the bpf_prog_info returned by the kernel is too
+ * short to describe are cleared; the arrays actually fetched are reported
+ * in info_linear->arrays. Every array starts on an 8-byte boundary inside
+ * info_linear->data, so arrays of __u64 can be dereferenced directly.
+ *
+ * Returns the new object (release it with free()), or
+ *   ERR_PTR(-EINVAL) when @arrays has bits at or above
+ *                    BPF_PROG_INFO_LAST_ARRAY,
+ *   ERR_PTR(-EFAULT) when bpf_obj_get_info_by_fd() fails,
+ *   ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 data_len = 0;
+	int i, err;
+	void *ptr;
+
+	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
+		return ERR_PTR(-EINVAL);
+
+	/* step 1: get array dimensions */
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 2: calculate total size of all arrays */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		bool include_array = (arrays & (1UL << i)) > 0;
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		desc = bpf_prog_info_array_desc + i;
+
+		/*
+		 * kernel is too old to support this field: every field we
+		 * are going to read must lie completely inside info_len
+		 */
+		if (info_len < desc->array_offset + sizeof(__u32) ||
+		    info_len < desc->count_offset + sizeof(__u32) ||
+		    (desc->size_offset > 0 &&
+		     info_len < desc->size_offset + sizeof(__u32)))
+			include_array = false;
+
+		if (!include_array) {
+			arrays &= ~(1UL << i);	/* clear the bit */
+			continue;
+		}
+
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+		/* keep the next array 8-byte aligned */
+		data_len += roundup(count * size, sizeof(__u64));
+	}
+
+	/* step 3: allocate continuous memory */
+	data_len = roundup(data_len, sizeof(__u64));
+	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
+	if (!info_linear)
+		return ERR_PTR(-ENOMEM);
+
+	/* step 4: fill data to info_linear->info */
+	info_linear->arrays = arrays;
+	memset(&info_linear->info, 0, sizeof(info));
+	ptr = info_linear->data;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->count_offset, count);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->size_offset, size);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     ptr_to_u64(ptr));
+		/* must advance exactly as step 2 accounted for this array */
+		ptr += roundup(count * size, sizeof(__u64));
+	}
+
+	/* step 5: call syscall again to get required arrays */
+	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s", strerror(errno));
+		free(info_linear);
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 6: verify the data (mismatches are only reported) */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 v1, v2;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->count_offset);
+		if (v1 != v2)
+			pr_warning("%s: mismatch in element count\n", __func__);
+
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->size_offset);
+		if (v1 != v2)
+			pr_warning("%s: mismatch in rec size\n", __func__);
+	}
+
+	/* step 7: update info_len and data_len */
+	info_linear->info_len = sizeof(struct bpf_prog_info);
+	info_linear->data_len = data_len;
+
+	return info_linear;
+}
+
+/*
+ * For every array selected in info_linear->arrays, replace the address
+ * stored in info_linear->info by its offset from info_linear->data.
+ */
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
+{
+	int idx;
+
+	for (idx = BPF_PROG_INFO_FIRST_ARRAY; idx < BPF_PROG_INFO_LAST_ARRAY; idx++) {
+		struct bpf_prog_info_array_desc *d;
+		__u64 address;
+
+		if (!(info_linear->arrays & (1UL << idx)))
+			continue;
+
+		d = &bpf_prog_info_array_desc[idx];
+		address = bpf_prog_info_read_offset_u64(&info_linear->info,
+							d->array_offset);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     d->array_offset,
+					     address - ptr_to_u64(info_linear->data));
+	}
+}
+
+/*
+ * For every array selected in info_linear->arrays, replace the offset
+ * stored in info_linear->info by the address info_linear->data + offset.
+ */
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
+{
+	int idx;
+
+	for (idx = BPF_PROG_INFO_FIRST_ARRAY; idx < BPF_PROG_INFO_LAST_ARRAY; idx++) {
+		struct bpf_prog_info_array_desc *d;
+		__u64 offset;
+
+		if (!(info_linear->arrays & (1UL << idx)))
+			continue;
+
+		d = &bpf_prog_info_array_desc[idx];
+		offset = bpf_prog_info_read_offset_u64(&info_linear->info,
+						       d->array_offset);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     d->array_offset,
+					     offset + ptr_to_u64(info_linear->data));
+	}
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 69a7c25eaccc..4d357c26fbd2 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -372,6 +372,69 @@ LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
enum bpf_prog_type prog_type, __u32 ifindex);
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has option to choose
+ * arrays to fetch from kernel. The following APIs provide uniform way to
+ * fetch these data. All arrays in bpf_prog_info are stored in a single
+ * continuous memory region. This makes it easy to store the info in a
+ * file.
+ *
+ * Before writing bpf_prog_info_linear to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ * # To fetch map_ids and prog_tags:
+ * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
+ * (1UL << BPF_PROG_INFO_PROG_TAGS);
+ * struct bpf_prog_info_linear *info_linear =
+ * bpf_program__get_prog_info_linear(fd, arrays);
+ *
+ * # To save data in file
+ * bpf_program__bpil_addr_to_offs(info_linear);
+ * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ * # To read data from file
+ * read(f, info_linear, <proper_size>);
+ * bpf_program__bpil_offs_to_addr(info_linear);
+ */
+enum bpf_prog_info_array {
+ BPF_PROG_INFO_FIRST_ARRAY = 0,
+ BPF_PROG_INFO_JITED_INSNS = 0,
+ BPF_PROG_INFO_XLATED_INSNS,
+ BPF_PROG_INFO_MAP_IDS,
+ BPF_PROG_INFO_JITED_KSYMS,
+ BPF_PROG_INFO_JITED_FUNC_LENS,
+ BPF_PROG_INFO_FUNC_INFO,
+ BPF_PROG_INFO_LINE_INFO,
+ BPF_PROG_INFO_JITED_LINE_INFO,
+ BPF_PROG_INFO_PROG_TAGS,
+ BPF_PROG_INFO_LAST_ARRAY,
+};
+
+struct bpf_prog_info_linear {
+ /* size of struct bpf_prog_info, when the tool is compiled */
+ __u32 info_len;
+ /* total bytes allocated for data, round up to 8 bytes */
+ __u32 data_len;
+ /* which arrays are included in data */
+ __u64 arrays;
+ struct bpf_prog_info info;
+ __u8 data[];
+};
+
+LIBBPF_API struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays);
+
+LIBBPF_API void
+bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
+
+LIBBPF_API void
+bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 5fc8222209f8..5f2fceec6ea7 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -145,4 +145,7 @@ LIBBPF_0.0.2 {
btf_ext__new;
btf_ext__reloc_func_info;
btf_ext__reloc_line_info;
+ bpf_program__get_prog_info_linear;
+ bpf_program__bpil_addr_to_offs;
+ bpf_program__bpil_offs_to_addr;
} LIBBPF_0.0.1;
--
2.17.1
^ permalink raw reply related
* [PATCH 5/9] perf, bpf: save bpf_prog_info in a rbtree in perf_env
From: Song Liu @ 2019-02-09 1:17 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209011705.2160185-1-songliubraving@fb.com>
bpf_prog_info contains information necessary to annotate bpf programs.
This patch saves bpf_prog_info for bpf programs loaded in the system.
perf-record saves bpf_prog_info information as headers to perf.data.
A new header type HEADER_BPF_PROG_INFO is introduced for this data.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/builtin-record.c | 2 +-
tools/perf/builtin-top.c | 2 +-
tools/perf/util/bpf-event.c | 39 ++++++++---
tools/perf/util/bpf-event.h | 15 +++-
tools/perf/util/env.c | 83 ++++++++++++++++++++++
tools/perf/util/env.h | 9 +++
| 134 +++++++++++++++++++++++++++++++++++-
| 1 +
8 files changed, 271 insertions(+), 14 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 88ea11d57c6f..2355e0a9eda0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1083,7 +1083,7 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
- err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
+ err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
machine, opts);
if (err < 0)
pr_warning("Couldn't synthesize bpf events.\n");
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a486d4de56e..27d8d42e0a4d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1216,7 +1216,7 @@ static int __cmd_top(struct perf_top *top)
init_process_thread(top);
- ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process,
+ ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
&top->session->machines.host,
&top->record_opts);
if (ret < 0)
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index e6dfb95029e5..ead599bc4f4e 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -1,15 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdlib.h>
-#include <bpf/bpf.h>
-#include <bpf/btf.h>
-#include <bpf/libbpf.h>
-#include <linux/btf.h>
#include <linux/err.h>
#include "bpf-event.h"
#include "debug.h"
#include "symbol.h"
#include "machine.h"
+#include "env.h"
+#include "session.h"
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
@@ -42,7 +40,7 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
* -1 for failures;
* -2 for lack of kernel support.
*/
-static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
+static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
int fd,
@@ -52,17 +50,29 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
struct bpf_event *bpf_event = &event->bpf_event;
struct bpf_prog_info_linear *info_linear;
+ struct perf_tool *tool = session->tool;
+ struct bpf_prog_info_node *info_node;
struct bpf_prog_info *info;
struct btf *btf = NULL;
bool has_btf = false;
+ struct perf_env *env;
u32 sub_prog_cnt, i;
int err = 0;
u64 arrays;
+ /*
+ * for perf-record and perf-report use header.env;
+ * otherwise, use global perf_env.
+ */
+ env = session->data ? &session->header.env : &perf_env;
+
arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
+ arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
info_linear = bpf_program__get_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
@@ -151,8 +161,8 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
machine, process);
}
- /* Synthesize PERF_RECORD_BPF_EVENT */
if (opts->bpf_event) {
+ /* Synthesize PERF_RECORD_BPF_EVENT */
*bpf_event = (struct bpf_event){
.header = {
.type = PERF_RECORD_BPF_EVENT,
@@ -165,6 +175,19 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
memset((void *)event + event->header.size, 0, machine->id_hdr_size);
event->header.size += machine->id_hdr_size;
+
+ /* save bpf_prog_info to env */
+ info_node = malloc(sizeof(struct bpf_prog_info_node));
+ if (info_node) {
+ info_node->info_linear = info_linear;
+ perf_env__insert_bpf_prog_info(env, info_node);
+ info_linear = NULL;
+ }
+
+ /*
+ * process after saving bpf_prog_info to env, so that
+ * required information is ready for look up
+ */
err = perf_tool__process_synth_event(tool, event,
machine, process);
}
@@ -175,7 +198,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
return err ? -1 : 0;
}
-int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+int perf_event__synthesize_bpf_events(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
struct record_opts *opts)
@@ -209,7 +232,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool,
continue;
}
- err = perf_event__synthesize_one_bpf_prog(tool, process,
+ err = perf_event__synthesize_one_bpf_prog(session, process,
machine, fd,
event, opts);
close(fd);
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 7890067e1a37..11e6730b6105 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -3,19 +3,28 @@
#define __PERF_BPF_EVENT_H
#include <linux/compiler.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/rbtree.h>
#include "event.h"
struct machine;
union perf_event;
struct perf_sample;
-struct perf_tool;
struct record_opts;
+struct bpf_prog_info_node {
+ struct bpf_prog_info_linear *info_linear;
+ struct rb_node rb_node;
+};
+
#ifdef HAVE_LIBBPF_SUPPORT
int machine__process_bpf_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
-int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+int perf_event__synthesize_bpf_events(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
struct record_opts *opts);
@@ -27,7 +36,7 @@ static inline int machine__process_bpf_event(struct machine *machine __maybe_unu
return 0;
}
-static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused,
+static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
perf_event__handler_t process __maybe_unused,
struct machine *machine __maybe_unused,
struct record_opts *opts __maybe_unused)
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4c23779e271a..665b6fe3c7b2 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -8,10 +8,86 @@
struct perf_env perf_env;
+/*
+ * Link @info_node into env->bpf_prog_infos, keyed by the program id found
+ * in info_node->info_linear->info.id.
+ *
+ * The whole operation runs under the write side of env->bpf_info_lock,
+ * which this function takes and releases itself.
+ *
+ * If a node with the same id is already in the tree, a debug message is
+ * printed and @info_node is NOT linked; it is not freed here either, and
+ * the caller gets no indication of which of the two outcomes happened.
+ */
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+ struct bpf_prog_info_node *info_node)
+{
+ __u32 prog_id = info_node->info_linear->info.id;
+ struct bpf_prog_info_node *node;
+ struct rb_node *parent = NULL;
+ struct rb_node **p;
+
+ down_write(&env->bpf_info_lock);
+ p = &env->bpf_prog_infos.rb_node;
+
+ /* descend to the empty link where the new node belongs */
+ while (*p != NULL) {
+ parent = *p;
+ node = rb_entry(parent, struct bpf_prog_info_node, rb_node);
+ if (prog_id < node->info_linear->info.id) {
+ p = &(*p)->rb_left;
+ } else if (prog_id > node->info_linear->info.id) {
+ p = &(*p)->rb_right;
+ } else {
+ /* same id already present: leave the tree untouched */
+ pr_debug("duplicated bpf prog info %u\n", prog_id);
+ up_write(&env->bpf_info_lock);
+ return;
+ }
+ }
+
+ rb_link_node(&info_node->rb_node, parent, p);
+ rb_insert_color(&info_node->rb_node, &env->bpf_prog_infos);
+ up_write(&env->bpf_info_lock);
+}
+
+/*
+ * Look up the bpf_prog_info_node whose info_linear->info.id is @prog_id.
+ *
+ * The tree is walked under the read side of env->bpf_info_lock, which is
+ * released before returning.
+ *
+ * Returns the matching node, or NULL when the tree holds no node with
+ * that id.
+ */
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+							__u32 prog_id)
+{
+	struct bpf_prog_info_node *node = NULL;
+	struct rb_node *n;
+
+	down_read(&env->bpf_info_lock);
+	n = env->bpf_prog_infos.rb_node;
+
+	while (n) {
+		node = rb_entry(n, struct bpf_prog_info_node, rb_node);
+		if (prog_id < node->info_linear->info.id)
+			n = n->rb_left;
+		else if (prog_id > node->info_linear->info.id)
+			n = n->rb_right;
+		else
+			goto out;
+	}
+	/*
+	 * Fell off the tree: node still points at the last node visited,
+	 * which is not a match, so it must not be handed to the caller.
+	 */
+	node = NULL;
+
+out:
+	up_read(&env->bpf_info_lock);
+	return node;
+}
+
+/*
+ * Purge data in the bpf_prog_infos tree.
+ *
+ * Every node is unlinked and freed under the write side of
+ * env->bpf_info_lock. The bpf_prog_info_linear a node points to was
+ * handed over to the tree when the node was inserted, so it is released
+ * here together with the node.
+ */
+static void purge_bpf_info(struct perf_env *env)
+{
+	struct rb_root *root;
+	struct rb_node *next;
+
+	down_write(&env->bpf_info_lock);
+
+	root = &env->bpf_prog_infos;
+	next = rb_first(root);
+
+	while (next) {
+		struct bpf_prog_info_node *node;
+
+		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+		/* fetch the successor before the node is erased */
+		next = rb_next(&node->rb_node);
+		rb_erase_init(&node->rb_node, root);
+		free(node->info_linear);
+		free(node);
+	}
+	up_write(&env->bpf_info_lock);
+}
+
void perf_env__exit(struct perf_env *env)
{
int i;
+ purge_bpf_info(env);
zfree(&env->hostname);
zfree(&env->os_release);
zfree(&env->version);
@@ -38,6 +114,12 @@ void perf_env__exit(struct perf_env *env)
zfree(&env->memory_nodes);
}
+static void init_bpf_rb_trees(struct perf_env *env)
+{
+ env->bpf_prog_infos = RB_ROOT;
+ init_rwsem(&env->bpf_info_lock);
+}
+
int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
{
int i;
@@ -59,6 +141,7 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
env->nr_cmdline = argc;
+ init_bpf_rb_trees(env);
return 0;
out_free:
zfree(&env->cmdline_argv);
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index d01b8355f4ca..5894a177b7cf 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -3,7 +3,10 @@
#define __PERF_ENV_H
#include <linux/types.h>
+#include <linux/rbtree.h>
#include "cpumap.h"
+#include "rwsem.h"
+#include "bpf-event.h"
struct cpu_topology_map {
int socket_id;
@@ -64,6 +67,8 @@ struct perf_env {
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
u64 clockid_res_ns;
+ struct rw_semaphore bpf_info_lock;
+ struct rb_root bpf_prog_infos;
};
extern struct perf_env perf_env;
@@ -80,4 +85,8 @@ const char *perf_env__arch(struct perf_env *env);
const char *perf_env__raw_arch(struct perf_env *env);
int perf_env__nr_cpus_avail(struct perf_env *env);
+void perf_env__insert_bpf_prog_info(struct perf_env *env,
+ struct bpf_prog_info_node *info_node);
+struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
+ __u32 prog_id);
#endif /* __PERF_ENV_H */
--git a/tools/perf/util/header.c b/tools/perf/util/header.c
index dec6d218c31c..2ae76a9d06f6 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1080,6 +1080,51 @@ static int write_clockid(struct feat_fd *ff,
sizeof(ff->ph->env.clockid_res_ns));
}
+/*
+ * Write the HEADER_BPF_PROG_INFO feature: a u32 count of the nodes in
+ * env->bpf_prog_infos, followed by each node's bpf_prog_info_linear
+ * (the struct itself plus its data_len bytes of array data).
+ *
+ * The tree is walked under the read side of env->bpf_info_lock.
+ * Returns the result of the last do_write(); a negative value stops the
+ * walk early.
+ */
+static int write_bpf_prog_info(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct perf_env *env = &ff->ph->env;
+ struct rb_root *root;
+ struct rb_node *next;
+ u32 count = 0;
+ int ret;
+
+ down_read(&env->bpf_info_lock);
+
+ /* first pass: count the nodes so the count can be written up front */
+ root = &env->bpf_prog_infos;
+ next = rb_first(root);
+ while (next) {
+ ++count;
+ next = rb_next(next);
+ }
+
+ ret = do_write(ff, &count, sizeof(count));
+
+ if (ret < 0)
+ goto out;
+
+ /* second pass: write every bpf_prog_info_linear */
+ next = rb_first(root);
+ while (next) {
+ struct bpf_prog_info_node *node;
+ size_t len;
+
+ node = rb_entry(next, struct bpf_prog_info_node, rb_node);
+ next = rb_next(&node->rb_node);
+ len = sizeof(struct bpf_prog_info_linear) +
+ node->info_linear->data_len;
+
+ /* before writing to file, translate address to offset */
+ bpf_program__bpil_addr_to_offs(node->info_linear);
+ ret = do_write(ff, node->info_linear, len);
+ /* restore the addresses even if the write failed */
+ bpf_program__bpil_offs_to_addr(node->info_linear);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ up_read(&env->bpf_info_lock);
+ return ret;
+}
+
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1560,6 +1605,29 @@ static void print_clockid(struct feat_fd *ff, FILE *fp)
ff->ph->env.clockid_res_ns * 1000);
}
+/*
+ * Print one "# bpf_prog_info of id <id>" line to @fp for every node in
+ * env->bpf_prog_infos, walking the tree under the read side of
+ * env->bpf_info_lock.
+ */
+static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct rb_node *nd;
+
+	down_read(&env->bpf_info_lock);
+
+	for (nd = rb_first(&env->bpf_prog_infos); nd; nd = rb_next(nd)) {
+		struct bpf_prog_info_node *entry;
+
+		entry = rb_entry(nd, struct bpf_prog_info_node, rb_node);
+		fprintf(fp, "# bpf_prog_info of id %u\n",
+			entry->info_linear->info.id);
+	}
+
+	up_read(&env->bpf_info_lock);
+}
+
static void free_event_desc(struct perf_evsel *events)
{
struct perf_evsel *evsel;
@@ -2592,6 +2660,69 @@ static int process_clockid(struct feat_fd *ff,
return 0;
}
+/*
+ * Read the HEADER_BPF_PROG_INFO feature: a u32 count followed by that
+ * many bpf_prog_info_linear records, each inserted into
+ * ff->ph->env.bpf_prog_infos.
+ *
+ * env->bpf_info_lock is deliberately NOT taken here:
+ * perf_env__insert_bpf_prog_info() takes the write side itself for every
+ * node, and taking it here as well would lock it recursively.
+ *
+ * Returns 0 on success, -1 on a read error, an invalid record or an
+ * allocation failure. Records inserted before the failure stay in the
+ * tree.
+ */
+static int process_bpf_prog_info(struct feat_fd *ff,
+				 void *data __maybe_unused)
+{
+	struct bpf_prog_info_linear *info_linear = NULL;
+	struct bpf_prog_info_node *info_node = NULL;
+	struct perf_env *env = &ff->ph->env;
+	u32 count, i;
+
+	if (do_read_u32(ff, &count))
+		return -1;
+
+	for (i = 0; i < count; ++i) {
+		u32 info_len, data_len;
+
+		/* the previous record now belongs to the tree */
+		info_linear = NULL;
+		info_node = NULL;
+		if (do_read_u32(ff, &info_len))
+			goto out;
+		if (do_read_u32(ff, &data_len))
+			goto out;
+
+		/* a record from a newer tool would not fit our struct */
+		if (info_len > sizeof(struct bpf_prog_info)) {
+			pr_warning("detected invalid bpf_prog_info\n");
+			goto out;
+		}
+
+		info_linear = malloc(sizeof(struct bpf_prog_info_linear) +
+				     data_len);
+		if (!info_linear)
+			goto out;
+		info_linear->info_len = sizeof(struct bpf_prog_info);
+		info_linear->data_len = data_len;
+		if (do_read_u64(ff, (u64 *)(&info_linear->arrays)))
+			goto out;
+		if (__do_read(ff, &info_linear->info, info_len))
+			goto out;
+		/* zero the fields an older, shorter record does not carry */
+		if (info_len < sizeof(struct bpf_prog_info))
+			memset(((void *)(&info_linear->info)) + info_len, 0,
+			       sizeof(struct bpf_prog_info) - info_len);
+
+		if (__do_read(ff, info_linear->data, data_len))
+			goto out;
+		info_node = malloc(sizeof(struct bpf_prog_info_node));
+		if (!info_node)
+			goto out;
+
+		/* after reading from file, translate offset to address */
+		bpf_program__bpil_offs_to_addr(info_linear);
+		info_node->info_linear = info_linear;
+		perf_env__insert_bpf_prog_info(env, info_node);
+	}
+
+	return 0;
+out:
+	free(info_linear);
+	free(info_node);
+	return -1;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2651,7 +2782,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
- FEAT_OPR(CLOCKID, clockid, false)
+ FEAT_OPR(CLOCKID, clockid, false),
+ FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false)
};
struct header_print_data {
--git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 0d553ddca0a3..0785c91b4c3a 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -39,6 +39,7 @@ enum {
HEADER_SAMPLE_TIME,
HEADER_MEM_TOPOLOGY,
HEADER_CLOCKID,
+ HEADER_BPF_PROG_INFO,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
--
2.17.1
^ permalink raw reply related
* [PATCH 3/9] bpf: bpftool: use bpf_program__get_prog_info_linear() in prog.c:do_dump()
From: Song Liu @ 2019-02-09 1:16 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209011705.2160185-1-songliubraving@fb.com>
This patch uses bpf_program__get_prog_info_linear() to simplify the
logic in prog.c do_dump().
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/bpf/bpftool/prog.c | 266 +++++++++------------------------------
1 file changed, 59 insertions(+), 207 deletions(-)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 0640e9bc0ada..206b820df7c2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -393,41 +393,31 @@ static int do_show(int argc, char **argv)
static int do_dump(int argc, char **argv)
{
- unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size;
- void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL;
- unsigned int nr_finfo, nr_linfo = 0, nr_jited_linfo = 0;
+ struct bpf_prog_info_linear *info_linear;
struct bpf_prog_linfo *prog_linfo = NULL;
- unsigned long *func_ksyms = NULL;
- struct bpf_prog_info info = {};
- unsigned int *func_lens = NULL;
+ enum {DUMP_JITED, DUMP_XLATED} mode;
const char *disasm_opt = NULL;
- unsigned int nr_func_ksyms;
- unsigned int nr_func_lens;
+ struct bpf_prog_info *info;
struct dump_data dd = {};
- __u32 len = sizeof(info);
+ void *func_info = NULL;
struct btf *btf = NULL;
- unsigned int buf_size;
char *filepath = NULL;
bool opcodes = false;
bool visual = false;
char func_sig[1024];
unsigned char *buf;
bool linum = false;
- __u32 *member_len;
- __u64 *member_ptr;
+ __u32 member_len;
+ __u64 arrays;
ssize_t n;
- int err;
int fd;
if (is_prefix(*argv, "jited")) {
if (disasm_init())
return -1;
-
- member_len = &info.jited_prog_len;
- member_ptr = &info.jited_prog_insns;
+ mode = DUMP_JITED;
} else if (is_prefix(*argv, "xlated")) {
- member_len = &info.xlated_prog_len;
- member_ptr = &info.xlated_prog_insns;
+ mode = DUMP_XLATED;
} else {
p_err("expected 'xlated' or 'jited', got: %s", *argv);
return -1;
@@ -466,175 +456,50 @@ static int do_dump(int argc, char **argv)
return -1;
}
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get prog info: %s", strerror(errno));
- return -1;
- }
-
- if (!*member_len) {
- p_info("no instructions returned");
- close(fd);
- return 0;
- }
+ if (mode == DUMP_JITED)
+ arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
+ else
+ arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS;
- buf_size = *member_len;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
+ arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
- buf = malloc(buf_size);
- if (!buf) {
- p_err("mem alloc failed");
- close(fd);
+ info_linear = bpf_program__get_prog_info_linear(fd, arrays);
+ close(fd);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ p_err("can't get prog info: %s", strerror(errno));
return -1;
}
- nr_func_ksyms = info.nr_jited_ksyms;
- if (nr_func_ksyms) {
- func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
- if (!func_ksyms) {
- p_err("mem alloc failed");
- close(fd);
- goto err_free;
- }
- }
-
- nr_func_lens = info.nr_jited_func_lens;
- if (nr_func_lens) {
- func_lens = malloc(nr_func_lens * sizeof(__u32));
- if (!func_lens) {
- p_err("mem alloc failed");
- close(fd);
+ info = &info_linear->info;
+ if (mode == DUMP_JITED) {
+ if (info->jited_prog_len == 0) {
+ p_info("no instructions returned");
goto err_free;
}
- }
-
- nr_finfo = info.nr_func_info;
- finfo_rec_size = info.func_info_rec_size;
- if (nr_finfo && finfo_rec_size) {
- func_info = malloc(nr_finfo * finfo_rec_size);
- if (!func_info) {
- p_err("mem alloc failed");
- close(fd);
+ buf = (unsigned char *)(info->jited_prog_insns);
+ member_len = info->jited_prog_len;
+ } else { /* DUMP_XLATED */
+ if (info->xlated_prog_len == 0) {
+ p_err("error retrieving insn dump: kernel.kptr_restrict set?");
goto err_free;
}
+ buf = (unsigned char *)info->xlated_prog_insns;
+ member_len = info->xlated_prog_len;
}
- linfo_rec_size = info.line_info_rec_size;
- if (info.nr_line_info && linfo_rec_size && info.btf_id) {
- nr_linfo = info.nr_line_info;
- linfo = malloc(nr_linfo * linfo_rec_size);
- if (!linfo) {
- p_err("mem alloc failed");
- close(fd);
- goto err_free;
- }
- }
-
- jited_linfo_rec_size = info.jited_line_info_rec_size;
- if (info.nr_jited_line_info &&
- jited_linfo_rec_size &&
- info.nr_jited_ksyms &&
- info.nr_jited_func_lens &&
- info.btf_id) {
- nr_jited_linfo = info.nr_jited_line_info;
- jited_linfo = malloc(nr_jited_linfo * jited_linfo_rec_size);
- if (!jited_linfo) {
- p_err("mem alloc failed");
- close(fd);
- goto err_free;
- }
- }
-
- memset(&info, 0, sizeof(info));
-
- *member_ptr = ptr_to_u64(buf);
- *member_len = buf_size;
- info.jited_ksyms = ptr_to_u64(func_ksyms);
- info.nr_jited_ksyms = nr_func_ksyms;
- info.jited_func_lens = ptr_to_u64(func_lens);
- info.nr_jited_func_lens = nr_func_lens;
- info.nr_func_info = nr_finfo;
- info.func_info_rec_size = finfo_rec_size;
- info.func_info = ptr_to_u64(func_info);
- info.nr_line_info = nr_linfo;
- info.line_info_rec_size = linfo_rec_size;
- info.line_info = ptr_to_u64(linfo);
- info.nr_jited_line_info = nr_jited_linfo;
- info.jited_line_info_rec_size = jited_linfo_rec_size;
- info.jited_line_info = ptr_to_u64(jited_linfo);
-
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- close(fd);
- if (err) {
- p_err("can't get prog info: %s", strerror(errno));
- goto err_free;
- }
-
- if (*member_len > buf_size) {
- p_err("too many instructions returned");
- goto err_free;
- }
-
- if (info.nr_jited_ksyms > nr_func_ksyms) {
- p_err("too many addresses returned");
- goto err_free;
- }
-
- if (info.nr_jited_func_lens > nr_func_lens) {
- p_err("too many values returned");
- goto err_free;
- }
-
- if (info.nr_func_info != nr_finfo) {
- p_err("incorrect nr_func_info %d vs. expected %d",
- info.nr_func_info, nr_finfo);
- goto err_free;
- }
-
- if (info.func_info_rec_size != finfo_rec_size) {
- p_err("incorrect func_info_rec_size %d vs. expected %d",
- info.func_info_rec_size, finfo_rec_size);
- goto err_free;
- }
-
- if (linfo && info.nr_line_info != nr_linfo) {
- p_err("incorrect nr_line_info %u vs. expected %u",
- info.nr_line_info, nr_linfo);
- goto err_free;
- }
-
- if (info.line_info_rec_size != linfo_rec_size) {
- p_err("incorrect line_info_rec_size %u vs. expected %u",
- info.line_info_rec_size, linfo_rec_size);
- goto err_free;
- }
-
- if (jited_linfo && info.nr_jited_line_info != nr_jited_linfo) {
- p_err("incorrect nr_jited_line_info %u vs. expected %u",
- info.nr_jited_line_info, nr_jited_linfo);
- goto err_free;
- }
-
- if (info.jited_line_info_rec_size != jited_linfo_rec_size) {
- p_err("incorrect jited_line_info_rec_size %u vs. expected %u",
- info.jited_line_info_rec_size, jited_linfo_rec_size);
- goto err_free;
- }
-
- if ((member_len == &info.jited_prog_len &&
- info.jited_prog_insns == 0) ||
- (member_len == &info.xlated_prog_len &&
- info.xlated_prog_insns == 0)) {
- p_err("error retrieving insn dump: kernel.kptr_restrict set?");
- goto err_free;
- }
-
- if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) {
+ if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
p_err("failed to get btf");
goto err_free;
}
- if (nr_linfo) {
- prog_linfo = bpf_prog_linfo__new(&info);
+ func_info = (void *)info->func_info;
+
+ if (info->nr_line_info) {
+ prog_linfo = bpf_prog_linfo__new(info);
if (!prog_linfo)
p_info("error in processing bpf_line_info. continue without it.");
}
@@ -647,9 +512,9 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
- n = write(fd, buf, *member_len);
+ n = write(fd, buf, member_len);
close(fd);
- if (n != *member_len) {
+ if (n != member_len) {
p_err("error writing output file: %s",
n < 0 ? strerror(errno) : "short write");
goto err_free;
@@ -657,19 +522,19 @@ static int do_dump(int argc, char **argv)
if (json_output)
jsonw_null(json_wtr);
- } else if (member_len == &info.jited_prog_len) {
+ } else if (mode == DUMP_JITED) {
const char *name = NULL;
- if (info.ifindex) {
- name = ifindex_to_bfd_params(info.ifindex,
- info.netns_dev,
- info.netns_ino,
+ if (info->ifindex) {
+ name = ifindex_to_bfd_params(info->ifindex,
+ info->netns_dev,
+ info->netns_ino,
&disasm_opt);
if (!name)
goto err_free;
}
- if (info.nr_jited_func_lens && info.jited_func_lens) {
+ if (info->nr_jited_func_lens && info->jited_func_lens) {
struct kernel_sym *sym = NULL;
struct bpf_func_info *record;
char sym_name[SYM_MAX_NAME];
@@ -677,17 +542,16 @@ static int do_dump(int argc, char **argv)
__u64 *ksyms = NULL;
__u32 *lens;
__u32 i;
-
- if (info.nr_jited_ksyms) {
+ if (info->nr_jited_ksyms) {
kernel_syms_load(&dd);
- ksyms = (__u64 *) info.jited_ksyms;
+ ksyms = (__u64 *) info->jited_ksyms;
}
if (json_output)
jsonw_start_array(json_wtr);
- lens = (__u32 *) info.jited_func_lens;
- for (i = 0; i < info.nr_jited_func_lens; i++) {
+ lens = (__u32 *) info->jited_func_lens;
+ for (i = 0; i < info->nr_jited_func_lens; i++) {
if (ksyms) {
sym = kernel_syms_search(&dd, ksyms[i]);
if (sym)
@@ -699,7 +563,7 @@ static int do_dump(int argc, char **argv)
}
if (func_info) {
- record = func_info + i * finfo_rec_size;
+ record = func_info + i * info->func_info_rec_size;
btf_dumper_type_only(btf, record->type_id,
func_sig,
sizeof(func_sig));
@@ -736,49 +600,37 @@ static int do_dump(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
} else {
- disasm_print_insn(buf, *member_len, opcodes, name,
+ disasm_print_insn(buf, member_len, opcodes, name,
disasm_opt, btf, NULL, 0, 0, false);
}
} else if (visual) {
if (json_output)
jsonw_null(json_wtr);
else
- dump_xlated_cfg(buf, *member_len);
+ dump_xlated_cfg(buf, member_len);
} else {
kernel_syms_load(&dd);
- dd.nr_jited_ksyms = info.nr_jited_ksyms;
- dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+ dd.nr_jited_ksyms = info->nr_jited_ksyms;
+ dd.jited_ksyms = (__u64 *) info->jited_ksyms;
dd.btf = btf;
dd.func_info = func_info;
- dd.finfo_rec_size = finfo_rec_size;
+ dd.finfo_rec_size = info->func_info_rec_size;
dd.prog_linfo = prog_linfo;
if (json_output)
- dump_xlated_json(&dd, buf, *member_len, opcodes,
+ dump_xlated_json(&dd, buf, member_len, opcodes,
linum);
else
- dump_xlated_plain(&dd, buf, *member_len, opcodes,
+ dump_xlated_plain(&dd, buf, member_len, opcodes,
linum);
kernel_syms_destroy(&dd);
}
- free(buf);
- free(func_ksyms);
- free(func_lens);
- free(func_info);
- free(linfo);
- free(jited_linfo);
- bpf_prog_linfo__free(prog_linfo);
+ free(info_linear);
return 0;
err_free:
- free(buf);
- free(func_ksyms);
- free(func_lens);
- free(func_info);
- free(linfo);
- free(jited_linfo);
- bpf_prog_linfo__free(prog_linfo);
+ free(info_linear);
return -1;
}
--
2.17.1
^ permalink raw reply related
* [PATCH 0/9] perf annotation of BPF programs
From: Song Liu @ 2019-02-09 1:16 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
This series enables annotation of BPF programs in perf.
The perf tool gathers information via sys_bpf and (optionally) stores it in
perf.data as headers.
Patch 1/9 fixes a minor issue in the kernel;
Patches 2/9 to 4/9 introduce new helper functions and use them in perf and
bpftool;
Patches 5/9 and 6/9 save information about bpf programs in perf_env;
Patch 7/9 adds the --bpf-event option to perf-top;
Patch 8/9 enables annotation of bpf programs based on information gathered
in 5/9 and 6/9;
Patch 9/9 handles information about short-lived BPF programs that are loaded
during perf-record or perf-top.
Commands tested during development are perf-top, perf-record, perf-report,
and perf-annotate.
===================== Note on patch dependency ========================
This set depends on both the bpf-next tree and tip/perf/core. The current
version is developed on the bpf-next tree with the following commits
cherry-picked from tip/perf/core:
(from 1/10 to 10/10)
commit 76193a94522f ("perf, bpf: Introduce PERF_RECORD_KSYMBOL")
commit d764ac646491 ("tools headers uapi: Sync tools/include/uapi/linux/perf_event.h")
commit 6ee52e2a3fe4 ("perf, bpf: Introduce PERF_RECORD_BPF_EVENT")
commit df063c83aa2c ("tools headers uapi: Sync tools/include/uapi/linux/perf_event.h")
commit 9aa0bfa370b2 ("perf tools: Handle PERF_RECORD_KSYMBOL")
commit 45178a928a4b ("perf tools: Handle PERF_RECORD_BPF_EVENT")
commit 7b612e291a5a ("perf tools: Synthesize PERF_RECORD_* for loaded BPF programs")
commit a40b95bcd30c ("perf top: Synthesize BPF events for pre-existing loaded BPF programs")
commit 6934058d9fb6 ("bpf: Add module name [bpf] to ksymbols for bpf programs")
commit 811184fb6977 ("perf bpf: Fix synthesized PERF_RECORD_KSYMBOL/BPF_EVENT")
========================================================================
Song Liu (9):
perf, bpf: consider events with attr.bpf_event as side-band events
bpf: libbpf: introduce bpf_program__get_prog_info_linear()
bpf: bpftool: use bpf_program__get_prog_info_linear() in
prog.c:do_dump()
perf, bpf: synthesize bpf events with
bpf_program__get_prog_info_linear()
perf, bpf: save bpf_prog_info in a rbtree in perf_env
perf, bpf: save btf in a rbtree in perf_env
perf-top: add option --bpf-event
perf, bpf: enable annotation of bpf program
perf, bpf: save information about short living bpf programs
kernel/events/core.c | 3 +-
tools/bpf/bpftool/prog.c | 266 ++++++---------------------
tools/lib/bpf/libbpf.c | 251 ++++++++++++++++++++++++++
tools/lib/bpf/libbpf.h | 63 +++++++
tools/lib/bpf/libbpf.map | 3 +
tools/perf/Makefile.config | 2 +-
tools/perf/builtin-record.c | 15 +-
tools/perf/builtin-top.c | 15 +-
tools/perf/util/annotate.c | 149 ++++++++++++++-
tools/perf/util/bpf-event.c | 351 +++++++++++++++++++++++++++---------
tools/perf/util/bpf-event.h | 48 ++++-
tools/perf/util/dso.c | 1 +
tools/perf/util/dso.h | 33 ++--
tools/perf/util/env.c | 148 +++++++++++++++
tools/perf/util/env.h | 12 ++
tools/perf/util/evlist.c | 20 ++
tools/perf/util/evlist.h | 2 +
tools/perf/util/header.c | 231 +++++++++++++++++++++++-
tools/perf/util/header.h | 2 +
tools/perf/util/symbol.c | 1 +
20 files changed, 1304 insertions(+), 312 deletions(-)
--
2.17.1
^ permalink raw reply
* [PATCH 4/9] perf, bpf: synthesize bpf events with bpf_program__get_prog_info_linear()
From: Song Liu @ 2019-02-09 1:17 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209011705.2160185-1-songliubraving@fb.com>
With bpf_program__get_prog_info_linear, we can simplify the logic that
synthesizes bpf events.
This patch doesn't change the behavior of the code.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/perf/util/bpf-event.c | 118 ++++++++++++------------------------
1 file changed, 40 insertions(+), 78 deletions(-)
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 796ef793f4ce..e6dfb95029e5 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -3,7 +3,9 @@
#include <stdlib.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
+#include <bpf/libbpf.h>
#include <linux/btf.h>
+#include <linux/err.h>
#include "bpf-event.h"
#include "debug.h"
#include "symbol.h"
@@ -49,99 +51,62 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
{
struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
struct bpf_event *bpf_event = &event->bpf_event;
- u32 sub_prog_cnt, i, func_info_rec_size = 0;
- u8 (*prog_tags)[BPF_TAG_SIZE] = NULL;
- struct bpf_prog_info info = { .type = 0, };
- u32 info_len = sizeof(info);
- void *func_infos = NULL;
- u64 *prog_addrs = NULL;
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info *info;
struct btf *btf = NULL;
- u32 *prog_lens = NULL;
bool has_btf = false;
- char errbuf[512];
+ u32 sub_prog_cnt, i;
int err = 0;
+ u64 arrays;
- /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */
- err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
+ arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
- if (err) {
- pr_debug("%s: failed to get BPF program info: %s, aborting\n",
- __func__, str_error_r(errno, errbuf, sizeof(errbuf)));
+ info_linear = bpf_program__get_prog_info_linear(fd, arrays);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ info_linear = NULL;
+ pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
return -1;
}
- if (info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+
+ if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) {
pr_debug("%s: the kernel is too old, aborting\n", __func__);
return -2;
}
+ info = &info_linear->info;
+
/* number of ksyms, func_lengths, and tags should match */
- sub_prog_cnt = info.nr_jited_ksyms;
- if (sub_prog_cnt != info.nr_prog_tags ||
- sub_prog_cnt != info.nr_jited_func_lens)
+ sub_prog_cnt = info->nr_jited_ksyms;
+ if (sub_prog_cnt != info->nr_prog_tags ||
+ sub_prog_cnt != info->nr_jited_func_lens)
return -1;
/* check BTF func info support */
- if (info.btf_id && info.nr_func_info && info.func_info_rec_size) {
+ if (info->btf_id && info->nr_func_info && info->func_info_rec_size) {
/* btf func info number should be same as sub_prog_cnt */
- if (sub_prog_cnt != info.nr_func_info) {
+ if (sub_prog_cnt != info->nr_func_info) {
pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
- return -1;
- }
- if (btf__get_from_id(info.btf_id, &btf)) {
- pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id);
- return -1;
+ err = -1;
+ goto out;
}
- func_info_rec_size = info.func_info_rec_size;
- func_infos = calloc(sub_prog_cnt, func_info_rec_size);
- if (!func_infos) {
- pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__);
- return -1;
+ if (btf__get_from_id(info->btf_id, &btf)) {
+ pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
+ err = -1;
+ btf = NULL;
+ goto out;
}
has_btf = true;
}
- /*
- * We need address, length, and tag for each sub program.
- * Allocate memory and call bpf_obj_get_info_by_fd() again
- */
- prog_addrs = calloc(sub_prog_cnt, sizeof(u64));
- if (!prog_addrs) {
- pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__);
- goto out;
- }
- prog_lens = calloc(sub_prog_cnt, sizeof(u32));
- if (!prog_lens) {
- pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__);
- goto out;
- }
- prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE);
- if (!prog_tags) {
- pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__);
- goto out;
- }
-
- memset(&info, 0, sizeof(info));
- info.nr_jited_ksyms = sub_prog_cnt;
- info.nr_jited_func_lens = sub_prog_cnt;
- info.nr_prog_tags = sub_prog_cnt;
- info.jited_ksyms = ptr_to_u64(prog_addrs);
- info.jited_func_lens = ptr_to_u64(prog_lens);
- info.prog_tags = ptr_to_u64(prog_tags);
- info_len = sizeof(info);
- if (has_btf) {
- info.nr_func_info = sub_prog_cnt;
- info.func_info_rec_size = func_info_rec_size;
- info.func_info = ptr_to_u64(func_infos);
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
- if (err) {
- pr_debug("%s: failed to get BPF program info, aborting\n", __func__);
- goto out;
- }
-
/* Synthesize PERF_RECORD_KSYMBOL */
for (i = 0; i < sub_prog_cnt; i++) {
+ u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(info->prog_tags);
+ __u32 *prog_lens = (__u32 *)(info->jited_func_lens);
+ __u64 *prog_addrs = (__u64 *)(info->jited_ksyms);
+ void *func_infos = (void *)(info->func_info);
const struct bpf_func_info *finfo;
const char *short_name = NULL;
const struct btf_type *t;
@@ -163,13 +128,13 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
KSYM_NAME_LEN - name_len,
prog_tags[i], BPF_TAG_SIZE);
if (has_btf) {
- finfo = func_infos + i * info.func_info_rec_size;
+ finfo = func_infos + i * info->func_info_rec_size;
t = btf__type_by_id(btf, finfo->type_id);
short_name = btf__name_by_offset(btf, t->name_off);
} else if (i == 0 && sub_prog_cnt == 1) {
/* no subprog */
- if (info.name[0])
- short_name = info.name;
+ if (info->name[0])
+ short_name = info->name;
} else
short_name = "F";
if (short_name)
@@ -195,9 +160,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
},
.type = PERF_BPF_EVENT_PROG_LOAD,
.flags = 0,
- .id = info.id,
+ .id = info->id,
};
- memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE);
+ memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
memset((void *)event + event->header.size, 0, machine->id_hdr_size);
event->header.size += machine->id_hdr_size;
err = perf_tool__process_synth_event(tool, event,
@@ -205,10 +170,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
}
out:
- free(prog_tags);
- free(prog_lens);
- free(prog_addrs);
- free(func_infos);
+ free(info_linear);
free(btf);
return err ? -1 : 0;
}
--
2.17.1
^ permalink raw reply related
* [PATCH 1/9] perf, bpf: consider events with attr.bpf_event as side-band events
From: Song Liu @ 2019-02-09 1:16 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: ast, daniel, kernel-team, peterz, acme, Song Liu
In-Reply-To: <20190209011705.2160185-1-songliubraving@fb.com>
Events with bpf_event should be considered side-band events, as they
carry information about BPF programs.
Fixes: 6ee52e2a3fe4 ("perf, bpf: Introduce PERF_RECORD_BPF_EVENT")
Signed-off-by: Song Liu <songliubraving@fb.com>
---
kernel/events/core.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0a8dab322111..9403bdda5f8c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4238,7 +4238,8 @@ static bool is_sb_event(struct perf_event *event)
if (attr->mmap || attr->mmap_data || attr->mmap2 ||
attr->comm || attr->comm_exec ||
attr->task || attr->ksymbol ||
- attr->context_switch)
+ attr->context_switch ||
+ attr->bpf_event)
return true;
return false;
}
--
2.17.1
^ permalink raw reply related
* Re: stmmac / meson8b-dwmac
From: Martin Blumenstingl @ 2019-02-09 1:09 UTC (permalink / raw)
To: Simon Huelck
Cc: Emiliano Ingrassia, Gpeppe.cavallaro, alexandre.torgue,
linux-amlogic, netdev
In-Reply-To: <3001f244-8904-1e89-9595-62a65a7b32ae@gmx.de>
Hi Simon,
On Thu, Feb 7, 2019 at 8:30 PM Simon Huelck <simonmail@gmx.de> wrote:
>
> Hi Guys,
>
>
> i can confirm better performance with 4.14.29
>
> - ~900 MBits with iperf2 in one way
> -~ 500 - 600MBits with iperf2 in duplex in both directions
>
>
> This wasnt the case with 4.17.9, not with 4.18, 4.19 or the 5.0 series.....
I just did a small test myself on a Khadas VIM2:
# iperf3 -c 192.168.1.100
Connecting to host 192.168.1.100, port 5201
[ 5] local 192.168.1.189 port 37192 connected to 192.168.1.100 port 5201
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.00 sec 113 MBytes 946 Mbits/sec 0 354 KBytes
[ 5] 1.00-2.00 sec 112 MBytes 940 Mbits/sec 0 354 KBytes
[ 5] 2.00-3.00 sec 110 MBytes 920 Mbits/sec 241 228 KBytes
[ 5] 3.00-4.00 sec 112 MBytes 940 Mbits/sec 0 314 KBytes
[ 5] 4.00-5.00 sec 111 MBytes 933 Mbits/sec 89 83.4 KBytes
[ 5] 5.00-6.00 sec 110 MBytes 926 Mbits/sec 115 335 KBytes
[ 5] 6.00-7.00 sec 112 MBytes 941 Mbits/sec 0 358 KBytes
[ 5] 7.00-8.00 sec 112 MBytes 941 Mbits/sec 0 362 KBytes
[ 5] 8.00-9.00 sec 112 MBytes 941 Mbits/sec 0 369 KBytes
[ 5] 9.00-10.00 sec 112 MBytes 942 Mbits/sec 0 372 KBytes
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-10.00 sec 1.09 GBytes 937 Mbits/sec 445 sender
[ 5] 0.00-10.04 sec 1.09 GBytes 932 Mbits/sec receiver
iperf Done.
(it's interesting that the sending direction has 445 retries)
# iperf3 -c 192.168.1.100 -R
Connecting to host 192.168.1.100, port 5201
Reverse mode, remote host 192.168.1.100 is sending
[ 5] local 192.168.1.189 port 37196 connected to 192.168.1.100 port 5201
[ ID] Interval Transfer Bitrate
[ 5] 0.00-1.00 sec 90.9 MBytes 763 Mbits/sec
[ 5] 1.00-2.00 sec 90.9 MBytes 762 Mbits/sec
[ 5] 2.00-3.00 sec 90.7 MBytes 760 Mbits/sec
[ 5] 3.00-4.00 sec 91.3 MBytes 766 Mbits/sec
[ 5] 4.00-5.00 sec 91.1 MBytes 764 Mbits/sec
[ 5] 5.00-6.00 sec 91.1 MBytes 765 Mbits/sec
[ 5] 6.00-7.00 sec 90.8 MBytes 762 Mbits/sec
[ 5] 7.00-8.00 sec 90.9 MBytes 762 Mbits/sec
[ 5] 8.00-9.00 sec 91.0 MBytes 764 Mbits/sec
[ 5] 9.00-10.00 sec 91.3 MBytes 766 Mbits/sec
- - - - - - - - - - - - - - - - - - - - - - - - -
[ ID] Interval Transfer Bitrate Retr
[ 5] 0.00-10.04 sec 911 MBytes 762 Mbits/sec 0 sender
[ 5] 0.00-10.00 sec 910 MBytes 763 Mbits/sec receiver
iperf Done.
(when receiving I see no retries)
for my test I used my Khadas VIM2 (as I don't have a GXBB board anymore).
test setup: PC -> built-in switch in some ath79 based OpenWrt device
-> VIM2. no VLANs are used
revision: latest mainline, which at the time of testing is:
46c291e277f937378 ("Merge tag 'armsoc-fixes-5.0' of
git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc")
> How can i help further ?
it's good to know that 4.14 has "good" performance in your scenario
can you please show the full iperf outputs for your tests (preferably
on both, 4.14 and 5.0-rcX)?
do you see any improvements on 5.0-rcX when not using VLANs (this is
just a random guess)?
Regards
Martin
^ permalink raw reply
* Re: [PATCH net-next 15/16] net: switchdev: Replace port attr set SDO with a notification
From: Florian Fainelli @ 2019-02-09 0:36 UTC (permalink / raw)
To: netdev
Cc: David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-16-f.fainelli@gmail.com>
On 2/8/19 4:32 PM, Florian Fainelli wrote:
> Drop switchdev_ops.switchdev_port_attr_set. Drop the uses of this field
> from all clients, which were migrated to use switchdev notification in
> the previous patches.
>
> Add a new function switchdev_port_attr_notify() that sends the switchdev
> notifications SWITCHDEV_PORT_ATTR_SET.
>
> Drop __switchdev_port_attr_set() and update switchdev_port_attr_set()
> likewise.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> include/net/switchdev.h | 18 --------
> net/switchdev/switchdev.c | 92 ++++++++++-----------------------------
> 2 files changed, 22 insertions(+), 88 deletions(-)
>
> diff --git a/include/net/switchdev.h b/include/net/switchdev.h
> index 4c5f7e5430cf..5387ff6f41c5 100644
> --- a/include/net/switchdev.h
> +++ b/include/net/switchdev.h
> @@ -111,21 +111,6 @@ void *switchdev_trans_item_dequeue(struct switchdev_trans *trans);
>
> typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
>
> -/**
> - * struct switchdev_ops - switchdev operations
> - *
> - * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr).
> - *
> - * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr).
> - */
> -struct switchdev_ops {
> - int (*switchdev_port_attr_get)(struct net_device *dev,
> - struct switchdev_attr *attr);
> - int (*switchdev_port_attr_set)(struct net_device *dev,
> - const struct switchdev_attr *attr,
> - struct switchdev_trans *trans);
> -};
> -
This and the hunk below break bisection; I will move them into patch #16 after
receiving feedback on this.
--
Florian
^ permalink raw reply
* [PATCH net-next 09/16] switchdev: Add SWITCHDEV_PORT_ATTR_SET
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
In preparation for allowing switchdev enabled drivers to veto specific
attribute settings from within the context of the caller, introduce a
new switchdev notifier type for port attributes.
Suggested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
include/net/switchdev.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 96cd3e795f7f..4c5f7e5430cf 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -141,6 +141,8 @@ enum switchdev_notifier_type {
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+ SWITCHDEV_PORT_ATTR_SET, /* Blocking. */
};
struct switchdev_notifier_info {
@@ -163,6 +165,13 @@ struct switchdev_notifier_port_obj_info {
bool handled;
};
+struct switchdev_notifier_port_attr_info {
+ struct switchdev_notifier_info info; /* must be first */
+ const struct switchdev_attr *attr;
+ struct switchdev_trans *trans;
+ bool handled;
+};
+
static inline struct net_device *
switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
{
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 12/16] mlxsw: spectrum_switchdev: Handle SWITCHDEV_PORT_ATTR_SET
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Following patches will change the way we communicate getting or setting
a port's attribute and use a blocking notifier to perform those tasks.
Prepare mlxsw to support receiving notifier events targeting
SWITCHDEV_PORT_ATTR_SET and simply translate that into the existing
mlxsw_sp_port_attr_set() call.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
.../mellanox/mlxsw/spectrum_switchdev.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 6b09d68671cf..29ffb5cac777 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -3410,6 +3410,23 @@ mlxsw_sp_switchdev_handle_vxlan_obj_del(struct net_device *vxlan_dev,
}
}
+static int
+mlxsw_sp_switchdev_port_attr_event(unsigned long event, struct net_device *dev,
+ struct switchdev_notifier_port_attr_info *port_attr_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = mlxsw_sp_port_attr_set(dev, port_attr_info->attr,
+ port_attr_info->trans);
+ break;
+ }
+
+ port_attr_info->handled = true;
+ return notifier_from_errno(err);
+}
+
static int mlxsw_sp_switchdev_blocking_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -3433,6 +3450,8 @@ static int mlxsw_sp_switchdev_blocking_event(struct notifier_block *unused,
mlxsw_sp_port_dev_check,
mlxsw_sp_port_obj_del);
return notifier_from_errno(err);
+ case SWITCHDEV_PORT_ATTR_SET:
+ return mlxsw_sp_switchdev_port_attr_event(event, dev, ptr);
}
return NOTIFY_DONE;
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 14/16] staging: fsl-dpaa2: ethsw: Handle SWITCHDEV_PORT_ATTR_SET
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Following patches will change the way we communicate getting or setting
a port's attribute and use a blocking notifier to perform those tasks.
Prepare ethsw to support receiving notifier events targeting
SWITCHDEV_PORT_ATTR_SET and simply translate that into the existing
swdev_port_attr_set() call.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index b195b09e0d1d..d40bdcadd569 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -1092,6 +1092,24 @@ ethsw_switchdev_port_obj_event(unsigned long event, struct net_device *netdev,
return notifier_from_errno(err);
}
+static int
+ethsw_switchdev_port_attr_event(unsigned long event,
+ struct net_device *netdev,
+ struct switchdev_notifier_port_attr_info *port_attr_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = swdev_port_attr_set(netdev, port_attr_info->attr,
+ port_attr_info->trans);
+ break;
+ }
+
+ port_attr_info->handled = true;
+ return notifier_from_errno(err);
+}
+
static int port_switchdev_blocking_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -1104,6 +1122,8 @@ static int port_switchdev_blocking_event(struct notifier_block *unused,
case SWITCHDEV_PORT_OBJ_ADD: /* fall through */
case SWITCHDEV_PORT_OBJ_DEL:
return ethsw_switchdev_port_obj_event(event, dev, ptr);
+ case SWITCHDEV_PORT_ATTR_SET:
+ return ethsw_switchdev_port_attr_event(event, dev, ptr);
}
return NOTIFY_DONE;
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 16/16] net: Remove switchdev_ops
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Now that we have converted all possible callers to using a switchdev
notifier for attributes we do not have a need for implementing
switchdev_ops anymore, and this can be removed from all drivers and the
net_device structure.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 12 ------------
drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 --
.../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 ------------
drivers/net/ethernet/mscc/ocelot.c | 1 -
drivers/net/ethernet/rocker/rocker_main.c | 5 -----
drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 5 -----
include/linux/netdevice.h | 3 ---
net/dsa/slave.c | 5 -----
8 files changed, 45 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8dd808b7f931..18b56afbd5d7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3220,7 +3220,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
}
mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan;
- mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
mlxsw_sp->ports[local_port] = mlxsw_sp_port;
err = register_netdev(dev);
if (err) {
@@ -3237,7 +3236,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
err_register_netdev:
mlxsw_sp->ports[local_port] = NULL;
- mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
err_port_vlan_create:
err_port_pvid_set:
@@ -3280,7 +3278,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
mlxsw_sp->ports[local_port] = NULL;
- mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true);
mlxsw_sp_port_nve_fini(mlxsw_sp_port);
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
@@ -4001,12 +3998,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_span_init;
}
- err = mlxsw_sp_switchdev_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n");
- goto err_switchdev_init;
- }
-
err = mlxsw_sp_counter_pool_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
@@ -4077,8 +4068,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
err_counter_pool_init:
- mlxsw_sp_switchdev_fini(mlxsw_sp);
-err_switchdev_init:
mlxsw_sp_span_fini(mlxsw_sp);
err_span_init:
mlxsw_sp_lag_fini(mlxsw_sp);
@@ -4141,7 +4130,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_nve_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
- mlxsw_sp_switchdev_fini(mlxsw_sp);
mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_lag_fini(mlxsw_sp);
mlxsw_sp_buffers_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4fe0996c7cdd..76f51087e35a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -375,8 +375,6 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes);
/* spectrum_switchdev.c */
int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp);
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port);
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
bool adding);
void
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 29ffb5cac777..9951cfe0b244 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1925,10 +1925,6 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
-static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
- .switchdev_port_attr_set = mlxsw_sp_port_attr_set,
-};
-
static int
mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
@@ -3539,11 +3535,3 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
kfree(mlxsw_sp->bridge);
}
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
- mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
-}
-
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-}
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index adab478d36f1..b333965dd02f 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1649,7 +1649,6 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
dev->netdev_ops = &ocelot_port_netdev_ops;
dev->ethtool_ops = &ocelot_ethtool_ops;
- dev->switchdev_ops = &ocelot_port_switchdev_ops;
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS;
dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index f10e4888ecff..6f70b48ce6ee 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2124,10 +2124,6 @@ static int rocker_port_obj_del(struct net_device *dev,
return err;
}
-static const struct switchdev_ops rocker_port_switchdev_ops = {
- .switchdev_port_attr_set = rocker_port_attr_set,
-};
-
struct rocker_fib_event_work {
struct work_struct work;
union {
@@ -2581,7 +2577,6 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
rocker_port_dev_addr_init(rocker_port);
dev->netdev_ops = &rocker_port_netdev_ops;
dev->ethtool_ops = &rocker_port_ethtool_ops;
- dev->switchdev_ops = &rocker_port_switchdev_ops;
netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
NAPI_POLL_WEIGHT);
netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index d40bdcadd569..5cc60e6ceafc 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -914,10 +914,6 @@ static int swdev_port_obj_del(struct net_device *netdev,
return err;
}
-static const struct switchdev_ops ethsw_port_switchdev_ops = {
- .switchdev_port_attr_set = swdev_port_attr_set,
-};
-
/* For the moment, only flood setting needs to be updated */
static int port_bridge_join(struct net_device *netdev,
struct net_device *upper_dev)
@@ -1443,7 +1439,6 @@ static int ethsw_probe_port(struct ethsw_core *ethsw, u16 port_idx)
SET_NETDEV_DEV(port_netdev, dev);
port_netdev->netdev_ops = &ethsw_port_ops;
port_netdev->ethtool_ops = &ethsw_port_ethtool_ops;
- port_netdev->switchdev_ops = &ethsw_port_switchdev_ops;
/* Set MTU limits */
port_netdev->min_mtu = ETH_MIN_MTU;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1d95e634f3fe..6dc84bc43e52 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1833,9 +1833,6 @@ struct net_device {
#endif
const struct net_device_ops *netdev_ops;
const struct ethtool_ops *ethtool_ops;
-#ifdef CONFIG_NET_SWITCHDEV
- const struct switchdev_ops *switchdev_ops;
-#endif
#ifdef CONFIG_NET_L3_MASTER_DEV
const struct l3mdev_ops *l3mdev_ops;
#endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f8c7c1b2cd2f..4178da259339 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1044,10 +1044,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_get_port_parent_id = dsa_slave_get_port_parent_id,
};
-static const struct switchdev_ops dsa_slave_switchdev_ops = {
- .switchdev_port_attr_set = dsa_slave_port_attr_set,
-};
-
static struct device_type dsa_type = {
.name = "dsa",
};
@@ -1307,7 +1303,6 @@ int dsa_slave_create(struct dsa_port *port)
eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
- slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
slave_dev->min_mtu = 0;
slave_dev->max_mtu = ETH_MAX_MTU;
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 15/16] net: switchdev: Replace port attr set SDO with a notification
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Drop switchdev_ops.switchdev_port_attr_set. Drop the uses of this field
from all clients, which were migrated to use switchdev notification in
the previous patches.
Add a new function, switchdev_port_attr_notify(), that sends the
SWITCHDEV_PORT_ATTR_SET switchdev notification.
Drop __switchdev_port_attr_set() and update switchdev_port_attr_set()
likewise.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
include/net/switchdev.h | 18 --------
net/switchdev/switchdev.c | 92 ++++++++++-----------------------------
2 files changed, 22 insertions(+), 88 deletions(-)
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 4c5f7e5430cf..5387ff6f41c5 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -111,21 +111,6 @@ void *switchdev_trans_item_dequeue(struct switchdev_trans *trans);
typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
-/**
- * struct switchdev_ops - switchdev operations
- *
- * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr).
- *
- * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr).
- */
-struct switchdev_ops {
- int (*switchdev_port_attr_get)(struct net_device *dev,
- struct switchdev_attr *attr);
- int (*switchdev_port_attr_set)(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans);
-};
-
enum switchdev_notifier_type {
SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
SWITCHDEV_FDB_DEL_TO_BRIDGE,
@@ -224,7 +209,6 @@ int switchdev_handle_port_obj_del(struct net_device *dev,
int (*del_cb)(struct net_device *dev,
const struct switchdev_obj *obj));
-#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops))
#else
static inline void switchdev_deferred_process(void)
@@ -311,8 +295,6 @@ switchdev_handle_port_obj_del(struct net_device *dev,
return 0;
}
-#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0)
-
#endif
#endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 7e1357db33d7..5a053e20363e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -174,81 +174,31 @@ static int switchdev_deferred_enqueue(struct net_device *dev,
return 0;
}
-/**
- * switchdev_port_attr_get - Get port attribute
- *
- * @dev: port device
- * @attr: attribute to get
- */
-int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+static int switchdev_port_attr_notify(enum switchdev_notifier_type nt,
+ struct net_device *dev,
+ struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
{
- const struct switchdev_ops *ops = dev->switchdev_ops;
- struct net_device *lower_dev;
- struct list_head *iter;
- struct switchdev_attr first = {
- .id = SWITCHDEV_ATTR_ID_UNDEFINED
- };
- int err = -EOPNOTSUPP;
+ int err;
+ int rc;
- if (ops && ops->switchdev_port_attr_get)
- return ops->switchdev_port_attr_get(dev, attr);
+ struct switchdev_notifier_port_attr_info attr_info = {
+ .attr = attr,
+ .trans = trans,
+ .handled = false,
+ };
- if (attr->flags & SWITCHDEV_F_NO_RECURSE)
+ rc = call_switchdev_blocking_notifiers(nt, dev, &attr_info.info, NULL);
+ err = notifier_to_errno(rc);
+ if (err) {
+ WARN_ON(!attr_info.handled);
return err;
-
- /* Switch device port(s) may be stacked under
- * bond/team/vlan dev, so recurse down to get attr on
- * each port. Return -ENODATA if attr values don't
- * compare across ports.
- */
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_attr_get(lower_dev, attr);
- if (err)
- break;
- if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
- first = *attr;
- else if (memcmp(&first, attr, sizeof(*attr)))
- return -ENODATA;
}
- return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
-
-static int __switchdev_port_attr_set(struct net_device *dev,
- const struct switchdev_attr *attr,
- struct switchdev_trans *trans)
-{
- const struct switchdev_ops *ops = dev->switchdev_ops;
- struct net_device *lower_dev;
- struct list_head *iter;
- int err = -EOPNOTSUPP;
-
- if (ops && ops->switchdev_port_attr_set) {
- err = ops->switchdev_port_attr_set(dev, attr, trans);
- goto done;
- }
-
- if (attr->flags & SWITCHDEV_F_NO_RECURSE)
- goto done;
-
- /* Switch device port(s) may be stacked under
- * bond/team/vlan dev, so recurse down to set attr on
- * each port.
- */
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = __switchdev_port_attr_set(lower_dev, attr, trans);
- if (err)
- break;
- }
-
-done:
- if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
- err = 0;
+ if (!attr_info.handled)
+ return -EOPNOTSUPP;
- return err;
+ return 0;
}
static int switchdev_port_attr_set_now(struct net_device *dev,
@@ -267,7 +217,8 @@ static int switchdev_port_attr_set_now(struct net_device *dev,
*/
trans.ph_prepare = true;
- err = __switchdev_port_attr_set(dev, attr, &trans);
+ err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
+ &trans);
if (err) {
/* Prepare phase failed: abort the transaction. Any
* resources reserved in the prepare phase are
@@ -286,7 +237,8 @@ static int switchdev_port_attr_set_now(struct net_device *dev,
*/
trans.ph_prepare = false;
- err = __switchdev_port_attr_set(dev, attr, &trans);
+ err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
+ &trans);
WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
dev->name, attr->id);
switchdev_trans_items_warn_destroy(dev, &trans);
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 13/16] net: mscc: ocelot: Handle SWITCHDEV_PORT_ATTR_SET
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Following patches will change the way we communicate getting or setting
a port's attribute and use a blocking notifier to perform those tasks.
Prepare ocelot to support receiving notifier events targeting
SWITCHDEV_PORT_ATTR_SET and simply translate that into the existing
ocelot_port_attr_set() call.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
drivers/net/ethernet/mscc/ocelot.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 195306d05bcd..adab478d36f1 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1324,10 +1324,6 @@ static int ocelot_port_obj_del(struct net_device *dev,
return ret;
}
-static const struct switchdev_ops ocelot_port_switchdev_ops = {
- .switchdev_port_attr_set = ocelot_port_attr_set,
-};
-
static int ocelot_port_bridge_join(struct ocelot_port *ocelot_port,
struct net_device *bridge)
{
@@ -1582,6 +1578,24 @@ struct notifier_block ocelot_netdevice_nb __read_mostly = {
};
EXPORT_SYMBOL(ocelot_netdevice_nb);
+static int
+ocelot_switchdev_port_attr_event(unsigned long event,
+ struct net_device *netdev,
+ struct switchdev_notifier_port_attr_info *port_attr_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = ocelot_port_attr_set(netdev, port_attr_info->attr,
+ port_attr_info->trans);
+ break;
+ }
+
+ port_attr_info->handled = true;
+ return notifier_from_errno(err);
+}
+
static int ocelot_switchdev_blocking_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -1600,6 +1614,8 @@ static int ocelot_switchdev_blocking_event(struct notifier_block *unused,
ocelot_netdevice_dev_check,
ocelot_port_obj_del);
return notifier_from_errno(err);
+ case SWITCHDEV_PORT_ATTR_SET:
+ return ocelot_switchdev_port_attr_event(event, dev, ptr);
}
return NOTIFY_DONE;
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 11/16] net: dsa: Handle SWITCHDEV_PORT_ATTR_SET
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Following patches will change the way we communicate getting or setting
a port's attribute and use a blocking notifier to perform those tasks.
Prepare DSA to support receiving notifier events targeting
SWITCHDEV_PORT_ATTR_SET and simply translate that into the existing
dsa_slave_port_attr_set() call.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
net/dsa/slave.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ae34be949d79..f8c7c1b2cd2f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1544,6 +1544,24 @@ dsa_slave_switchdev_port_obj_event(unsigned long event,
return notifier_from_errno(err);
}
+static int
+dsa_slave_switchdev_port_attr_event(unsigned long event,
+ struct net_device *netdev,
+ struct switchdev_notifier_port_attr_info *port_attr_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = dsa_slave_port_attr_set(netdev, port_attr_info->attr,
+ port_attr_info->trans);
+ break;
+ }
+
+ port_attr_info->handled = true;
+ return notifier_from_errno(err);
+}
+
static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -1556,6 +1574,8 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
case SWITCHDEV_PORT_OBJ_ADD: /* fall through */
case SWITCHDEV_PORT_OBJ_DEL:
return dsa_slave_switchdev_port_obj_event(event, dev, ptr);
+ case SWITCHDEV_PORT_ATTR_SET: /* fallthrough */
+ return dsa_slave_switchdev_port_attr_event(event, dev, ptr);
}
return NOTIFY_DONE;
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 10/16] rocker: Handle SWITCHDEV_PORT_ATTR_SET
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Following patches will change the way we communicate getting or setting
a port's attribute and use a blocking notifier to perform those tasks.
Prepare rocker to support receiving notifier events targeting
SWITCHDEV_PORT_ATTR_SET and simply translate that into the existing
rocker_port_attr_set call.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
drivers/net/ethernet/rocker/rocker_main.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index ff3f14504f4f..f10e4888ecff 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2811,6 +2811,24 @@ rocker_switchdev_port_obj_event(unsigned long event, struct net_device *netdev,
return notifier_from_errno(err);
}
+static int
+rocker_switchdev_port_attr_event(unsigned long event, struct net_device *netdev,
+ struct switchdev_notifier_port_attr_info
+ *port_attr_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_ATTR_SET:
+ err = rocker_port_attr_set(netdev, port_attr_info->attr,
+ port_attr_info->trans);
+ break;
+ }
+
+ port_attr_info->handled = true;
+ return notifier_from_errno(err);
+}
+
static int rocker_switchdev_blocking_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
@@ -2823,6 +2841,8 @@ static int rocker_switchdev_blocking_event(struct notifier_block *unused,
case SWITCHDEV_PORT_OBJ_ADD:
case SWITCHDEV_PORT_OBJ_DEL:
return rocker_switchdev_port_obj_event(event, dev, ptr);
+ case SWITCHDEV_PORT_ATTR_SET:
+ return rocker_switchdev_port_attr_event(event, dev, ptr);
}
return NOTIFY_DONE;
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 08/16] net: Get rid of switchdev_port_attr_get()
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
With the bridge no longer calling switchdev_port_attr_get() to obtain
the supported bridge port flags from a driver but instead trying to set
the bridge port flags directly and relying on the driver to reject
unsupported configurations, we can effectively get rid of
switchdev_port_attr_get() entirely since this was the only place where
it was called.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
Documentation/networking/switchdev.txt | 5 ++-
.../mellanox/mlxsw/spectrum_switchdev.c | 32 -------------------
drivers/net/ethernet/rocker/rocker_main.c | 30 -----------------
drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 19 -----------
include/net/switchdev.h | 8 -----
net/dsa/slave.c | 7 ----
6 files changed, 2 insertions(+), 99 deletions(-)
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 2842f63ad47b..0d9530bf745b 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -233,9 +233,8 @@ the bridge's FDB. It's possible, but not optimal, to enable learning on the
device port and on the bridge port, and disable learning_sync.
To support learning and learning_sync port attributes, the driver implements
-switchdev op switchdev_port_attr_get/set for
-SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. The driver should initialize the attributes
-to the hardware defaults.
+switchdev op switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS.
+The driver should initialize the attributes to the hardware defaults.
FDB Ageing
^^^^^^^^^^
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 8242a373f6e7..6b09d68671cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -431,37 +431,6 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan)
mlxsw_sp_bridge_vlan_destroy(bridge_vlan);
}
-static void mlxsw_sp_port_bridge_flags_get(struct mlxsw_sp_bridge *bridge,
- struct net_device *dev,
- unsigned long *brport_flags)
-{
- struct mlxsw_sp_bridge_port *bridge_port;
-
- bridge_port = mlxsw_sp_bridge_port_find(bridge, dev);
- if (WARN_ON(!bridge_port))
- return;
-
- memcpy(brport_flags, &bridge_port->flags, sizeof(*brport_flags));
-}
-
-static int mlxsw_sp_port_attr_get(struct net_device *dev,
- struct switchdev_attr *attr)
-{
- struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-
- switch (attr->id) {
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
- mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev,
- &attr->u.brport_flags);
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
static int
mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_bridge_vlan *bridge_vlan,
@@ -1957,7 +1926,6 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
}
static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
- .switchdev_port_attr_get = mlxsw_sp_port_attr_get,
.switchdev_port_attr_set = mlxsw_sp_port_attr_set,
};
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 375c4c908bea..ff3f14504f4f 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1606,17 +1606,6 @@ rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
trans);
}
-static int
-rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port,
- unsigned long *p_brport_flags)
-{
- struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
- if (!wops->port_attr_bridge_flags_get)
- return -EOPNOTSUPP;
- return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags);
-}
-
static int
rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port,
u32 ageing_time,
@@ -2064,24 +2053,6 @@ static const struct net_device_ops rocker_port_netdev_ops = {
* swdev interface
********************/
-static int rocker_port_attr_get(struct net_device *dev,
- struct switchdev_attr *attr)
-{
- const struct rocker_port *rocker_port = netdev_priv(dev);
- int err = 0;
-
- switch (attr->id) {
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
- err = rocker_world_port_attr_bridge_flags_get(rocker_port,
- &attr->u.brport_flags);
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return err;
-}
-
static int rocker_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -2154,7 +2125,6 @@ static int rocker_port_obj_del(struct net_device *dev,
}
static const struct switchdev_ops rocker_port_switchdev_ops = {
- .switchdev_port_attr_get = rocker_port_attr_get,
.switchdev_port_attr_set = rocker_port_attr_set,
};
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 79635d1091df..b195b09e0d1d 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -640,24 +640,6 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev)
fsl_mc_free_irqs(sw_dev);
}
-static int swdev_port_attr_get(struct net_device *netdev,
- struct switchdev_attr *attr)
-{
- struct ethsw_port_priv *port_priv = netdev_priv(netdev);
-
- switch (attr->id) {
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
- attr->u.brport_flags =
- (port_priv->ethsw_data->learning ? BR_LEARNING : 0) |
- (port_priv->flood ? BR_FLOOD : 0);
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
static int port_attr_stp_state_set(struct net_device *netdev,
struct switchdev_trans *trans,
u8 state)
@@ -933,7 +915,6 @@ static int swdev_port_obj_del(struct net_device *netdev,
}
static const struct switchdev_ops ethsw_port_switchdev_ops = {
- .switchdev_port_attr_get = swdev_port_attr_get,
.switchdev_port_attr_set = swdev_port_attr_set,
};
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e2083824e577..96cd3e795f7f 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -178,8 +178,6 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info)
#ifdef CONFIG_NET_SWITCHDEV
void switchdev_deferred_process(void);
-int switchdev_port_attr_get(struct net_device *dev,
- struct switchdev_attr *attr);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr);
int switchdev_port_obj_add(struct net_device *dev,
@@ -224,12 +222,6 @@ static inline void switchdev_deferred_process(void)
{
}
-static inline int switchdev_port_attr_get(struct net_device *dev,
- struct switchdev_attr *attr)
-{
- return -EOPNOTSUPP;
-}
-
static inline int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr)
{
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5797da954e77..ae34be949d79 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -379,12 +379,6 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev,
return 0;
}
-static int dsa_slave_port_attr_get(struct net_device *dev,
- struct switchdev_attr *attr)
-{
- return -EOPNOTSUPP;
-}
-
static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
struct sk_buff *skb)
{
@@ -1051,7 +1045,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
};
static const struct switchdev_ops dsa_slave_switchdev_ops = {
- .switchdev_port_attr_get = dsa_slave_port_attr_get,
.switchdev_port_attr_set = dsa_slave_port_attr_set,
};
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 07/16] net: Remove SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Now that we have converted the bridge code and the drivers to check for
bridge port(s) flags at the time we try to set them, there is no need
for a get() -> set() sequence anymore and
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT therefore becomes unused.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
.../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 4 ----
drivers/net/ethernet/rocker/rocker_main.c | 4 ----
drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 3 ---
include/net/switchdev.h | 2 --
net/dsa/slave.c | 10 +---------
5 files changed, 1 insertion(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 468a6d513074..8242a373f6e7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -455,10 +455,6 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev,
mlxsw_sp_port_bridge_flags_get(mlxsw_sp->bridge, attr->orig_dev,
&attr->u.brport_flags);
break;
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
- attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
- BR_MCAST_FLOOD;
- break;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 8657313b6f30..375c4c908bea 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2075,10 +2075,6 @@ static int rocker_port_attr_get(struct net_device *dev,
err = rocker_world_port_attr_bridge_flags_get(rocker_port,
&attr->u.brport_flags);
break;
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
- err = rocker_world_port_attr_bridge_flags_support_get(rocker_port,
- &attr->u.brport_flags_support);
- break;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 6228c4375835..79635d1091df 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -651,9 +651,6 @@ static int swdev_port_attr_get(struct net_device *netdev,
(port_priv->ethsw_data->learning ? BR_LEARNING : 0) |
(port_priv->flood ? BR_FLOOD : 0);
break;
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
- attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD;
- break;
default:
return -EOPNOTSUPP;
}
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 5e87b54c5dc5..e2083824e577 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -45,7 +45,6 @@ enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_UNDEFINED,
SWITCHDEV_ATTR_ID_PORT_STP_STATE,
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
- SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
SWITCHDEV_ATTR_ID_PORT_MROUTER,
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
@@ -62,7 +61,6 @@ struct switchdev_attr {
union {
u8 stp_state; /* PORT_STP_STATE */
unsigned long brport_flags; /* PORT_BRIDGE_FLAGS */
- unsigned long brport_flags_support; /* PORT_BRIDGE_FLAGS_SUPPORT */
bool mrouter; /* PORT_MROUTER */
clock_t ageing_time; /* BRIDGE_AGEING_TIME */
bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 212fc1cc27fc..5797da954e77 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -382,15 +382,7 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev,
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
- switch (attr->id) {
- case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
- attr->u.brport_flags_support = 0;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
+ return -EOPNOTSUPP;
}
static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 06/16] net: bridge: Stop calling switchdev_port_attr_get()
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
Now that all switchdev drivers have been converted to checking the
bridge port flags during the prepare phase of
switchdev_port_attr_set(), we can move straight to trying to set the
desired flag through SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
net/bridge/br_switchdev.c | 20 +++-----------------
1 file changed, 3 insertions(+), 17 deletions(-)
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index db9e8ab96d48..939f300522c5 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -64,29 +64,15 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
{
struct switchdev_attr attr = {
.orig_dev = p->dev,
- .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
+ .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
+ .flags = SWITCHDEV_F_DEFER,
+ .u.brport_flags = flags,
};
int err;
if (mask & ~BR_PORT_FLAGS_HW_OFFLOAD)
return 0;
- err = switchdev_port_attr_get(p->dev, &attr);
- if (err == -EOPNOTSUPP)
- return 0;
- if (err)
- return err;
-
- /* Check if specific bridge flag attribute offload is supported */
- if (!(attr.u.brport_flags_support & mask)) {
- br_warn(p->br, "bridge flag offload is not supported %u(%s)\n",
- (unsigned int)p->port_no, p->dev->name);
- return -EOPNOTSUPP;
- }
-
- attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
- attr.flags = SWITCHDEV_F_DEFER;
- attr.u.brport_flags = flags;
err = switchdev_port_attr_set(p->dev, &attr);
if (err) {
br_warn(p->br, "error setting offload flag on port %u(%s)\n",
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 05/16] rocker: Check bridge flags during prepare phase
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
In preparation for getting rid of switchdev_port_attr_get(), have rocker
check for the bridge flags being set through switchdev_port_attr_set()
with the SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS attribute identifier.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
drivers/net/ethernet/rocker/rocker_main.c | 40 ++++++++++++++---------
1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 66f72f8c46e5..8657313b6f30 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1565,18 +1565,42 @@ static int rocker_world_port_attr_stp_state_set(struct rocker_port *rocker_port,
return wops->port_attr_stp_state_set(rocker_port, state);
}
+static int
+rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port *
+ rocker_port,
+ unsigned long *
+ p_brport_flags_support)
+{
+ struct rocker_world_ops *wops = rocker_port->rocker->wops;
+
+ if (!wops->port_attr_bridge_flags_support_get)
+ return -EOPNOTSUPP;
+ return wops->port_attr_bridge_flags_support_get(rocker_port,
+ p_brport_flags_support);
+}
+
static int
rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port,
unsigned long brport_flags,
struct switchdev_trans *trans)
{
struct rocker_world_ops *wops = rocker_port->rocker->wops;
+ unsigned long brport_flags_s;
+ int err;
if (!wops->port_attr_bridge_flags_set)
return -EOPNOTSUPP;
- if (switchdev_trans_ph_prepare(trans))
+ if (switchdev_trans_ph_prepare(trans)) {
+ err = rocker_world_port_attr_bridge_flags_support_get(rocker_port,
+ &brport_flags_s);
+ if (err)
+ return err;
+
+ if (brport_flags & ~brport_flags_s)
+ return -EOPNOTSUPP;
return 0;
+ }
return wops->port_attr_bridge_flags_set(rocker_port, brport_flags,
trans);
@@ -1593,20 +1617,6 @@ rocker_world_port_attr_bridge_flags_get(const struct rocker_port *rocker_port,
return wops->port_attr_bridge_flags_get(rocker_port, p_brport_flags);
}
-static int
-rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port *
- rocker_port,
- unsigned long *
- p_brport_flags_support)
-{
- struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
- if (!wops->port_attr_bridge_flags_support_get)
- return -EOPNOTSUPP;
- return wops->port_attr_bridge_flags_support_get(rocker_port,
- p_brport_flags_support);
-}
-
static int
rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port,
u32 ageing_time,
--
2.17.1
^ permalink raw reply related
* [PATCH net-next 04/16] net: dsa: Add setter for SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS
From: Florian Fainelli @ 2019-02-09 0:32 UTC (permalink / raw)
To: netdev
Cc: Florian Fainelli, David S. Miller, Ido Schimmel, open list,
open list:STAGING SUBSYSTEM, moderated list:ETHERNET BRIDGE, jiri,
andrew, vivien.didelot
In-Reply-To: <20190209003248.31088-1-f.fainelli@gmail.com>
In preparation for removing SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
add support for a function that processes the
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS attribute and returns -EOPNOTSUPP
if any flag is set, since DSA does not yet support toggling those
bridge port attributes.
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
net/dsa/dsa_priv.h | 3 +++
net/dsa/port.c | 10 ++++++++++
net/dsa/slave.c | 4 ++++
3 files changed, 17 insertions(+)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 1f4972dab9f2..97594f0b6efb 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -150,6 +150,9 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct switchdev_trans *trans);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
struct switchdev_trans *trans);
+int dsa_port_bridge_port_flags_set(struct dsa_port *dp,
+ unsigned long brport_flags,
+ struct switchdev_trans *trans);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2d7e01b23572..2ce3752203cf 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -177,6 +177,16 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
}
+int dsa_port_bridge_port_flags_set(struct dsa_port *dp,
+ unsigned long brport_flags,
+ struct switchdev_trans *trans)
+{
+ if (brport_flags)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid)
{
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 70395a0ae52e..212fc1cc27fc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -292,6 +292,10 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
break;
+ case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ ret = dsa_port_bridge_port_flags_set(dp, attr->u.brport_flags,
+ trans);
+ break;
default:
ret = -EOPNOTSUPP;
break;
--
2.17.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox