Netdev List
 help / color / mirror / Atom feed
* [net-next] net: crypto set sk to NULL when af_alg_release.
From: Mao Wenan @ 2019-02-15 14:24 UTC (permalink / raw)
  To: netdev, davem, xiyou.wangcong, linux-kernel

KASAN has found use-after-free in sockfs_setattr.
The existed commit 6d8c50dcb029 ("socket: close race condition between sock_close()
and sockfs_setattr()") is to fix this simillar issue, but it seems to ignore
that crypto module forgets to set the sk to NULL after af_alg_release.

KASAN report details as below:
BUG: KASAN: use-after-free in sockfs_setattr+0x120/0x150
Write of size 4 at addr ffff88837b956128 by task syz-executor0/4186

CPU: 2 PID: 4186 Comm: syz-executor0 Not tainted xxx + #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.10.2-1ubuntu1 04/01/2014
Call Trace:
 dump_stack+0xca/0x13e
 print_address_description+0x79/0x330
 ? vprintk_func+0x5e/0xf0
 kasan_report+0x18a/0x2e0
 ? sockfs_setattr+0x120/0x150
 sockfs_setattr+0x120/0x150
 ? sock_register+0x2d0/0x2d0
 notify_change+0x90c/0xd40
 ? chown_common+0x2ef/0x510
 chown_common+0x2ef/0x510
 ? chmod_common+0x3b0/0x3b0
 ? __lock_is_held+0xbc/0x160
 ? __sb_start_write+0x13d/0x2b0
 ? __mnt_want_write+0x19a/0x250
 do_fchownat+0x15c/0x190
 ? __ia32_sys_chmod+0x80/0x80
 ? trace_hardirqs_on_thunk+0x1a/0x1c
 __x64_sys_fchownat+0xbf/0x160
 ? lockdep_hardirqs_on+0x39a/0x5e0
 do_syscall_64+0xc8/0x580
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462589
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89
f7 48 89 d6 48 89
ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3
48 c7 c1 bc ff ff
ff f7 d8 64 89 01 48
RSP: 002b:00007fb4b2c83c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000104
RAX: ffffffffffffffda RBX: 000000000072bfa0 RCX: 0000000000462589
RDX: 0000000000000000 RSI: 00000000200000c0 RDI: 0000000000000007
RBP: 0000000000000005 R08: 0000000000001000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb4b2c846bc
R13: 00000000004bc733 R14: 00000000006f5138 R15: 00000000ffffffff

Allocated by task 4185:
 kasan_kmalloc+0xa0/0xd0
 __kmalloc+0x14a/0x350
 sk_prot_alloc+0xf6/0x290
 sk_alloc+0x3d/0xc00
 af_alg_accept+0x9e/0x670
 hash_accept+0x4a3/0x650
 __sys_accept4+0x306/0x5c0
 __x64_sys_accept4+0x98/0x100
 do_syscall_64+0xc8/0x580
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 4184:
 __kasan_slab_free+0x12e/0x180
 kfree+0xeb/0x2f0
 __sk_destruct+0x4e6/0x6a0
 sk_destruct+0x48/0x70
 __sk_free+0xa9/0x270
 sk_free+0x2a/0x30
 af_alg_release+0x5c/0x70
 __sock_release+0xd3/0x280
 sock_close+0x1a/0x20
 __fput+0x27f/0x7f0
 task_work_run+0x136/0x1b0
 exit_to_usermode_loop+0x1a7/0x1d0
 do_syscall_64+0x461/0x580
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Syzkaller reproducer:
r0 = perf_event_open(&(0x7f0000000000)={0x0, 0x70, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_config_ext}, 0x0, 0x0,
0xffffffffffffffff, 0x0)
r1 = socket$alg(0x26, 0x5, 0x0)
getrusage(0x0, 0x0)
bind(r1, &(0x7f00000001c0)=@alg={0x26, 'hash\x00', 0x0, 0x0,
'sha256-ssse3\x00'}, 0x80)
r2 = accept(r1, 0x0, 0x0)
r3 = accept4$unix(r2, 0x0, 0x0, 0x0)
r4 = dup3(r3, r0, 0x0)
fchownat(r4, &(0x7f00000000c0)='\x00', 0x0, 0x0, 0x1000)

Fixes: 6d8c50dcb029 ("socket: close race condition between sock_close() and sockfs_setattr()")
Signed-off-by: Mao Wenan <maowenan@huawei.com>
---
 crypto/af_alg.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 17eb09d222ff..ec78a04eb136 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -122,8 +122,10 @@ static void alg_do_release(const struct af_alg_type *type, void *private)
 
 int af_alg_release(struct socket *sock)
 {
-	if (sock->sk)
+	if (sock->sk) {
 		sock_put(sock->sk);
+		sock->sk = NULL;
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(af_alg_release);
-- 
2.20.1


^ permalink raw reply related

* [PATCH net] net: ip6_gre: initialize erspan_ver just for erspan tunnels
From: Lorenzo Bianconi @ 2019-02-15 14:10 UTC (permalink / raw)
  To: davem; +Cc: netdev, petrm
In-Reply-To: <cover.1550239457.git.lorenzo.bianconi@redhat.com>

After commit c706863bc890 ("net: ip6_gre: always reports o_key to
userspace"), ip6gre and ip6gretap tunnels started reporting TUNNEL_KEY
output flag even if it is not configured.
ip6gre_fill_info checks erspan_ver value to add TUNNEL_KEY for
erspan tunnels, however in commit 84581bdae9587 ("erspan: set
erspan_ver to 1 by default when adding an erspan dev")
erspan_ver is initialized to 1 even for ip6gre or ip6gretap
Fix the issue moving erspan_ver initialization in a dedicated routine

Fixes: c706863bc890 ("net: ip6_gre: always reports o_key to userspace")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
---
 net/ipv6/ip6_gre.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 801a9a0c217e..43890898b0b5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1719,6 +1719,24 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
 	return 0;
 }
 
+static void ip6erspan_set_version(struct nlattr *data[],
+				  struct __ip6_tnl_parm *parms)
+{
+	parms->erspan_ver = 1;
+	if (data[IFLA_GRE_ERSPAN_VER])
+		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+	if (parms->erspan_ver == 1) {
+		if (data[IFLA_GRE_ERSPAN_INDEX])
+			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+	} else if (parms->erspan_ver == 2) {
+		if (data[IFLA_GRE_ERSPAN_DIR])
+			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+		if (data[IFLA_GRE_ERSPAN_HWID])
+			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+	}
+}
+
 static void ip6gre_netlink_parms(struct nlattr *data[],
 				struct __ip6_tnl_parm *parms)
 {
@@ -1767,20 +1785,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 
 	if (data[IFLA_GRE_COLLECT_METADATA])
 		parms->collect_md = true;
-
-	parms->erspan_ver = 1;
-	if (data[IFLA_GRE_ERSPAN_VER])
-		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
-	if (parms->erspan_ver == 1) {
-		if (data[IFLA_GRE_ERSPAN_INDEX])
-			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
-	} else if (parms->erspan_ver == 2) {
-		if (data[IFLA_GRE_ERSPAN_DIR])
-			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
-		if (data[IFLA_GRE_ERSPAN_HWID])
-			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
-	}
 }
 
 static int ip6gre_tap_init(struct net_device *dev)
@@ -2203,6 +2207,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
 	int err;
 
 	ip6gre_netlink_parms(data, &nt->parms);
+	ip6erspan_set_version(data, &nt->parms);
 	ign = net_generic(net, ip6gre_net_id);
 
 	if (nt->parms.collect_md) {
@@ -2248,6 +2253,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 
+	ip6erspan_set_version(data, &p);
 	ip6gre_tunnel_unlink_md(ign, t);
 	ip6gre_tunnel_unlink(ign, t);
 	ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
-- 
2.20.1


^ permalink raw reply related

* Re: [RESEND PATCH net] mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs
From: Jann Horn @ 2019-02-15 14:10 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: Network Development, kernel list
In-Reply-To: <20190214.142100.1155290437338631379.davem@davemloft.net>

On Thu, Feb 14, 2019 at 11:21 PM David Miller <davem@davemloft.net> wrote:
>
> From: Jann Horn <jannh@google.com>
> Date: Thu, 14 Feb 2019 22:26:22 +0100
>
> > On Thu, Feb 14, 2019 at 6:13 PM David Miller <davem@davemloft.net> wrote:
> >>
> >> From: Jann Horn <jannh@google.com>
> >> Date: Wed, 13 Feb 2019 22:45:59 +0100
> >>
> >> > The basic idea behind ->pagecnt_bias is: If we pre-allocate the maximum
> >> > number of references that we might need to create in the fastpath later,
> >> > the bump-allocation fastpath only has to modify the non-atomic bias value
> >> > that tracks the number of extra references we hold instead of the atomic
> >> > refcount. The maximum number of allocations we can serve (under the
> >> > assumption that no allocation is made with size 0) is nc->size, so that's
> >> > the bias used.
> >> >
> >> > However, even when all memory in the allocation has been given away, a
> >> > reference to the page is still held; and in the `offset < 0` slowpath, the
> >> > page may be reused if everyone else has dropped their references.
> >> > This means that the necessary number of references is actually
> >> > `nc->size+1`.
> >> >
> >> > Luckily, from a quick grep, it looks like the only path that can call
> >> > page_frag_alloc(fragsz=1) is TAP with the IFF_NAPI_FRAGS flag, which
> >> > requires CAP_NET_ADMIN in the init namespace and is only intended to be
> >> > used for kernel testing and fuzzing.
> >> >
> >> > To test for this issue, put a `WARN_ON(page_ref_count(page) == 0)` in the
> >> > `offset < 0` path, below the virt_to_page() call, and then repeatedly call
> >> > writev() on a TAP device with IFF_TAP|IFF_NO_PI|IFF_NAPI_FRAGS|IFF_NAPI,
> >> > with a vector consisting of 15 elements containing 1 byte each.
> >> >
> >> > Signed-off-by: Jann Horn <jannh@google.com>
> >>
> >> Applied and queued up for -stable.
> >
> > I had sent a v2 at Alexander Duyck's request an hour before you
> > applied the patch (with a minor difference that, in Alexander's
> > opinion, might be slightly more efficient). I guess the net tree
> > doesn't work like the mm tree, where patches can get removed and
> > replaced with newer versions? So if Alexander wants that change
> > (s/size/PAGE_FRAG_CACHE_MAX_SIZE/ in the refcount), someone has to
> > send that as a separate patch?
>
> Yes, please send a follow-up.  Sorry about that.

@Alexander Do you want to do that? It was your idea and I don't think
I can reasonably judge the usefulness of the change.

^ permalink raw reply

* Re: Bug in br_handle_frame
From: Nikolay Aleksandrov @ 2019-02-15 14:12 UTC (permalink / raw)
  To: Tomas Paukrt, netdev@vger.kernel.org; +Cc: roopa@cumulusnetworks.com
In-Reply-To: <2c692688-ea77-1e80-6607-6577b157873a@advantech-bb.cz>

On 15/02/2019 16:03, Tomas Paukrt wrote:
> Hi,
> 
> I have recently discovered that kernel 3.12.10 is occasionally crashing 
> due to NULL pointer dereference in function br_handle_frame when we 
> reconfigure the bridge, because function br_port_get_rcu returns NULL.
> 
> It is very hard for us to replicate this issue, because it happens about 
> once per month in our testing environment, but I have created the 
> attached patch. Can you please check it? The latest kernel seems to be 
> affected too.
> 
> Best regards
> 
> Tomas
> 

Hi,
That should not be possible, br_port_get_rcu() is a wrapper for
dev->rx_handler_data which in turn should always be present in case
rx_handler is called as can be seen in netdev_rx_handler_unregister().
Could you please share details about the crash and possibly a trace ?
Do you have any custom patches applied ?

Thanks,
 Nik


^ permalink raw reply

* Re: [PATCH v2 perf,bpf 01/11] perf, bpf: consider events with attr.bpf_event as side-band events
From: Arnaldo Carvalho de Melo @ 2019-02-15 14:14 UTC (permalink / raw)
  To: Song Liu
  Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, jolsa,
	namhyung
In-Reply-To: <20190214235840.2587123-1-songliubraving@fb.com>

Em Thu, Feb 14, 2019 at 03:58:40PM -0800, Song Liu escreveu:
> Events with bpf_event should be considered as side-band event, as they
> carry information about BPF programs.
> 
> Fixes: 6ee52e2a3fe4 ("perf, bpf: Introduce PERF_RECORD_BPF_EVENT")
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  kernel/events/core.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 0a8dab322111..9403bdda5f8c 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4238,7 +4238,8 @@ static bool is_sb_event(struct perf_event *event)
>  	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
>  	    attr->comm || attr->comm_exec ||
>  	    attr->task || attr->ksymbol ||
> -	    attr->context_switch)
> +	    attr->context_switch ||
> +	    attr->bpf_event)

What about attr->ksymbol?

- Arnaldo

>  		return true;
>  	return false;
>  }
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH v2 perf,bpf 05/11] perf, bpf: save bpf_prog_info in a rbtree in perf_env
From: Arnaldo Carvalho de Melo @ 2019-02-15 14:21 UTC (permalink / raw)
  To: Song Liu
  Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, jolsa,
	namhyung
In-Reply-To: <20190215000010.2590505-4-songliubraving@fb.com>

Em Thu, Feb 14, 2019 at 04:00:06PM -0800, Song Liu escreveu:
> bpf_prog_info contains information necessary to annotate bpf programs.
> This patch saves bpf_prog_info for bpf programs loaded in the system.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/perf/builtin-record.c |  2 +-
>  tools/perf/builtin-top.c    |  2 +-
>  tools/perf/util/bpf-event.c | 39 +++++++++++++----
>  tools/perf/util/bpf-event.h | 15 +++++--
>  tools/perf/util/env.c       | 83 +++++++++++++++++++++++++++++++++++++
>  tools/perf/util/env.h       | 14 +++++++
>  6 files changed, 142 insertions(+), 13 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 88ea11d57c6f..2355e0a9eda0 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1083,7 +1083,7 @@ static int record__synthesize(struct record *rec, bool tail)
>  		return err;
>  	}
>  
> -	err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
> +	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
>  						machine, opts);
>  	if (err < 0)
>  		pr_warning("Couldn't synthesize bpf events.\n");
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 5a486d4de56e..27d8d42e0a4d 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1216,7 +1216,7 @@ static int __cmd_top(struct perf_top *top)
>  
>  	init_process_thread(top);
>  
> -	ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process,
> +	ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
>  						&top->session->machines.host,
>  						&top->record_opts);
>  	if (ret < 0)
> diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
> index e6dfb95029e5..ead599bc4f4e 100644
> --- a/tools/perf/util/bpf-event.c
> +++ b/tools/perf/util/bpf-event.c
> @@ -1,15 +1,13 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <errno.h>
>  #include <stdlib.h>
> -#include <bpf/bpf.h>
> -#include <bpf/btf.h>
> -#include <bpf/libbpf.h>
> -#include <linux/btf.h>

I think you need these here, since in this C file you will use the
definitions for these structs, see further comments below.

>  #include <linux/err.h>
>  #include "bpf-event.h"
>  #include "debug.h"
>  #include "symbol.h"
>  #include "machine.h"
> +#include "env.h"
> +#include "session.h"
>  
>  #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
>  
> @@ -42,7 +40,7 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
>   *   -1 for failures;
>   *   -2 for lack of kernel support.
>   */
> -static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
> +static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
>  					       perf_event__handler_t process,
>  					       struct machine *machine,
>  					       int fd,
> @@ -52,17 +50,29 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
>  	struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
>  	struct bpf_event *bpf_event = &event->bpf_event;
>  	struct bpf_prog_info_linear *info_linear;
> +	struct perf_tool *tool = session->tool;
> +	struct bpf_prog_info_node *info_node;
>  	struct bpf_prog_info *info;
>  	struct btf *btf = NULL;
>  	bool has_btf = false;
> +	struct perf_env *env;
>  	u32 sub_prog_cnt, i;
>  	int err = 0;
>  	u64 arrays;
>  
> +	/*
> +	 * for perf-record and perf-report use header.env;
> +	 * otherwise, use global perf_env.
> +	 */
> +	env = session->data ? &session->header.env : &perf_env;
> +
>  	arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
>  	arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
>  	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
>  	arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
> +	arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
> +	arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
> +	arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
>  
>  	info_linear = bpf_program__get_prog_info_linear(fd, arrays);
>  	if (IS_ERR_OR_NULL(info_linear)) {
> @@ -151,8 +161,8 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
>  						     machine, process);
>  	}
>  
> -	/* Synthesize PERF_RECORD_BPF_EVENT */
>  	if (opts->bpf_event) {
> +		/* Synthesize PERF_RECORD_BPF_EVENT */
>  		*bpf_event = (struct bpf_event){
>  			.header = {
>  				.type = PERF_RECORD_BPF_EVENT,
> @@ -165,6 +175,19 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
>  		memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE);
>  		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
>  		event->header.size += machine->id_hdr_size;
> +
> +		/* save bpf_prog_info to env */
> +		info_node = malloc(sizeof(struct bpf_prog_info_node));
> +		if (info_node) {
> +			info_node->info_linear = info_linear;
> +			perf_env__insert_bpf_prog_info(env, info_node);
> +			info_linear = NULL;
> +		}
> +
> +		/*
> +		 * process after saving bpf_prog_info to env, so that
> +		 * required information is ready for look up
> +		 */
>  		err = perf_tool__process_synth_event(tool, event,
>  						     machine, process);
>  	}
> @@ -175,7 +198,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
>  	return err ? -1 : 0;
>  }
>  
> -int perf_event__synthesize_bpf_events(struct perf_tool *tool,
> +int perf_event__synthesize_bpf_events(struct perf_session *session,
>  				      perf_event__handler_t process,
>  				      struct machine *machine,
>  				      struct record_opts *opts)
> @@ -209,7 +232,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool,
>  			continue;
>  		}
>  
> -		err = perf_event__synthesize_one_bpf_prog(tool, process,
> +		err = perf_event__synthesize_one_bpf_prog(session, process,
>  							  machine, fd,
>  							  event, opts);
>  		close(fd);
> diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
> index 7890067e1a37..11e6730b6105 100644
> --- a/tools/perf/util/bpf-event.h
> +++ b/tools/perf/util/bpf-event.h
> @@ -3,19 +3,28 @@
>  #define __PERF_BPF_EVENT_H
>  
>  #include <linux/compiler.h>
> +#include <bpf/bpf.h>
> +#include <bpf/btf.h>
> +#include <bpf/libbpf.h>
> +#include <linux/btf.h>

Are you sure you'll need all of these headers here? Perhaps just some
forward declarations will do?

In fact the only bpf or btf structure here seems to be
bpf_prog_info_linear, which needs ust a forward declaration.

Avoiding these includes reduces the build time, and since the build-test
target does many builds and I want to build this in many containers, we
should try to reduce the build time by using just what is needed in each
header and C file. Also during development it helps with not rebuilding
tons of things when something unrelated changes in a header, etc.

- Arnaldo

> +#include <linux/rbtree.h>
>  #include "event.h"
>  
>  struct machine;
>  union perf_event;
>  struct perf_sample;
> -struct perf_tool;
>  struct record_opts;
>  
> +struct bpf_prog_info_node {
> +	struct bpf_prog_info_linear	*info_linear;
> +	struct rb_node			rb_node;
> +};
> +
>  #ifdef HAVE_LIBBPF_SUPPORT
>  int machine__process_bpf_event(struct machine *machine, union perf_event *event,
>  			       struct perf_sample *sample);
>  
> -int perf_event__synthesize_bpf_events(struct perf_tool *tool,
> +int perf_event__synthesize_bpf_events(struct perf_session *session,
>  				      perf_event__handler_t process,
>  				      struct machine *machine,
>  				      struct record_opts *opts);
> @@ -27,7 +36,7 @@ static inline int machine__process_bpf_event(struct machine *machine __maybe_unu
>  	return 0;
>  }
>  
> -static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused,
> +static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused,
>  						    perf_event__handler_t process __maybe_unused,
>  						    struct machine *machine __maybe_unused,
>  						    struct record_opts *opts __maybe_unused)
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index 4c23779e271a..665b6fe3c7b2 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -8,10 +8,86 @@
>  
>  struct perf_env perf_env;
>  
> +void perf_env__insert_bpf_prog_info(struct perf_env *env,
> +				    struct bpf_prog_info_node *info_node)
> +{
> +	__u32 prog_id = info_node->info_linear->info.id;
> +	struct bpf_prog_info_node *node;
> +	struct rb_node *parent = NULL;
> +	struct rb_node **p;
> +
> +	down_write(&env->bpf_info_lock);
> +	p = &env->bpf_prog_infos.rb_node;
> +
> +	while (*p != NULL) {
> +		parent = *p;
> +		node = rb_entry(parent, struct bpf_prog_info_node, rb_node);
> +		if (prog_id < node->info_linear->info.id) {
> +			p = &(*p)->rb_left;
> +		} else if (prog_id > node->info_linear->info.id) {
> +			p = &(*p)->rb_right;
> +		} else {
> +			pr_debug("duplicated bpf prog info %u\n", prog_id);
> +			up_write(&env->bpf_info_lock);
> +			return;
> +		}
> +	}
> +
> +	rb_link_node(&info_node->rb_node, parent, p);
> +	rb_insert_color(&info_node->rb_node, &env->bpf_prog_infos);
> +	up_write(&env->bpf_info_lock);
> +}
> +
> +struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
> +							__u32 prog_id)
> +{
> +	struct bpf_prog_info_node *node = NULL;
> +	struct rb_node *n;
> +
> +	down_read(&env->bpf_info_lock);
> +	n = env->bpf_prog_infos.rb_node;
> +
> +	while (n) {
> +		node = rb_entry(n, struct bpf_prog_info_node, rb_node);
> +		if (prog_id < node->info_linear->info.id)
> +			n = n->rb_left;
> +		else if (prog_id > node->info_linear->info.id)
> +			n = n->rb_right;
> +		else
> +			break;
> +	}
> +
> +	up_read(&env->bpf_info_lock);
> +	return node;
> +}
> +
> +/* purge data in bpf_prog_infos tree */
> +static void purge_bpf_info(struct perf_env *env)
> +{
> +	struct rb_root *root;
> +	struct rb_node *next;
> +
> +	down_write(&env->bpf_info_lock);
> +
> +	root = &env->bpf_prog_infos;
> +	next = rb_first(root);
> +
> +	while (next) {
> +		struct bpf_prog_info_node *node;
> +
> +		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
> +		next = rb_next(&node->rb_node);
> +		rb_erase_init(&node->rb_node, root);
> +		free(node);
> +	}
> +	up_write(&env->bpf_info_lock);
> +}
> +
>  void perf_env__exit(struct perf_env *env)
>  {
>  	int i;
>  
> +	purge_bpf_info(env);
>  	zfree(&env->hostname);
>  	zfree(&env->os_release);
>  	zfree(&env->version);
> @@ -38,6 +114,12 @@ void perf_env__exit(struct perf_env *env)
>  	zfree(&env->memory_nodes);
>  }
>  
> +static void init_bpf_rb_trees(struct perf_env *env)
> +{
> +	env->bpf_prog_infos = RB_ROOT;
> +	init_rwsem(&env->bpf_info_lock);
> +}
> +
>  int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
>  {
>  	int i;
> @@ -59,6 +141,7 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
>  
>  	env->nr_cmdline = argc;
>  
> +	init_bpf_rb_trees(env);
>  	return 0;
>  out_free:
>  	zfree(&env->cmdline_argv);
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index d01b8355f4ca..a6d25e91bc98 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -3,7 +3,10 @@
>  #define __PERF_ENV_H
>  
>  #include <linux/types.h>
> +#include <linux/rbtree.h>
>  #include "cpumap.h"
> +#include "rwsem.h"

You don't need the following header, just use a forward declaration for
the sole struct you use with a pointer:

struct bpf_prog_info_node;

> +#include "bpf-event.h"
>  
>  struct cpu_topology_map {
>  	int	socket_id;
> @@ -64,6 +67,13 @@ struct perf_env {
>  	struct memory_node	*memory_nodes;
>  	unsigned long long	 memory_bsize;
>  	u64                     clockid_res_ns;
> +
> +	/*
> +	 * bpf_info_lock protects bpf rbtrees. This is needed because the
> +	 * trees are accessed by different threads in perf-top
> +	 */
> +	struct rw_semaphore	bpf_info_lock;
> +	struct rb_root		bpf_prog_infos;

Please group this into a structure, i.e.:

	struct {
		struct rw_semaphore lock;
		struct rb_root	    infos;
	} bpf_progs;

>  };
>  
>  extern struct perf_env perf_env;
> @@ -80,4 +90,8 @@ const char *perf_env__arch(struct perf_env *env);
>  const char *perf_env__raw_arch(struct perf_env *env);
>  int perf_env__nr_cpus_avail(struct perf_env *env);
>  
> +void perf_env__insert_bpf_prog_info(struct perf_env *env,
> +				    struct bpf_prog_info_node *info_node);
> +struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
> +							__u32 prog_id);
>  #endif /* __PERF_ENV_H */
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH bpf-next 2/2] libbpf: Introduce bpf_object__btf
From: Daniel Borkmann @ 2019-02-15 14:23 UTC (permalink / raw)
  To: Yonghong Song, Andrey Ignatov, netdev@vger.kernel.org
  Cc: ast@kernel.org, Kernel Team
In-Reply-To: <fadf66fb-1ce5-06eb-d774-cb13219f802b@fb.com>

On 02/15/2019 02:53 AM, Yonghong Song wrote:
> On 2/14/19 3:01 PM, Andrey Ignatov wrote:
>> Add new accessor for bpf_object to get opaque struct btf * from it.
>>
>> struct btf * is needed for all operations with BTF and it's present in
>> bpf_object. The only thing missing is a way to get it.
>>
>> Example use-case is to get BTF key_type_id an value_type_id for a map in
> nit: an => and
>> bpf_object. It can be done with btf__get_map_kv_tids() but that function
>> requires struct btf *.

Series applied and fixed above typo, thanks!

^ permalink raw reply

* Re: [PATCH bpf-next] net: bpf: remove XDP_QUERY_XSK_UMEM enumerator
From: Daniel Borkmann @ 2019-02-15 14:23 UTC (permalink / raw)
  To: Björn Töpel, ast, netdev
  Cc: Jan Sokolowski, magnus.karlsson, magnus.karlsson, intel-wired-lan
In-Reply-To: <20190213170729.13845-1-bjorn.topel@gmail.com>

On 02/13/2019 06:07 PM, Björn Töpel wrote:
> From: Jan Sokolowski <jan.sokolowski@intel.com>
> 
> Commit c9b47cc1fabc ("xsk: fix bug when trying to use both copy and
> zero-copy on one queue id") moved the umem query code to the AF_XDP
> core, and therefore removed the need to query the netdevice for a
> umem.
> 
> This patch removes XDP_QUERY_XSK_UMEM and all code that implement that
> behavior, which is just dead code.
> 
> Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>

Applied, thanks!

^ permalink raw reply

* Re: [PATCH v2 perf,bpf 08/11] perf, bpf: save btf information as headers to perf.data
From: Arnaldo Carvalho de Melo @ 2019-02-15 14:26 UTC (permalink / raw)
  To: Song Liu, Jiri Olsa
  Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, namhyung,
	acme
In-Reply-To: <20190215000010.2590505-7-songliubraving@fb.com>

Em Thu, Feb 14, 2019 at 04:00:09PM -0800, Song Liu escreveu:
> This patch enables perf-record to save btf information as headers to
> perf.data A new header type HEADER_BTF is introduced for this data.

Jiri,

	Wouldn't it be better for this HEADER_BTF to be introduced
already as an user space event, Song, see:

  tools/perf/util/event.h

and:

  tools/perf/util/event.c

perf_event__synthesize_cpu_map()

- Arnaldo

 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/perf/util/header.c | 99 +++++++++++++++++++++++++++++++++++++++-
>  tools/perf/util/header.h |  1 +
>  2 files changed, 99 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 2ae76a9d06f6..3f1562afe8e5 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1125,6 +1125,45 @@ static int write_bpf_prog_info(struct feat_fd *ff,
>  	return ret;
>  }
>  
> +static int write_btf(struct feat_fd *ff,
> +		     struct perf_evlist *evlist __maybe_unused)
> +{
> +	struct perf_env *env = &ff->ph->env;
> +	struct rb_root *root;
> +	struct rb_node *next;
> +	u32 count = 0;
> +	int ret;
> +
> +	down_read(&env->bpf_info_lock);
> +
> +	root = &env->btfs;
> +	next = rb_first(root);
> +	while (next) {
> +		++count;
> +		next = rb_next(next);
> +	}
> +
> +	ret = do_write(ff, &count, sizeof(count));
> +
> +	if (ret < 0)
> +		goto out;
> +
> +	next = rb_first(root);
> +	while (next) {
> +		struct btf_node *node;
> +
> +		node = rb_entry(next, struct btf_node, rb_node);
> +		next = rb_next(&node->rb_node);
> +		ret = do_write(ff, node,
> +			       sizeof(struct btf_node) + node->data_size);
> +		if (ret < 0)
> +			goto out;
> +	}
> +out:
> +	up_read(&env->bpf_info_lock);
> +	return ret;
> +}
> +
>  static int cpu_cache_level__sort(const void *a, const void *b)
>  {
>  	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
> @@ -1628,6 +1667,28 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
>  	up_read(&env->bpf_info_lock);
>  }
>  
> +static void print_btf(struct feat_fd *ff, FILE *fp)
> +{
> +	struct perf_env *env = &ff->ph->env;
> +	struct rb_root *root;
> +	struct rb_node *next;
> +
> +	down_read(&env->bpf_info_lock);
> +
> +	root = &env->btfs;
> +	next = rb_first(root);
> +
> +	while (next) {
> +		struct btf_node *node;
> +
> +		node = rb_entry(next, struct btf_node, rb_node);
> +		next = rb_next(&node->rb_node);
> +		fprintf(fp, "# bpf_prog_info of id %u\n", node->id);
> +	}
> +
> +	up_read(&env->bpf_info_lock);
> +}
> +
>  static void free_event_desc(struct perf_evsel *events)
>  {
>  	struct perf_evsel *evsel;
> @@ -2723,6 +2784,41 @@ static int process_bpf_prog_info(struct feat_fd *ff,
>  	return err;
>  }
>  
> +static int process_btf(struct feat_fd *ff, void *data __maybe_unused)
> +{
> +	struct perf_env *env = &ff->ph->env;
> +	u32 count, i;
> +
> +	if (do_read_u32(ff, &count))
> +		return -1;
> +
> +	down_write(&env->bpf_info_lock);
> +
> +	for (i = 0; i < count; ++i) {
> +		struct btf_node btf_node;
> +		struct btf_node *node;
> +
> +		if (__do_read(ff, &btf_node, sizeof(struct btf_node)))
> +			return -1;
> +
> +		node = malloc(sizeof(struct btf_node) + btf_node.data_size);
> +		if (!node)
> +			return -1;
> +
> +		node->id = btf_node.id;
> +		node->data_size = btf_node.data_size;
> +
> +		if (__do_read(ff, node->data, btf_node.data_size)) {
> +			free(node);
> +			return -1;
> +		}
> +		perf_env__insert_btf(env, node);
> +	}
> +
> +	up_write(&env->bpf_info_lock);
> +	return 0;
> +}
> +
>  struct feature_ops {
>  	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
>  	void (*print)(struct feat_fd *ff, FILE *fp);
> @@ -2783,7 +2879,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
>  	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
>  	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
>  	FEAT_OPR(CLOCKID,       clockid,        false),
> -	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false)
> +	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false),
> +	FEAT_OPR(BTF,           btf,            false)
>  };
>  
>  struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index 0785c91b4c3a..ba51d8e43c53 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -40,6 +40,7 @@ enum {
>  	HEADER_MEM_TOPOLOGY,
>  	HEADER_CLOCKID,
>  	HEADER_BPF_PROG_INFO,
> +	HEADER_BTF,
>  	HEADER_LAST_FEATURE,
>  	HEADER_FEAT_BITS	= 256,
>  };
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH v2 perf,bpf 09/11] perf-top: add option --bpf-event
From: Arnaldo Carvalho de Melo @ 2019-02-15 14:27 UTC (permalink / raw)
  To: Song Liu
  Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, jolsa,
	namhyung
In-Reply-To: <20190215000010.2590505-8-songliubraving@fb.com>

Em Thu, Feb 14, 2019 at 04:00:10PM -0800, Song Liu escreveu:
> bpf events are only tracked when opts->bpf_event is enabled. This patch
> adds command line flag to enable this for perf-top.

Shouldn't this start as enabled and we just provide a way to disable it
for testing purposes? Normally perf top works system wide, and then we
want to get info about BPF events, I think.

- Arnaldo
 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/perf/builtin-top.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 27d8d42e0a4d..5271d7211b9c 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1492,6 +1492,7 @@ int cmd_top(int argc, const char **argv)
>  		    "Display raw encoding of assembly instructions (default)"),
>  	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
>  		    "Enable kernel symbol demangling"),
> +	OPT_BOOLEAN(0, "bpf-event", &opts->bpf_event, "record bpf events"),
>  	OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
>  		    "objdump binary to use for disassembly and annotations"),
>  	OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH v2 perf,bpf 07/11] perf, bpf: save btf in a rbtree in perf_env
From: Arnaldo Carvalho de Melo @ 2019-02-15 14:30 UTC (permalink / raw)
  To: Song Liu
  Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, jolsa,
	namhyung
In-Reply-To: <20190215000010.2590505-6-songliubraving@fb.com>

Em Thu, Feb 14, 2019 at 04:00:08PM -0800, Song Liu escreveu:
> btf contains information necessary to annotate bpf programs. This patch
> saves btf for bpf programs loaded in the system.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/perf/util/bpf-event.c | 22 +++++++++++++
>  tools/perf/util/bpf-event.h |  7 ++++
>  tools/perf/util/env.c       | 65 +++++++++++++++++++++++++++++++++++++
>  tools/perf/util/env.h       |  3 ++
>  4 files changed, 97 insertions(+)
> 
> diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
> index ead599bc4f4e..37a5b8134e00 100644
> --- a/tools/perf/util/bpf-event.c
> +++ b/tools/perf/util/bpf-event.c
> @@ -30,6 +30,27 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused,
>  	return 0;
>  }
>  
> +static int perf_fetch_btf(struct perf_env *env, u32 btf_id, struct btf *btf)

Please use perf_env__fetch_bpf

> +{
> +	struct btf_node *node;
> +	u32 data_size;
> +	const void *data;
> +
> +	data = btf__get_raw_data(btf, &data_size);
> +
> +	node = malloc(data_size + sizeof(struct btf_node));
> +
> +	if (!node)
> +		return -1;
> +
> +	node->id = btf_id;
> +	node->data_size = data_size;
> +	memcpy(node->data, data, data_size);
> +
> +	perf_env__insert_btf(env, node);

Just like you did above :-)

> +	return 0;
> +}
> +
>  /*
>   * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
>   * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
> @@ -109,6 +130,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
>  			goto out;
>  		}
>  		has_btf = true;
> +		perf_fetch_btf(env, info->btf_id, btf);
>  	}
>  
>  	/* Synthesize PERF_RECORD_KSYMBOL */
> diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
> index 11e6730b6105..60ce24e4e5c6 100644
> --- a/tools/perf/util/bpf-event.h
> +++ b/tools/perf/util/bpf-event.h
> @@ -20,6 +20,13 @@ struct bpf_prog_info_node {
>  	struct rb_node			rb_node;
>  };
>  
> +struct btf_node {
> +	struct rb_node	rb_node;
> +	u32		id;
> +	u32		data_size;
> +	char		data[];
> +};
> +
>  #ifdef HAVE_LIBBPF_SUPPORT
>  int machine__process_bpf_event(struct machine *machine, union perf_event *event,
>  			       struct perf_sample *sample);
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index 665b6fe3c7b2..6f9e3d4b94bc 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -61,6 +61,57 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
>  	return node;
>  }
>  
> +void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
> +{
> +	struct rb_node *parent = NULL;
> +	__u32 btf_id = btf_node->id;
> +	struct btf_node *node;
> +	struct rb_node **p;
> +
> +	down_write(&env->bpf_info_lock);
> +	p = &env->btfs.rb_node;
> +
> +	while (*p != NULL) {
> +		parent = *p;
> +		node = rb_entry(parent, struct btf_node, rb_node);
> +		if (btf_id < node->id) {
> +			p = &(*p)->rb_left;
> +		} else if (btf_id > node->id) {
> +			p = &(*p)->rb_right;
> +		} else {
> +			pr_debug("duplicated btf %u\n", btf_id);

			goto out:

> +			up_write(&env->bpf_info_lock);
> +			return;
> +		}
> +	}
> +
> +	rb_link_node(&btf_node->rb_node, parent, p);
> +	rb_insert_color(&btf_node->rb_node, &env->btfs);
out:
> +	up_write(&env->bpf_info_lock);
> +}
> +
> +struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
> +{
> +	struct btf_node *node = NULL;
> +	struct rb_node *n;
> +
> +	down_read(&env->bpf_info_lock);
> +	n = env->btfs.rb_node;
> +
> +	while (n) {
> +		node = rb_entry(n, struct btf_node, rb_node);
> +		if (btf_id < node->id)
> +			n = n->rb_left;
> +		else if (btf_id > node->id)
> +			n = n->rb_right;
> +		else
> +			break;
> +	}
> +
> +	up_read(&env->bpf_info_lock);
> +	return node;
> +}
> +
>  /* purge data in bpf_prog_infos tree */
>  static void purge_bpf_info(struct perf_env *env)

The above should've been perf_env__purge_bpf()

>  {
> @@ -80,6 +131,19 @@ static void purge_bpf_info(struct perf_env *env)
>  		rb_erase_init(&node->rb_node, root);
>  		free(node);
>  	}
> +
> +	root = &env->btfs;
> +	next = rb_first(root);
> +
> +	while (next) {
> +		struct btf_node *node;
> +
> +		node = rb_entry(next, struct btf_node, rb_node);
> +		next = rb_next(&node->rb_node);
> +		rb_erase_init(&node->rb_node, root);
> +		free(node);
> +	}
> +
>  	up_write(&env->bpf_info_lock);
>  }
>  
> @@ -117,6 +181,7 @@ void perf_env__exit(struct perf_env *env)
>  static void init_bpf_rb_trees(struct perf_env *env)
>  {
>  	env->bpf_prog_infos = RB_ROOT;
> +	env->btfs = RB_ROOT;
>  	init_rwsem(&env->bpf_info_lock);
>  }
>  
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index a6d25e91bc98..f0c74255423c 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -74,6 +74,7 @@ struct perf_env {
>  	 */
>  	struct rw_semaphore	bpf_info_lock;
>  	struct rb_root		bpf_prog_infos;
> +	struct rb_root		btfs;
>  };
>  
>  extern struct perf_env perf_env;
> @@ -94,4 +95,6 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env,
>  				    struct bpf_prog_info_node *info_node);
>  struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
>  							__u32 prog_id);
> +void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
> +struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
>  #endif /* __PERF_ENV_H */
> -- 
> 2.17.1

^ permalink raw reply

* Re: [PATCH] net: marvell: mvneta: fix DMA debug warning
From: Gregory CLEMENT @ 2019-02-15 14:31 UTC (permalink / raw)
  To: Russell King, Thomas Petazzoni; +Cc: David S. Miller, netdev
In-Reply-To: <E1gudxz-0001T0-0K@rmk-PC.armlinux.org.uk>

Hi Russell,
 
 On ven., févr. 15 2019, Russell King <rmk+kernel@armlinux.org.uk> wrote:

> Booting 4.20 on SolidRun Clearfog issues this warning with DMA API
> debug enabled:
>
> WARNING: CPU: 0 PID: 555 at kernel/dma/debug.c:1230 check_sync+0x514/0x5bc
> mvneta f1070000.ethernet: DMA-API: device driver tries to sync DMA memory it has not allocated [device address=0x000000002dd7dc00] [size=240 bytes]
> Modules linked in: ahci mv88e6xxx dsa_core xhci_plat_hcd xhci_hcd devlink armada_thermal marvell_cesa des_generic ehci_orion phy_armada38x_comphy mcp3021 spi_orion evbug sfp mdio_i2c ip_tables x_tables
> CPU: 0 PID: 555 Comm: bridge-network- Not tainted 4.20.0+ #291
> Hardware name: Marvell Armada 380/385 (Device Tree)
> [<c0019638>] (unwind_backtrace) from [<c0014888>] (show_stack+0x10/0x14)
> [<c0014888>] (show_stack) from [<c07f54e0>] (dump_stack+0x9c/0xd4)
> [<c07f54e0>] (dump_stack) from [<c00312bc>] (__warn+0xf8/0x124)
> [<c00312bc>] (__warn) from [<c00313b0>] (warn_slowpath_fmt+0x38/0x48)
> [<c00313b0>] (warn_slowpath_fmt) from [<c00b0370>] (check_sync+0x514/0x5bc)
> [<c00b0370>] (check_sync) from [<c00b04f8>] (debug_dma_sync_single_range_for_cpu+0x6c/0x74)
> [<c00b04f8>] (debug_dma_sync_single_range_for_cpu) from [<c051bd14>] (mvneta_poll+0x298/0xf58)
> [<c051bd14>] (mvneta_poll) from [<c0656194>] (net_rx_action+0x128/0x424)
> [<c0656194>] (net_rx_action) from [<c000a230>] (__do_softirq+0xf0/0x540)
> [<c000a230>] (__do_softirq) from [<c00386e0>] (irq_exit+0x124/0x144)
> [<c00386e0>] (irq_exit) from [<c009b5e0>] (__handle_domain_irq+0x58/0xb0)
> [<c009b5e0>] (__handle_domain_irq) from [<c03a63c4>] (gic_handle_irq+0x48/0x98)
> [<c03a63c4>] (gic_handle_irq) from [<c0009a10>] (__irq_svc+0x70/0x98)
> ...
>
> This appears to be caused by mvneta_rx_hwbm() calling
> dma_sync_single_range_for_cpu() with the wrong struct device pointer,
> as the buffer manager device pointer is used to map and unmap the
> buffer.  Fix this.
>

For me it makes sens to associate the DMA buffer to the HWBM, so this
fixes looks good for me.

Reviwed-by: Gregory CLEMENT <gregory.clement@bootlin.com>

Thanks,

Gregory


> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
> ---
> Please check that this is the correct fix.
>
>  drivers/net/ethernet/marvell/mvneta.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
> index ef0a85bbc586..0e1f87fca952 100644
> --- a/drivers/net/ethernet/marvell/mvneta.c
> +++ b/drivers/net/ethernet/marvell/mvneta.c
> @@ -2086,7 +2086,7 @@ static int mvneta_rx_hwbm(struct napi_struct *napi,
>  			if (unlikely(!skb))
>  				goto err_drop_frame_ret_pool;
>  
> -			dma_sync_single_range_for_cpu(dev->dev.parent,
> +			dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev,
>  			                              rx_desc->buf_phys_addr,
>  			                              MVNETA_MH_SIZE + NET_SKB_PAD,
>  			                              rx_bytes,
> -- 
> 2.7.4
>

-- 
Gregory Clement, Bootlin
Embedded Linux and Kernel engineering
http://bootlin.com

^ permalink raw reply

* Re: Bug in br_handle_frame
From: Nikolay Aleksandrov @ 2019-02-15 14:31 UTC (permalink / raw)
  To: Tomas Paukrt, netdev@vger.kernel.org; +Cc: roopa@cumulusnetworks.com
In-Reply-To: <f8bed7e5-0e3a-095f-4f0f-864d71695445@advantech-bb.cz>

On 15/02/2019 16:23, Tomas Paukrt wrote:
> Hi Nik,
> 
> this is the trace:
> 
> [  522.578735] Unable to handle kernel NULL pointer dereference at virtual address 00000011
> [  522.578804] pgd = c3b70000
> [  522.578842] [00000011] *pgd=03b63831, *pte=00000000, *ppte=00000000
> [  522.578943] Internal error: Oops: 17 [#1] ARM
> [  522.578980] Modules linked in:
> [  522.579039] CPU: 0    Not tainted  (3.5.0-lsp-3.3.1 #1)
> [  522.579103] PC is at br_handle_frame+0xf4/0x26c
> [  522.579146] LR is at 0xffff
> [  522.579194] pc : []    lr : [<0000ffff>]    psr: 00000013
> [  522.579194] sp : c3bd5c10  ip : 0000ffff  fp : c3bd5c44
> [  522.579242] r10: c3affd80  r9 : 0000ff3d  r8 : c3a10600
> [  522.579279] r7 : c3bd5c5c  r6 : 00000000  r5 : c3b5daa2  r4 : c3affd80
> [  522.579322] r3 : c398c800  r2 : 0000ffff  r1 : 0000ffff  r0 : 0000ffff
> [  522.579364] Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
> [  522.579407] Control: 0005317f  Table: 03b70000  DAC: 00000015
> [  522.579444] Process brctl (pid: 4573, stack limit = 0xc3bd4270)
> [  522.579482] Stack: (0xc3bd5c10 to 0xc3bd6000)
> [  522.579535] 5c00:                                     c0446f94 c0470c44 c3bd5c5c c3bd5c28
> [  522.579599] 5c20: c02c1780 c043e548 c398c800 00000001 c0470720 c043e548 c3bd5c94 c3bd5c48
> [  522.579658] 5c40: c02164d0 c02c1790 00000000 00000000 c043e540 00000120 00000120 c3affd80
> [  522.579722] 5c60: c3bd5c94 c043e548 c020a5c4 c3affd80 00000180 c398cbd0 000000be c3affd80
> [  522.579786] 5c80: 00000080 0000003e c3bd5cb4 c3bd5c98 c0216704 c02161c8 ffdd8000 00000180
> [  522.579844] 5ca0: 00000180 00000180 c3bd5cf4 c3bd5cb8 c01ab67c c02166f4 00000040 00000040
> [  522.579908] 5cc0: 00000000 00000000 20000013 c01ab438 c398cbd0 0000012c 00000040 c0470720
> [  522.579972] 5ce0: 000056f3 c0470720 c3bd5d2c c3bd5cf8 c0217568 c01ab448 c0470728 c0445270
> [  522.580031] 5d00: 00000000 00000001 c047a3ac 0000000c c3bd4000 c0445ff0 00000100 00000003
> [  522.580095] 5d20: c3bd5d6c c3bd5d30 c001b7e0 c0217474 c3bd5d6c c3bd5d40 c3bd4030 0000000a
> [  522.580154] 5d40: c0012418 c0451b9c 00000001 00000000 c0471144 c047116c 00000000 00000000
> [  522.580218] 5d60: c3bd5d7c c3bd5d70 c001bc24 c001b748 c3bd5d9c c3bd5d80 c0009b80 c001bbe4
> [  522.580282] 5d80: 00000002 c3bd5dc8 00000000 00000000 c3bd5dc4 c3bd5da0 c00086c8 c0009b54
> [  522.580346] 5da0: c0213bb0 c02c127c 60000013 ffffffff c3bd5dfc c3a1068c c3bd5e34 c3bd5dc8
> [  522.580404] 5dc0: c0008f80 c000867c 00000000 c02c1780 c3a10600 00000000 c3a10600 00000000
> [  522.580468] 5de0: c3bb33a0 c398c800 c3a1068c 00000000 00000000 c3bd5e34 c3bd5df0 c3bd5e10
> [  522.580532] 5e00: c0213bb0 c02c127c 60000013 ffffffff 00000004 c3bb33a0 00000001 c0359864
> [  522.580591] 5e20: c3bd4018 00000000 c3bd5e54 c3bd5e38 c02c1a38 c02c1050 000089a2 000089a2
> [  522.580655] 5e40: c3bb3000 c3bd5ea0 c3bd5e64 c3bd5e58 c02c2334 c02c19fc c3bd5e94 c3bd5e68
> [  522.580719] 5e60: c02190dc c02c22e4 c0469e00 be8efb10 c3bd5e94 c3bd5e80 000089a2 c0469e00
> [  522.580783] 5e80: be8efb10 c3bd5ea0 c3bd5eec c3bd5e98 c0219480 c0218e28 c3ab6d20 00000000
> [  522.580842] 5ea0: 00307262 00000000 00000000 00000000 00000004 b6f24e88 b6f4acf8 b6d768e4
> [  522.580906] 5ec0: c0262184 000089a2 fffffdfd be8efb10 c30d6500 c0009484 c3bd4000 00000000
> [  522.580975] 5ee0: c3bd5f0c c3bd5ef0 c0205680 c0219128 c0205544 c3494660 be8efb10 c30d6500
> [  522.581039] 5f00: c3bd5f7c c3bd5f10 c008ca98 c0205554 c001b83c c001b310 c3bd5f54 c3bd5f28
> [  522.581108] 5f20: c3bd4018 00000009 c0012418 c0451b9c 00000001 00000000 c047a380 c0451b9c
> [  522.581172] 5f40: 00000001 00000000 c3bd5f64 c3bd5f58 c001bc28 c00501ac be8efb10 000089a2
> [  522.581236] 5f60: 00000003 c30d6500 c0009484 c3bd4000 c3bd5fa4 c3bd5f80 c008cc4c c008c704
> [  522.581306] 5f80: c00086c8 00000000 be8eff14 00000002 be8efe10 00000036 00000000 c3bd5fa8
> [  522.581370] 5fa0: c0009300 c008cc20 be8eff14 00000002 00000003 000089a2 be8efb10 00056f0d
> [  522.581434] 5fc0: be8eff14 00000002 be8efe10 00000036 be8eff10 00000000 b6f4b000 00000000
> [  522.581498] 5fe0: b6e36ef0 be8efac4 000150d0 b6e36efc 60000010 00000003 00000000 00000000
> [  522.581535] Backtrace: 
> [  522.581647] [] (br_handle_frame+0x0/0x26c) from [] (__netif_receive_skb+0x318/0x52c)
> [  522.581695]  r9:c043e548 r8:c0470720 r7:00000001 r6:c398c800 r5:c043e548
> r4:c02c1780
> [  522.581892] [] (__netif_receive_skb+0x0/0x52c) from [] (netif_receive_skb+0x20/0x68)
> [  522.581972] [] (netif_receive_skb+0x0/0x68) from [] (macb_poll+0x244/0x3cc)
> [  522.582015]  r4:00000180
> [  522.582100] [] (macb_poll+0x0/0x3cc) from [] (net_rx_action+0x104/0x1b8)
> [  522.582196] [] (net_rx_action+0x0/0x1b8) from [] (__do_softirq+0xa8/0x14c)
> [  522.582282] [] (__do_softirq+0x0/0x14c) from [] (irq_exit+0x50/0x5c)
> [  522.582362] [] (irq_exit+0x0/0x5c) from [] (handle_IRQ+0x3c/0x8c)
> [  522.582431] [] (handle_IRQ+0x0/0x8c) from [] (vic_handle_irq+0x5c/0x9c)
> [  522.582474]  r6:00000000 r5:00000000 r4:c3bd5dc8 r3:00000002
> [  522.582607] [] (vic_handle_irq+0x0/0x9c) from [] (__irq_svc+0x40/0x60)
> [  522.582655] Exception stack(0xc3bd5dc8 to 0xc3bd5e10)
> [  522.582719] 5dc0:                   00000000 c02c1780 c3a10600 00000000 c3a10600 00000000
> [  522.582783] 5de0: c3bb33a0 c398c800 c3a1068c 00000000 00000000 c3bd5e34 c3bd5df0 c3bd5e10
> [  522.582842] 5e00: c0213bb0 c02c127c 60000013 ffffffff
> [  522.582874]  r8:c3a1068c r7:c3bd5dfc r6:ffffffff r5:60000013 r4:c02c127c
> r3:c0213bb0
> [  522.583071] [] (br_add_if+0x0/0x3f4) from [] (add_del_if+0x4c/0x6c)
> [  522.583114]  r9:00000000 r8:c3bd4018 r7:c0359864 r6:00000001 r5:c3bb33a0
> r4:00000004
> [  522.583306] [] (add_del_if+0x0/0x6c) from [] (br_dev_ioctl+0x60/0x6c)
> [  522.583343]  r6:c3bd5ea0 r5:c3bb3000 r4:000089a2 r3:000089a2
> [  522.583492] [] (br_dev_ioctl+0x0/0x6c) from [] (dev_ifsioc+0x2c4/0x300)
> [  522.583556] [] (dev_ifsioc+0x0/0x300) from [] (dev_ioctl+0x368/0x82c)
> [  522.583599]  r7:c3bd5ea0 r6:be8efb10 r5:c0469e00 r4:000089a2
> [  522.583748] [] (dev_ioctl+0x0/0x82c) from [] (sock_ioctl+0x13c/0x270)
> [  522.583834] [] (sock_ioctl+0x0/0x270) from [] (do_vfs_ioctl+0x3a4/0x51c)
> [  522.583876]  r6:c30d6500 r5:be8efb10 r4:c3494660 r3:c0205544
> [  522.584026] [] (do_vfs_ioctl+0x0/0x51c) from [] (sys_ioctl+0x3c/0x68)
> [  522.584063]  r9:c3bd4000 r8:c0009484 r7:c30d6500 r6:00000003 r5:000089a2
> r4:be8efb10
> [  522.584255] [] (sys_ioctl+0x0/0x68) from [] (ret_fast_syscall+0x0/0x2c)
> [  522.584298]  r7:00000036 r6:be8efe10 r5:00000002 r4:be8eff14
> [  522.584431] Code: e3c22c0f 11a06008 e1912002 0a000036 (e5d65011) 
> [  522.584554] ---[ end trace 715c438c778f2442 ]---
> [  522.584607] Kernel panic - not syncing: Fatal exception in interrupt
> [  522.584650] Rebooting in 1 seconds..
> 
> We have several patches that fix various (mostly security) issues. I
> have attached them.
> 
> I cannot provide any additional details, because we are not able to
> reproduce this issue. It happens when we reconfigure Ethernet interfaces.
> 
> Best regards
> 
> Tomas
> 
> Dne 15.2.2019 v 15:12 Nikolay Aleksandrov napsal(a):
>> On 15/02/2019 16:03, Tomas Paukrt wrote:
>>> Hi,
>>>
>>> I have recently discovered that kernel 3.12.10 is occasionally crashing 
>>> due to NULL pointer dereference in function br_handle_frame when we 
>>> reconfigure the bridge, because function br_port_get_rcu returns NULL.
>>>
>>> It is very hard for us to replicate this issue, because it happens about 
>>> once per month in our testing environment, but I have created the 
>>> attached patch. Can you please check it? The latest kernel seems to be 
>>> affected too.
>>>
>>> Best regards
>>>
>>> Tomas
>>>
>> Hi,
>> That should not be possible, br_port_get_rcu() is a wrapper for
>> dev->rx_handler_data which in turn should always be present in case
>> rx_handler is called as can be seen in netdev_rx_handler_unregister().
>> Could you please share details about the crash and possibly a trace ?
>> Do you have any custom patches applied ?
>>
>> Thanks,
>>  Nik
>>
>> .
>>

Hi again,
Please don't top post on netdev@. As usual I'll have to ask you to
reproduce this on a vanilla latest kernel (if possible on the current
-net or -net-next trees) without any modifications and please provide a
trace from such kernel. From the explanation it sounds like it would
take some time but noone will look into it seriously unless that happens.

Thank you,
 Nik



^ permalink raw reply

* Re: [PATCH v2 perf,bpf 11/11] perf, bpf: save information about short living bpf programs
From: Arnaldo Carvalho de Melo @ 2019-02-15 14:41 UTC (permalink / raw)
  To: Song Liu
  Cc: netdev, linux-kernel, ast, daniel, kernel-team, peterz, jolsa,
	namhyung
In-Reply-To: <20190215000045.2592135-2-songliubraving@fb.com>

Em Thu, Feb 14, 2019 at 04:00:45PM -0800, Song Liu escreveu:
> To annotate bpf programs in perf, it is necessary to save information in
> bpf_prog_info and btf. For short living bpf program, it is necessary to
> save these information before it is unloaded.
> 
> This patch saves these information in a separate thread. This thread
> creates its own evlist, that only tracks bpf events. This evlists uses
> ring buffer with very low watermark for lower latency. When bpf load
> events are received, this thread tries to gather information via sys_bpf
> and save it in perf_env.
> 
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/perf/builtin-record.c |  13 ++++
>  tools/perf/builtin-top.c    |  12 ++++
>  tools/perf/util/bpf-event.c | 129 ++++++++++++++++++++++++++++++++++++
>  tools/perf/util/bpf-event.h |  22 ++++++
>  tools/perf/util/evlist.c    |  20 ++++++
>  tools/perf/util/evlist.h    |   2 +
>  6 files changed, 198 insertions(+)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 2355e0a9eda0..46abb44aaaab 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -1106,6 +1106,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>  	struct perf_data *data = &rec->data;
>  	struct perf_session *session;
>  	bool disabled = false, draining = false;
> +	struct bpf_event_poll_args poll_args;
> +	bool bpf_thread_running = false;
>  	int fd;
>  
>  	atexit(record__sig_exit);
> @@ -1206,6 +1208,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>  		goto out_child;
>  	}
>  
> +	if (rec->opts.bpf_event) {
> +		poll_args.env = &session->header.env;
> +		poll_args.target = &rec->opts.target;
> +		poll_args.done = &done;
> +		if (bpf_event__start_polling_thread(&poll_args) == 0)
> +			bpf_thread_running = true;
> +	}
> +
>  	err = record__synthesize(rec, false);
>  	if (err < 0)
>  		goto out_child;
> @@ -1456,6 +1466,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
>  
>  out_delete_session:
>  	perf_session__delete(session);
> +
> +	if (bpf_thread_running)
> +		bpf_event__stop_polling_thread(&poll_args);
>  	return status;
>  }
>  
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 5271d7211b9c..2586ee081967 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1524,10 +1524,12 @@ int cmd_top(int argc, const char **argv)
>  			"number of thread to run event synthesize"),
>  	OPT_END()
>  	};
> +	struct bpf_event_poll_args poll_args;
>  	const char * const top_usage[] = {
>  		"perf top [<options>]",
>  		NULL
>  	};
> +	bool bpf_thread_running = false;
>  	int status = hists__init();
>  
>  	if (status < 0)
> @@ -1652,8 +1654,18 @@ int cmd_top(int argc, const char **argv)
>  		signal(SIGWINCH, winch_sig);
>  	}
>  
> +	if (top.record_opts.bpf_event) {
> +		poll_args.env = &perf_env;
> +		poll_args.target = target;
> +		poll_args.done = &done;
> +		if (bpf_event__start_polling_thread(&poll_args) == 0)
> +			bpf_thread_running = true;
> +	}
>  	status = __cmd_top(&top);
>  
> +	if (bpf_thread_running)
> +		bpf_event__stop_polling_thread(&poll_args);
> +
>  out_delete_evlist:
>  	perf_evlist__delete(top.evlist);
>  
> diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
> index 4f347d61ed96..0caf137c515b 100644
> --- a/tools/perf/util/bpf-event.c
> +++ b/tools/perf/util/bpf-event.c
> @@ -8,6 +8,7 @@
>  #include "machine.h"
>  #include "env.h"
>  #include "session.h"
> +#include "evlist.h"
>  
>  #define ptr_to_u64(ptr)    ((__u64)(unsigned long)(ptr))
>  
> @@ -316,3 +317,131 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
>  	free(event);
>  	return err;
>  }
> +
> +static void perf_env_add_bpf_info(struct perf_env *env, u32 id)
> +{
> +	struct bpf_prog_info_linear *info_linear;
> +	struct bpf_prog_info_node *info_node;
> +	struct btf *btf = NULL;
> +	u64 arrays;
> +	u32 btf_id;
> +	int fd;
> +
> +	fd = bpf_prog_get_fd_by_id(id);
> +	if (fd < 0)
> +		return;
> +
> +	arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
> +	arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
> +	arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
> +	arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
> +	arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
> +	arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
> +	arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
> +
> +	info_linear = bpf_program__get_prog_info_linear(fd, arrays);
> +	if (IS_ERR_OR_NULL(info_linear)) {
> +		pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
> +		goto out;
> +	}
> +
> +	btf_id = info_linear->info.btf_id;
> +
> +	info_node = malloc(sizeof(struct bpf_prog_info_node));
> +	if (info_node) {
> +		info_node->info_linear = info_linear;
> +		perf_env__insert_bpf_prog_info(env, info_node);
> +	} else
> +		free(info_linear);
> +
> +	if (btf_id == 0)
> +		goto out;
> +
> +	if (btf__get_from_id(btf_id, &btf)) {
> +		pr_debug("%s: failed to get BTF of id %u, aborting\n",
> +			 __func__, btf_id);
> +		goto out;
> +	}
> +	perf_fetch_btf(env, btf_id, btf);
> +
> +out:
> +	free(btf);
> +	close(fd);
> +}
> +
> +static void *bpf_poll_thread(void *arg)
> +{
> +	struct bpf_event_poll_args *args = arg;
> +	int i;
> +
> +	while (!*(args->done)) {
> +		perf_evlist__poll(args->evlist, 1000);
> +
> +		for (i = 0; i < args->evlist->nr_mmaps; i++) {
> +			struct perf_mmap *map = &args->evlist->mmap[i];
> +			union perf_event *event;
> +
> +			if (perf_mmap__read_init(map))
> +				continue;
> +			while ((event = perf_mmap__read_event(map)) != NULL) {
> +				pr_debug("processing vip event of type %d\n",
> +					 event->header.type);
> +				switch (event->header.type) {
> +				case PERF_RECORD_BPF_EVENT:
> +					if (event->bpf_event.type != PERF_BPF_EVENT_PROG_LOAD)
> +						break;
> +					perf_env_add_bpf_info(args->env, event->bpf_event.id);
> +					break;
> +				default:
> +					break;
> +				}
> +				perf_mmap__consume(map);
> +			}
> +			perf_mmap__read_done(map);
> +		}
> +	}
> +	return NULL;
> +}
> +
> +pthread_t poll_thread;
> +
> +int bpf_event__start_polling_thread(struct bpf_event_poll_args *args)
> +{
> +	struct perf_evsel *counter;
> +
> +	args->evlist = perf_evlist__new();
> +
> +	if (args->evlist == NULL)
> +		return -1;
> +
> +	if (perf_evlist__create_maps(args->evlist, args->target))
		goto out_delete_evlist;
> +
> +	if (perf_evlist__add_bpf_tracker(args->evlist))
		goto out_delete_evlist;
> +
> +	evlist__for_each_entry(args->evlist, counter) {
> +		if (perf_evsel__open(counter, args->evlist->cpus,
> +				     args->evlist->threads) < 0)
			goto out_delete_evlist;
> +	}
> +
> +	if (perf_evlist__mmap(args->evlist, UINT_MAX))
		goto out_delete_evlist;
> +
> +	evlist__for_each_entry(args->evlist, counter) {
> +		if (perf_evsel__enable(counter))
			goto out_delete_evlist;
> +	}
> +
> +	if (pthread_create(&poll_thread, NULL, bpf_poll_thread, args))
		goto out_delete_evlist;	
> +
> +	return 0;
out_delete_evlist:
	perf_evlist__delete(args->evlist);
	args->evlist = NULL;

	return -1;
> +}
> +
> +void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args)
> +{
> +	pthread_join(poll_thread, NULL);
> +	perf_evlist__exit(args->evlist);
> +}
> diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
> index c4f0f1395ea5..61914827c1e3 100644
> --- a/tools/perf/util/bpf-event.h
> +++ b/tools/perf/util/bpf-event.h
> @@ -12,12 +12,17 @@
>  #include <bpf/libbpf.h>
>  #include <linux/btf.h>
>  #include <linux/rbtree.h>
> +#include <pthread.h>
> +#include <api/fd/array.h>
>  #include "event.h"
>  
>  struct machine;
>  union perf_event;
> +struct perf_env;
>  struct perf_sample;
>  struct record_opts;
> +struct evlist;
> +struct target;
>  
>  struct bpf_prog_info_node {
>  	struct bpf_prog_info_linear	*info_linear;
> @@ -31,6 +36,13 @@ struct btf_node {
>  	char		data[];
>  };
>  
> +struct bpf_event_poll_args {
> +	struct perf_env		*env;
> +	struct perf_evlist	*evlist;
> +	struct target		*target;
> +	volatile int		*done;
> +};
> +
>  #ifdef HAVE_LIBBPF_SUPPORT
>  int machine__process_bpf_event(struct machine *machine, union perf_event *event,
>  			       struct perf_sample *sample);
> @@ -39,6 +51,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
>  				      perf_event__handler_t process,
>  				      struct machine *machine,
>  				      struct record_opts *opts);
> +int bpf_event__start_polling_thread(struct bpf_event_poll_args *args);
> +void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args);
>  #else
>  static inline int machine__process_bpf_event(struct machine *machine __maybe_unused,
>  					     union perf_event *event __maybe_unused,
> @@ -54,5 +68,13 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session
>  {
>  	return 0;
>  }
> +
> +static inline int bpf_event__start_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
> +{
> +	return 0;
> +}
> +void bpf_event__stop_polling_thread(struct bpf_event_poll_args *args __maybe_unused)
> +{
> +}
>  #endif // HAVE_LIBBPF_SUPPORT
>  #endif
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 8c902276d4b4..612c079579ce 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -271,6 +271,26 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist)
>  	return 0;
>  }
>  
> +int perf_evlist__add_bpf_tracker(struct perf_evlist *evlist)
> +{
> +	struct perf_event_attr attr = {
> +		.type	          = PERF_TYPE_SOFTWARE,
> +		.config           = PERF_COUNT_SW_DUMMY,
> +		.watermark        = 1,
> +		.bpf_event        = 1,
> +		.wakeup_watermark = 1,
> +		.size	   = sizeof(attr), /* to capture ABI version */
> +	};
> +	struct perf_evsel *evsel = perf_evsel__new_idx(&attr,
> +						       evlist->nr_entries);
> +
> +	if (evsel == NULL)
> +		return -ENOMEM;
> +
> +	perf_evlist__add(evlist, evsel);

You could use:

	struct perf_evlist *evlist = perf_evlist__new_dummy();
	if (evlist != NULL) {
		struct perf_evsel *evsel == perf_evlist__first(evlist);
		evsel->attr.bpf_event = evsel->attr.watermark = evsel->attr.wakeup_watermark = 1;
		return 0;
	}
	return -1;

Because in this case all you'll have in this evlist is the bpf tracker,
right? The add_bpf_tracker would be handy if we would want to have a
pre-existing evlist with some other events and wanted to add a bpf
tracker, no?

- Arnaldo

> +	return 0;
> +}
> +
>  static int perf_evlist__add_attrs(struct perf_evlist *evlist,
>  				  struct perf_event_attr *attrs, size_t nr_attrs)
>  {
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index 868294491194..a2d22715188e 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -84,6 +84,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
>  
>  int perf_evlist__add_dummy(struct perf_evlist *evlist);
>  
> +int perf_evlist__add_bpf_tracker(struct perf_evlist *evlist);
> +
>  int perf_evlist__add_newtp(struct perf_evlist *evlist,
>  			   const char *sys, const char *name, void *handler);
>  
> -- 
> 2.17.1

^ permalink raw reply

* RE: [PATCH net-next v2 3/3] enetc: Add ENETC PF level external MDIO support
From: Claudiu Manoil @ 2019-02-15 14:41 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Shawn Guo, Leo Li, David S . Miller, Alexandru Marginean,
	linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <20190215133459.GH5699@lunn.ch>



>-----Original Message-----
>From: Andrew Lunn <andrew@lunn.ch>
>Sent: Friday, February 15, 2019 3:35 PM
>To: Claudiu Manoil <claudiu.manoil@nxp.com>
>Cc: Shawn Guo <shawnguo@kernel.org>; Leo Li <leoyang.li@nxp.com>; David S .
>Miller <davem@davemloft.net>; Alexandru Marginean
><alexandru.marginean@nxp.com>; linux-arm-kernel@lists.infradead.org;
>devicetree@vger.kernel.org; netdev@vger.kernel.org; linux-
>kernel@vger.kernel.org
>Subject: Re: [PATCH net-next v2 3/3] enetc: Add ENETC PF level external MDIO
>support
>
>On Fri, Feb 15, 2019 at 12:10:14PM +0200, Claudiu Manoil wrote:
>> Each ENETC PF has its own MDIO interface, the corresponding
>> MDIO registers are mapped in the ENETC's Port register block.
>> The current patch adds a driver for these PF level MDIO buses,
>> so that each PF can manage directly its own external link.
>>
>> Signed-off-by: Alex Marginean <alexandru.marginean@nxp.com>
>> Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
>> ---
>> v2 - used readx_poll_timeout()
>>    - added mdio node child to the port node
>
>Hi Claudiu
>
>Please document this in the device tree binding.
>

Hi,
I don't really see what bindings would need to be documented, as the node
names and properties used so far are common.  Like, the "mdio" node name
is common, the enetc ports are "pci" nodes, nothing special for these.
What's missing?

Regards,
Claudiu

^ permalink raw reply

* [net-next 0/5] clean up SOCK_DEBUG()
From: Yafang Shao @ 2019-02-15 14:49 UTC (permalink / raw)
  To: davem; +Cc: daniel, edumazet, netdev, shaoyafang, Yafang Shao

Per discussion with Daniel[1] and Eric[2], the SOCK_DEBUG() is a very
ancient interface, which is not very useful for debugging.
So we'd better clean up it.

After this patchset, the SO_DEBUG interface will not take any effect,
but I still keep it as-is in sock_{s,g}etsockopt() to avoid breaking
applications.

[1] https://patchwork.ozlabs.org/patch/1035573/
[2] https://patchwork.ozlabs.org/patch/1040533

Yafang Shao (5):
  tcp: clean up SOCK_DEBUG()
  x25: clean up SOCK_DEBUG()
  appletalk: clean up SOCK_DEBUG()
  dccp: clean up SOCK_DEBUG()
  net: sock: remove the definition of SOCK_DEBUG()

 include/net/sock.h       | 19 -------------------
 net/appletalk/ddp.c      | 14 --------------
 net/core/sock.c          |  3 +++
 net/dccp/ipv6.c          |  2 --
 net/ipv4/tcp_input.c     | 19 +------------------
 net/ipv6/tcp_ipv6.c      |  2 --
 net/x25/af_x25.c         | 12 ------------
 net/x25/x25_facilities.c | 32 ++++++++++----------------------
 net/x25/x25_out.c        |  4 +---
 9 files changed, 15 insertions(+), 92 deletions(-)

-- 
1.8.3.1


^ permalink raw reply

* [net-next 1/5] tcp: clean up SOCK_DEBUG()
From: Yafang Shao @ 2019-02-15 14:49 UTC (permalink / raw)
  To: davem; +Cc: daniel, edumazet, netdev, shaoyafang, Yafang Shao
In-Reply-To: <1550242187-29660-1-git-send-email-laoar.shao@gmail.com>

Per discussion with Daniel[1] and Eric[2], the SOCK_DEBUG() is a very
ancient interface, which is not very useful for debugging.
So we'd better clean up it.

This patch cleans up it for TCP.

[1] https://patchwork.ozlabs.org/patch/1035573/
[2] https://patchwork.ozlabs.org/patch/1040533/

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 net/ipv4/tcp_input.c | 19 +------------------
 net/ipv6/tcp_ipv6.c  |  2 --
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7a027dec..6d2750e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3595,7 +3595,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	 * this segment (RFC793 Section 3.9).
 	 */
 	if (after(ack, tp->snd_nxt))
-		goto invalid_ack;
+		return -1;
 
 	if (after(ack, prior_snd_una)) {
 		flag |= FLAG_SND_UNA_ADVANCED;
@@ -3714,10 +3714,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		tcp_process_tlp_ack(sk, ack, flag);
 	return 1;
 
-invalid_ack:
-	SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
-	return -1;
-
 old_ack:
 	/* If data was SACKed, tag it and see if we should send more data.
 	 * If data was DSACKed, see if we can undo a cwnd reduction.
@@ -3731,7 +3727,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		tcp_xmit_recovery(sk, rexmit);
 	}
 
-	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
 	return 0;
 }
 
@@ -4432,13 +4427,9 @@ static void tcp_ofo_queue(struct sock *sk)
 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
-			SOCK_DEBUG(sk, "ofo packet was already received\n");
 			tcp_drop(sk, skb);
 			continue;
 		}
-		SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
-			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
-			   TCP_SKB_CB(skb)->end_seq);
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
@@ -4502,8 +4493,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
 	seq = TCP_SKB_CB(skb)->seq;
 	end_seq = TCP_SKB_CB(skb)->end_seq;
-	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, seq, end_seq);
 
 	p = &tp->out_of_order_queue.rb_node;
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
@@ -4779,10 +4768,6 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		/* Partial packet, seq < rcv_next < end_seq */
-		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
-			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
-			   TCP_SKB_CB(skb)->end_seq);
-
 		tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
 
 		/* If window is closed, drop tail of packet. But after
@@ -5061,8 +5046,6 @@ static int tcp_prune_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
-
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e51cda7..57ef69a1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -220,8 +220,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
-		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
 		if (__ipv6_only_sock(sk))
 			return -ENETUNREACH;
 
-- 
1.8.3.1


^ permalink raw reply related

* [net-next 2/5] x25: clean up SOCK_DEBUG()
From: Yafang Shao @ 2019-02-15 14:49 UTC (permalink / raw)
  To: davem; +Cc: daniel, edumazet, netdev, shaoyafang, Yafang Shao
In-Reply-To: <1550242187-29660-1-git-send-email-laoar.shao@gmail.com>

Per discussion with Daniel[1] and Eric[2], the SOCK_DEBUG() is a very
ancient interface, which is not very useful for debugging.
So we'd better clean up it.

This patch cleans up it for x25.

[1] https://patchwork.ozlabs.org/patch/1035573/
[2] https://patchwork.ozlabs.org/patch/1040533/

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 net/x25/af_x25.c         | 12 ------------
 net/x25/x25_facilities.c | 32 ++++++++++----------------------
 net/x25/x25_out.c        |  4 +---
 3 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5121729..5892d05 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -705,7 +705,6 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	x25_insert_socket(sk);
 	sock_reset_flag(sk, SOCK_ZAPPED);
 	release_sock(sk);
-	SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
 out:
 	return rc;
 }
@@ -1148,11 +1147,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		goto out;
 	}
 
-	SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n");
-
 	/* Build a packet */
-	SOCK_DEBUG(sk, "x25_sendmsg: sendto: building packet.\n");
-
 	if ((msg->msg_flags & MSG_OOB) && len > 32)
 		len = 32;
 
@@ -1170,8 +1165,6 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	/*
 	 *	Put the data on the end
 	 */
-	SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
-
 	skb_reset_transport_header(skb);
 	skb_put(skb, len);
 
@@ -1194,8 +1187,6 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	/*
 	 *	Push down the X.25 header
 	 */
-	SOCK_DEBUG(sk, "x25_sendmsg: Building X.25 Header.\n");
-
 	if (msg->msg_flags & MSG_OOB) {
 		if (x25->neighbour->extended) {
 			asmptr    = skb_push(skb, X25_STD_MIN_LEN);
@@ -1228,9 +1219,6 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 			skb->data[0] |= X25_Q_BIT;
 	}
 
-	SOCK_DEBUG(sk, "x25_sendmsg: Built header.\n");
-	SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n");
-
 	rc = -ENOTCONN;
 	if (sk->sk_state != TCP_ESTABLISHED)
 		goto out_kfree_skb;
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index ad1734d..74a5284 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -286,10 +286,8 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
 	/*
 	 *	They want reverse charging, we won't accept it.
 	 */
-	if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) {
-		SOCK_DEBUG(sk, "X.25: rejecting reverse charging request\n");
+	if ((theirs.reverse & 0x01) && (ours->reverse & 0x01))
 		return -1;
-	}
 
 	new->reverse = theirs.reverse;
 
@@ -298,37 +296,27 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
 		int theirs_out = theirs.throughput & 0xf0;
 		int ours_in  = ours->throughput & 0x0f;
 		int ours_out = ours->throughput & 0xf0;
-		if (!ours_in || theirs_in < ours_in) {
-			SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n");
+		if (!ours_in || theirs_in < ours_in)
 			new->throughput = (new->throughput & 0xf0) | theirs_in;
-		}
-		if (!ours_out || theirs_out < ours_out) {
-			SOCK_DEBUG(sk,
-				"X.25: outbound throughput negotiated\n");
+
+		if (!ours_out || theirs_out < ours_out)
 			new->throughput = (new->throughput & 0x0f) | theirs_out;
-		}
 	}
 
 	if (theirs.pacsize_in && theirs.pacsize_out) {
-		if (theirs.pacsize_in < ours->pacsize_in) {
-			SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down\n");
+		if (theirs.pacsize_in < ours->pacsize_in)
 			new->pacsize_in = theirs.pacsize_in;
-		}
-		if (theirs.pacsize_out < ours->pacsize_out) {
-			SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down\n");
+
+		if (theirs.pacsize_out < ours->pacsize_out)
 			new->pacsize_out = theirs.pacsize_out;
-		}
 	}
 
 	if (theirs.winsize_in && theirs.winsize_out) {
-		if (theirs.winsize_in < ours->winsize_in) {
-			SOCK_DEBUG(sk, "X.25: window size inwards negotiated down\n");
+		if (theirs.winsize_in < ours->winsize_in)
 			new->winsize_in = theirs.winsize_in;
-		}
-		if (theirs.winsize_out < ours->winsize_out) {
-			SOCK_DEBUG(sk, "X.25: window size outwards negotiated down\n");
+
+		if (theirs.winsize_out < ours->winsize_out)
 			new->winsize_out = theirs.winsize_out;
-		}
 	}
 
 	return len;
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 0144271..2a74f26 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -77,9 +77,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
 					kfree_skb(skb);
 					return sent;
 				}
-				SOCK_DEBUG(sk, "x25_output: fragment alloc"
-					       " failed, err=%d, %d bytes "
-					       "sent\n", err, sent);
+
 				return err;
 			}
 
-- 
1.8.3.1


^ permalink raw reply related

* [net-next 3/5] appletalk: clean up SOCK_DEBUG()
From: Yafang Shao @ 2019-02-15 14:49 UTC (permalink / raw)
  To: davem; +Cc: daniel, edumazet, netdev, shaoyafang, Yafang Shao
In-Reply-To: <1550242187-29660-1-git-send-email-laoar.shao@gmail.com>

Per discussion with Daniel[1] and Eric[2], the SOCK_DEBUG() is a very
ancient interface, which is not very useful for debugging.
So we'd better clean up it.

This patch cleans up it for appletalk.

[1] https://patchwork.ozlabs.org/patch/1035573/
[2] https://patchwork.ozlabs.org/patch/1040533/

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 net/appletalk/ddp.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5a..326c4fd 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1608,8 +1608,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	/* Build a packet */
-	SOCK_DEBUG(sk, "SK %p: Got address.\n", sk);
-
 	/* For headers */
 	size = sizeof(struct ddpehdr) + len + ddp_dl->header_length;
 
@@ -1628,10 +1626,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		goto out;
 
 	dev = rt->dev;
-
-	SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
-			sk, size, dev->name);
-
 	size += dev->hard_header_len;
 	release_sock(sk);
 	skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
@@ -1643,8 +1637,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	skb_reserve(skb, dev->hard_header_len);
 	skb->dev = dev;
 
-	SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
-
 	ddp = skb_put(skb, sizeof(struct ddpehdr));
 	ddp->deh_len_hops  = htons(len + sizeof(*ddp));
 	ddp->deh_dnet  = usat->sat_addr.s_net;
@@ -1654,8 +1646,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	ddp->deh_dport = usat->sat_port;
 	ddp->deh_sport = at->src_port;
 
-	SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len);
-
 	err = memcpy_from_msg(skb_put(skb, len), msg, len);
 	if (err) {
 		kfree_skb(skb);
@@ -1678,7 +1668,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
 		if (skb2) {
 			loopback = 1;
-			SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk);
 			/*
 			 * If it fails it is queued/sent above in the aarp queue
 			 */
@@ -1687,7 +1676,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	if (dev->flags & IFF_LOOPBACK || loopback) {
-		SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk);
 		/* loop back */
 		skb_orphan(skb);
 		if (ddp->deh_dnode == ATADDR_BCAST) {
@@ -1707,7 +1695,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		}
 		ddp_dl->request(ddp_dl, skb, dev->dev_addr);
 	} else {
-		SOCK_DEBUG(sk, "SK %p: send out.\n", sk);
 		if (rt->flags & RTF_GATEWAY) {
 		    gsat.sat_addr = rt->gateway;
 		    usat = &gsat;
@@ -1718,7 +1705,6 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		 */
 		aarp_send_ddp(dev, skb, &usat->sat_addr, NULL);
 	}
-	SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len);
 
 out:
 	release_sock(sk);
-- 
1.8.3.1


^ permalink raw reply related

* [net-next 4/5] dccp: clean up SOCK_DEBUG()
From: Yafang Shao @ 2019-02-15 14:49 UTC (permalink / raw)
  To: davem; +Cc: daniel, edumazet, netdev, shaoyafang, Yafang Shao
In-Reply-To: <1550242187-29660-1-git-send-email-laoar.shao@gmail.com>

Per discussion with Daniel[1] and Eric[2], the SOCK_DEBUG() is a very
ancient interface, which is not very useful for debugging.
So we'd better clean up it.

This patch cleans up it for dccp.

[1] https://patchwork.ozlabs.org/patch/1035573/
[2] https://patchwork.ozlabs.org/patch/1040533/

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 net/dccp/ipv6.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index d5740ba..8e72e50 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -878,8 +878,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
-		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
 		if (__ipv6_only_sock(sk))
 			return -ENETUNREACH;
 
-- 
1.8.3.1


^ permalink raw reply related

* [net-next 5/5] net: sock: remove the definition of SOCK_DEBUG()
From: Yafang Shao @ 2019-02-15 14:49 UTC (permalink / raw)
  To: davem; +Cc: daniel, edumazet, netdev, shaoyafang, Yafang Shao
In-Reply-To: <1550242187-29660-1-git-send-email-laoar.shao@gmail.com>

As SOCK_DEBUG() isn't used any more, we can get ride of it now.

Regarding the SO_DEBUG in sock_{s,g}etsockopt, I think it is better to
keep as-is, because if we return an errno to tell the application that
this optname isn't supported, it may break the application.
The application still can use this option but don't take any effect from
now on.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/net/sock.h | 19 -------------------
 net/core/sock.c    |  3 +++
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 6679f3c..4c6b599 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -74,25 +74,6 @@
 #include <net/smc.h>
 #include <net/l3mdev.h>
 
-/*
- * This structure really needs to be cleaned up.
- * Most of it is for TCP, and not used by any of
- * the other protocols.
- */
-
-/* Define this to get the SOCK_DBG debugging facility. */
-#define SOCK_DEBUGGING
-#ifdef SOCK_DEBUGGING
-#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
-					printk(KERN_DEBUG msg); } while (0)
-#else
-/* Validate arguments and do nothing */
-static inline __printf(2, 3)
-void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
-{
-}
-#endif
-
 /* This is the per-socket lock.  The spinlock provides a synchronization
  * between user contexts and software interrupt processing, whereas the
  * mini-semaphore synchronizes multiple users amongst themselves.
diff --git a/net/core/sock.c b/net/core/sock.c
index 71ded4d..a980ff3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -753,6 +753,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 
 	switch (optname) {
 	case SO_DEBUG:
+		/* This option doesn't take effect now,
+		 * but we need to keep it for backward compatibility.
+		 */
 		if (val && !capable(CAP_NET_ADMIN))
 			ret = -EACCES;
 		else
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH net-next v2 3/3] enetc: Add ENETC PF level external MDIO support
From: Andrew Lunn @ 2019-02-15 14:56 UTC (permalink / raw)
  To: Claudiu Manoil
  Cc: Shawn Guo, Leo Li, David S . Miller, Alexandru Marginean,
	linux-arm-kernel@lists.infradead.org, devicetree@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <VI1PR04MB488080895C871B6B3ABA571D96600@VI1PR04MB4880.eurprd04.prod.outlook.com>

> Hi,
> I don't really see what bindings would need to be documented, as the node
> names and properties used so far are common.  Like, the "mdio" node name
> is common, the enetc ports are "pci" nodes, nothing special for these.
> What's missing?

Hi Claudiu

It is not well defined where to put phy nodes. Some drivers do allow
them directly in the MAC node, some drivers optionally allow an mdio
sub-node, others enforce an mdio sub-node. The name of the sub-node is
also not well defined.

So just documenting that the mdio sub-node is mandatory will help
people writing device trees for their boards.

       Andrew



^ permalink raw reply

* Re: [net-next 5/5] net: sock: remove the definition of SOCK_DEBUG()
From: Eric Dumazet @ 2019-02-15 14:58 UTC (permalink / raw)
  To: Yafang Shao; +Cc: David Miller, Daniel Borkmann, netdev, shaoyafang
In-Reply-To: <1550242187-29660-6-git-send-email-laoar.shao@gmail.com>

On Fri, Feb 15, 2019 at 6:50 AM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> As SOCK_DEBUG() isn't used any more, we can get ride of it now.
>

No, we are still using this infrastructure from time to time.

I told you I agreed to remove the current (obsolete) TCP call sites,
 I never suggested to remove SOCK_DEBUG() completely.

^ permalink raw reply

* [RFC] net: dsa: qca8k: implement rgmii-id mode
From: Michal Vokáč @ 2019-02-15 15:01 UTC (permalink / raw)
  To: Vinod Koul, Andrew Lunn
  Cc: David S. Miller, netdev, linux-kernel@vger.kernel.org,
	Florian Fainelli

Hi,

networking on my boards [1], which are currently in linux-next, suddently
stopped working. I tracked it down to this commit 5ecdd77c61c8 ("net: dsa:
qca8k: disable delay for RGMII mode") [2].

So I think the rgmii-id mode is obviously needed in my case.
I was able to find a couple drivers that read tx/rx-delay or
tx/rx-internal-delay from device tree. Namely:

   drivers/net/ethernet/apm/xgene/xgene_enet_main.c
   drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
   drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
   drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
   drivers/net/phy/dp83867.c

I would appreciate any hints how to add similar function to qca8k driver
if that is the correct way to go. Can I take some of the above mentioned
drivers as a good example for that? How should the binding look like?

I would expect something like this:

	switch@0 {
		compatible = "qca,qca8334";
		reg = <0>;

		switch_ports: ports {
			#address-cells = <1>;
			#size-cells = <0>;

			ethphy0: port@0 {
				reg = <0>;
				label = "cpu";
				phy-mode = "rgmii-id";
				qca,tx-delay = <3>;
				qca,rx-delay = <3>;
				ethernet = <&fec>;
		};
	};


Thanks in advance,
Michal

[1] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=87489ec3a77f3e01bcf0d46e353ae7112ec8c4f0
[2] https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/drivers/net/dsa/qca8k.c?id=5ecdd77c61c8fe1d75ded538701e5e854963c890

^ permalink raw reply

* [PATCH][next] mlxsw: core: fix spelling mistake "temprature" -> "temperature"
From: Colin King @ 2019-02-15 15:11 UTC (permalink / raw)
  To: Jiri Pirko, Ido Schimmel, David S . Miller, netdev
  Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

There is a spelling mistake in several dev_err messages, fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index f1ada4cdbd6b..6956bbebe2f1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -208,7 +208,7 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
 			    1);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
 	if (err) {
-		dev_err(dev, "Failed to query module temprature sensor\n");
+		dev_err(dev, "Failed to query module temperature sensor\n");
 		return err;
 	}
 
@@ -251,7 +251,7 @@ static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev,
 			    1);
 	err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
 	if (err) {
-		dev_err(dev, "Failed to query module temprature sensor\n");
+		dev_err(dev, "Failed to query module temperature sensor\n");
 		return err;
 	}
 
@@ -291,7 +291,7 @@ mlxsw_hwmon_module_temp_critical_show(struct device *dev,
 	err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
 						   SFP_TEMP_HIGH_WARN, &temp);
 	if (err) {
-		dev_err(dev, "Failed to query module temprature thresholds\n");
+		dev_err(dev, "Failed to query module temperature thresholds\n");
 		return err;
 	}
 
@@ -314,7 +314,7 @@ mlxsw_hwmon_module_temp_emergency_show(struct device *dev,
 	err = mlxsw_env_module_temp_thresholds_get(mlxsw_hwmon->core, module,
 						   SFP_TEMP_HIGH_ALARM, &temp);
 	if (err) {
-		dev_err(dev, "Failed to query module temprature thresholds\n");
+		dev_err(dev, "Failed to query module temperature thresholds\n");
 		return err;
 	}
 
-- 
2.20.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox