Netdev List
 help / color / mirror / Atom feed
* [PATCH v3,net-next 1/2] ip_gre: fix error path when erspan_rcv failed
From: Haishuang Yan @ 2017-12-20  2:21 UTC (permalink / raw)
  To: David S. Miller, Alexey Kuznetsov, Hideaki YOSHIFUJI
  Cc: netdev, linux-kernel, Haishuang Yan, William Tu
In-Reply-To: <1513736507-22968-1-git-send-email-yanhaishuang@cmss.chinamobile.com>

When erspan_rcv call return PACKET_REJECT, we shoudn't call ipgre_rcv to
process packets again, instead send icmp unreachable message in error
path.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Acked-by: William Tu <u9012063@gmail.com>
Cc: William Tu <u9012063@gmail.com>
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>

---
Change since v3:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv4/ip_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 3029e3e..90c9123 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -436,11 +436,13 @@ static int gre_rcv(struct sk_buff *skb)
 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
 		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 			return 0;
+		goto out;
 	}
 
 	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 		return 0;
 
+out:
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 drop:
 	kfree_skb(skb);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v3,net-next 2/2] ip6_gre: fix error path when ip6erspan_rcv failed
From: Haishuang Yan @ 2017-12-20  2:21 UTC (permalink / raw)
  To: David S. Miller, Alexey Kuznetsov, Hideaki YOSHIFUJI
  Cc: netdev, linux-kernel, Haishuang Yan, William Tu
In-Reply-To: <1513736507-22968-1-git-send-email-yanhaishuang@cmss.chinamobile.com>

Same as ipv4 code, when ip6erspan_rcv call return PACKET_REJECT, we
should call icmpv6_send to send icmp unreachable message in error path.

Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
Acked-by: William Tu <u9012063@gmail.com>
Cc: William Tu <u9012063@gmail.com>
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>

---
Change since v2:
  * Rebase on latest master branch.
  * Fix wrong commit information.
---
 net/ipv6/ip6_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 45038a9..8451d00 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -604,12 +604,13 @@ static int gre_rcv(struct sk_buff *skb)
 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
 		if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
 			return 0;
-		goto drop;
+		goto out;
 	}
 
 	if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
 		return 0;
 
+out:
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 drop:
 	kfree_skb(skb);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net-next v2] netdevsim: correctly check return value of debugfs_create_dir
From: Prashant Bhole @ 2017-12-20  2:27 UTC (permalink / raw)
  To: David S . Miller; +Cc: Prashant Bhole, netdev, Jakub Kicinski

- Checking return value with IS_ERROR_OR_NULL
- Added error handling where it was not handled

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
 drivers/net/netdevsim/bpf.c    |  8 ++++----
 drivers/net/netdevsim/netdev.c | 12 ++++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 078d2c37a6c1..aeb429428cc5 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -201,7 +201,6 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 {
 	struct nsim_bpf_bound_prog *state;
 	char name[16];
-	int err;
 
 	state = kzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state)
@@ -214,10 +213,9 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 	/* Program id is not populated yet when we create the state. */
 	sprintf(name, "%u", ns->prog_id_gen++);
 	state->ddir = debugfs_create_dir(name, ns->ddir_bpf_bound_progs);
-	if (IS_ERR(state->ddir)) {
-		err = PTR_ERR(state->ddir);
+	if (IS_ERR_OR_NULL(state->ddir)) {
 		kfree(state);
-		return err;
+		return -ENOMEM;
 	}
 
 	debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
@@ -349,6 +347,8 @@ int nsim_bpf_init(struct netdevsim *ns)
 			   &ns->bpf_bind_verifier_delay);
 	ns->ddir_bpf_bound_progs =
 		debugfs_create_dir("bpf_bound_progs", ns->ddir);
+	if (IS_ERR_OR_NULL(ns->ddir_bpf_bound_progs))
+		return -ENOMEM;
 
 	ns->bpf_tc_accept = true;
 	debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index eb8c679fca9f..c2a02d1944b8 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -147,10 +147,12 @@ struct device_type nsim_dev_type = {
 static int nsim_init(struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
-	int err;
+	int err = -ENOMEM;
 
 	ns->netdev = dev;
 	ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
+	if (IS_ERR_OR_NULL(ns->ddir))
+		goto err;
 
 	err = nsim_bpf_init(ns);
 	if (err)
@@ -171,6 +173,7 @@ static int nsim_init(struct net_device *dev)
 	nsim_bpf_uninit(ns);
 err_debugfs_destroy:
 	debugfs_remove_recursive(ns->ddir);
+err:
 	return err;
 }
 
@@ -466,11 +469,11 @@ struct dentry *nsim_ddir;
 
 static int __init nsim_module_init(void)
 {
-	int err;
+	int err = -ENOMEM;
 
 	nsim_ddir = debugfs_create_dir(DRV_NAME, NULL);
-	if (IS_ERR(nsim_ddir))
-		return PTR_ERR(nsim_ddir);
+	if (IS_ERR_OR_NULL(nsim_ddir))
+		goto err;
 
 	err = bus_register(&nsim_bus);
 	if (err)
@@ -486,6 +489,7 @@ static int __init nsim_module_init(void)
 	bus_unregister(&nsim_bus);
 err_debugfs_destroy:
 	debugfs_remove_recursive(nsim_ddir);
+err:
 	return err;
 }
 
-- 
2.13.6

^ permalink raw reply related

* Re: [PATCH -tip v3 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events
From: Masami Hiramatsu @ 2017-12-20  2:31 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Ingo Molnar, Stephen Hemminger, Steven Rostedt, Peter Zijlstra,
	Thomas Gleixner, LKML, David S . Miller, netdev
In-Reply-To: <20171219180155.xxkv437fqmwhmhgg@ast-mbp.dhcp.thefacebook.com>

On Tue, 19 Dec 2017 10:01:56 -0800
Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote:

> On Tue, Dec 19, 2017 at 05:56:55PM +0900, Masami Hiramatsu wrote:
> >  include/trace/events/sctp.h |   98 ++++++++++++++
> >  include/trace/events/tcp.h  |   80 +++++++++++
> >  net/Kconfig                 |   17 --
> >  net/dccp/Kconfig            |   17 --
> >  net/dccp/Makefile           |    2 
> >  net/dccp/probe.c            |  203 -----------------------------
> >  net/dccp/proto.c            |    5 +
> >  net/dccp/trace.h            |  105 +++++++++++++++
> >  net/ipv4/Makefile           |    1 
> >  net/ipv4/tcp_input.c        |    3 
> >  net/ipv4/tcp_probe.c        |  301 -------------------------------------------
> >  net/sctp/Kconfig            |   12 --
> >  net/sctp/Makefile           |    3 
> >  net/sctp/probe.c            |  244 -----------------------------------
> >  net/sctp/sm_statefuns.c     |    5 +
> >  15 files changed, 296 insertions(+), 800 deletions(-)
> 
> You need to target net-next tree for this patch set.
> 

Good point! I'll rebased on net-next tree. Anyway, I got an issue
building this on i386. I'll fix it and resend again.

Thank you,

-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH net-next v2] netdevsim: correctly check return value of debugfs_create_dir
From: Jakub Kicinski @ 2017-12-20  2:34 UTC (permalink / raw)
  To: Prashant Bhole; +Cc: David S . Miller, netdev
In-Reply-To: <20171220022715.2356-1-bhole_prashant_q7@lab.ntt.co.jp>

On Wed, 20 Dec 2017 11:27:15 +0900, Prashant Bhole wrote:
> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
> index eb8c679fca9f..c2a02d1944b8 100644
> --- a/drivers/net/netdevsim/netdev.c
> +++ b/drivers/net/netdevsim/netdev.c
> @@ -147,10 +147,12 @@ struct device_type nsim_dev_type = {
>  static int nsim_init(struct net_device *dev)
>  {
>  	struct netdevsim *ns = netdev_priv(dev);
> -	int err;
> +	int err = -ENOMEM;
>  
>  	ns->netdev = dev;
>  	ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
> +	if (IS_ERR_OR_NULL(ns->ddir))
> +		goto err;

nit:
Could you return err; here directly instead of go(ing )to return
and having label and variable of the same name?  Same in
nsim_module_init().

With that feel free to add:

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>

Thanks!

>  	err = nsim_bpf_init(ns);
>  	if (err)
> @@ -171,6 +173,7 @@ static int nsim_init(struct net_device *dev)
>  	nsim_bpf_uninit(ns);
>  err_debugfs_destroy:
>  	debugfs_remove_recursive(ns->ddir);
> +err:
>  	return err;
>  }
>  

^ permalink raw reply

* Re: [PATCH v4 25/36] nds32: Miscellaneous header files
From: Greentime Hu @ 2017-12-20  2:34 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Greentime, Linux Kernel Mailing List, linux-arch, Thomas Gleixner,
	Jason Cooper, Marc Zyngier, Rob Herring, Networking, Vincent Chen,
	DTML, Al Viro, David Howells, Will Deacon, Daniel Lezcano,
	linux-serial-u79uwXL29TY76Z2rM5mHXA, Geert Uytterhoeven,
	Linus Walleij, Mark Rutland, Greg KH, Guo Ren
In-Reply-To: <CAK8P3a3Ofczq1DrQEcEcP1fZrgyeOLpFDwgd7uMZ4H0NpHs+wg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

2017-12-19 17:54 GMT+08:00 Arnd Bergmann <arnd-r2nGTMty4D4@public.gmane.org>:
> On Tue, Dec 19, 2017 at 6:34 AM, Greentime Hu <green.hu-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>> Hi, Arnd:
>>
>> 2017-12-18 19:13 GMT+08:00 Arnd Bergmann <arnd-r2nGTMty4D4@public.gmane.org>:
>>> On Mon, Dec 18, 2017 at 7:46 AM, Greentime Hu <green.hu-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>>>> From: Greentime Hu <greentime-MUIXKm3Oiri1Z/+hSey0Gg@public.gmane.org>
>>>>
>>>> This patch introduces some miscellaneous header files.
>>>
>>>> +static inline void __delay(unsigned long loops)
>>>> +{
>>>> +       __asm__ __volatile__(".align 2\n"
>>>> +                            "1:\n"
>>>> +                            "\taddi\t%0, %0, -1\n"
>>>> +                            "\tbgtz\t%0, 1b\n"
>>>> +                            :"=r"(loops)
>>>> +                            :"0"(loops));
>>>> +}
>>>> +
>>>> +static inline void __udelay(unsigned long usecs, unsigned long lpj)
>>>> +{
>>>> +       usecs *= (unsigned long)(((0x8000000000000000ULL / (500000 / HZ)) +
>>>> +                                 0x80000000ULL) >> 32);
>>>> +       usecs = (unsigned long)(((unsigned long long)usecs * lpj) >> 32);
>>>> +       __delay(usecs);
>>>> +}
>>>
>>> Do you have a reliable clocksource that you can read here instead of doing the
>>> loop? It's generally preferred to have an accurate delay if at all possible, the
>>> delay loop calibration is only for those architectures that don't have any
>>> way to observe how much time has passed accurately.
>>>
>>
>> We currently only have atcpit100 as clocksource but it is an IP of  SoC.
>> These delay API will be unavailable if we changed to another SoC
>> unless all these timer driver provided the same APIs.
>> It may suffer our customers if they forget to port these APIs in their
>> timer drivers when they try to use nds32 in the first beginning.
>
> Ok, thanks for the clarification.
>
>> Or maybe I can use a CONFIG_USE_ACCURATE_DELAY to keep these 2
>> implementions for these purposes?
>
> I'd just add a one-line comment in delay.h to explain that there is no
> cycle counter in the CPU.
>

Thanks.
Got it. I will add a one-line comment in delay.h
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v4 04/36] nds32: Kernel booting and initialization
From: Greentime Hu @ 2017-12-20  2:35 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Greentime, Linux Kernel Mailing List, Arnd Bergmann, linux-arch,
	Thomas Gleixner, Jason Cooper, Marc Zyngier, Rob Herring, netdev,
	Vincent Chen, DTML, Al Viro, David Howells, Will Deacon,
	Daniel Lezcano, linux-serial, Geert Uytterhoeven, Linus Walleij,
	Mark Rutland, Greg KH
In-Reply-To: <78afd442-4482-f104-746e-5984214658ee@infradead.org>

2017-12-20 6:01 GMT+08:00 Randy Dunlap <rdunlap@infradead.org>:
> On 12/17/2017 10:46 PM, Greentime Hu wrote:
>> From: Greentime Hu <greentime@andestech.com>
>>
>> This patch includes the kernel startup code. It can get dtb pointer
>> passed from bootloader. It will create a temp mapping by tlb
>> instructions at beginning and goto start_kernel.
>>
>> Signed-off-by: Vincent Chen <vincentc@andestech.com>
>> Signed-off-by: Greentime Hu <greentime@andestech.com>
>> ---
>>  arch/nds32/kernel/head.S  |  189 ++++++++++++++++++++++
>>  arch/nds32/kernel/setup.c |  383 +++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 572 insertions(+)
>>  create mode 100644 arch/nds32/kernel/head.S
>>  create mode 100644 arch/nds32/kernel/setup.c
>>
>
>> diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
>> new file mode 100644
>> index 0000000..7718c58
>> --- /dev/null
>> +++ b/arch/nds32/kernel/setup.c
>> @@ -0,0 +1,383 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +// Copyright (C) 2005-2017 Andes Technology Corporation
>> +
>
> [snip]
>
>> +struct cache_info L1_cache_info[2];
>> +static void __init dump_cpu_info(int cpu)
>> +{
>> +     int i, p = 0;
>> +     char str[sizeof(hwcap_str) + 16];
>> +
>> +     for (i = 0; hwcap_str[i]; i++) {
>> +             if (elf_hwcap & (1 << i)) {
>> +                     sprintf(str + p, "%s ", hwcap_str[i]);
>> +                     p += strlen(hwcap_str[i]) + 1;
>> +             }
>> +     }
>> +
>> +     pr_info("CPU%d Featuretures: %s\n", cpu, str);
>
>                        Features:
>

Thanks Randy. I will fix this typo.

^ permalink raw reply

* [PATCH v3 net-next 0/5] replace tcp_set_state tracepoint with inet_sock_set_state
From: Yafang Shao @ 2017-12-20  3:12 UTC (permalink / raw)
  To: songliubraving, davem, marcelo.leitner, rostedt
  Cc: bgregg, netdev, linux-kernel, Yafang Shao

According to the discussion in the mail thread
https://patchwork.kernel.org/patch/10099243/,
tcp_set_state tracepoint is renamed to inet_sock_set_state tracepoint and is
moved to include/trace/events/sock.h.

With this new tracepoint, we can trace AF_INET/AF_INET6 sock state transitions.
As there's only one single tracepoint for inet, so I didn't create a new trace
file named trace/events/inet_sock.h, and just place it in
include/trace/events/sock.h

Currently TCP/DCCP/SCTP state transitions are traced with this tracepoint.

- Why not more protocol ?
If we really think that anonter protocol should be traced, I will modify the
code to trace it.
I just want to make the code easy and not output useless information.

Steven Rostedt (VMware) (1):
  tcp: Export to userspace the TCP state names for the trace events

Yafang Shao (4):
  net: tracepoint: replace tcp_set_state tracepoint with
    inet_sock_set_state tracepoint
  net: sock: replace sk_state_load with inet_sk_state_load and remove
     sk_state_store
  net: tracepoint: using sock_set_state tracepoint to trace DCCP state
    transition
  net: tracepoint: using sock_set_state tracepoint to trace SCTP state
    transition

 include/net/inet_sock.h         |  25 ++++++++++
 include/net/sock.h              |  25 ----------
 include/trace/events/sock.h     | 107 ++++++++++++++++++++++++++++++++++++++++
 include/trace/events/tcp.h      |  16 ------
 net/dccp/proto.c                |   2 +-
 net/ipv4/af_inet.c              |  14 ++++++
 net/ipv4/inet_connection_sock.c |   8 +--
 net/ipv4/inet_hashtables.c      |   2 +-
 net/ipv4/tcp.c                  |  10 ++--
 net/ipv4/tcp_diag.c             |   2 +-
 net/ipv4/tcp_ipv4.c             |   2 +-
 net/ipv6/tcp_ipv6.c             |   2 +-
 net/sctp/endpointola.c          |   2 +-
 net/sctp/sm_sideeffect.c        |   4 +-
 net/sctp/socket.c               |  12 ++---
 15 files changed, 167 insertions(+), 66 deletions(-)

--
1.8.3.1

^ permalink raw reply

* [PATCH v3 net-next 1/5] tcp: Export to userspace the TCP state names for the trace events
From: Yafang Shao @ 2017-12-20  3:12 UTC (permalink / raw)
  To: songliubraving, davem, marcelo.leitner, rostedt
  Cc: bgregg, netdev, linux-kernel, Yafang Shao
In-Reply-To: <1513739574-3345-1-git-send-email-laoar.shao@gmail.com>

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

The TCP trace events (specifically tcp_set_state), maps emums to symbol
names via __print_symbolic(). But this only works for reading trace events
from the tracefs trace files. If perf or trace-cmd were to record these
events, the event format file does not convert the enum names into numbers,
and you get something like:

__print_symbolic(REC->oldstate,
    { TCP_ESTABLISHED, "TCP_ESTABLISHED" },
    { TCP_SYN_SENT, "TCP_SYN_SENT" },
    { TCP_SYN_RECV, "TCP_SYN_RECV" },
    { TCP_FIN_WAIT1, "TCP_FIN_WAIT1" },
    { TCP_FIN_WAIT2, "TCP_FIN_WAIT2" },
    { TCP_TIME_WAIT, "TCP_TIME_WAIT" },
    { TCP_CLOSE, "TCP_CLOSE" },
    { TCP_CLOSE_WAIT, "TCP_CLOSE_WAIT" },
    { TCP_LAST_ACK, "TCP_LAST_ACK" },
    { TCP_LISTEN, "TCP_LISTEN" },
    { TCP_CLOSING, "TCP_CLOSING" },
    { TCP_NEW_SYN_RECV, "TCP_NEW_SYN_RECV" })

Where trace-cmd and perf do not know the values of those enums.

Use the TRACE_DEFINE_ENUM() macros that will have the trace events convert
the enum strings into their values at system boot. This will allow perf and
trace-cmd to see actual numbers and not enums:

__print_symbolic(REC->oldstate,
    { 1, "TCP_ESTABLISHED" },
    { 2, "TCP_SYN_SENT" },
    { 3, "TCP_SYN_RECV" },
    { 4, "TCP_FIN_WAIT1" },
    { 5, "TCP_FIN_WAIT2" },
    { 6, "TCP_TIME_WAIT" },
    { 7, "TCP_CLOSE" },
    { 8, "TCP_CLOSE_WAIT" },
    { 9, "TCP_LAST_ACK" },
    { 10, "TCP_LISTEN" },
    { 11, "TCP_CLOSING" },
    { 12, "TCP_NEW_SYN_RECV" })

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/trace/events/tcp.h | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 07cccca..ec52fb3 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,21 +9,36 @@
 #include <linux/tracepoint.h>
 #include <net/ipv6.h>
 
+#define tcp_state_names         \
+		EM(TCP_ESTABLISHED)     \
+		EM(TCP_SYN_SENT)        \
+		EM(TCP_SYN_RECV)        \
+		EM(TCP_FIN_WAIT1)       \
+		EM(TCP_FIN_WAIT2)       \
+		EM(TCP_TIME_WAIT)       \
+		EM(TCP_CLOSE)           \
+		EM(TCP_CLOSE_WAIT)      \
+		EM(TCP_LAST_ACK)        \
+		EM(TCP_LISTEN)          \
+		EM(TCP_CLOSING)         \
+		EMe(TCP_NEW_SYN_RECV)   \
+
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a)         TRACE_DEFINE_ENUM(a);
+#define EMe(a)        TRACE_DEFINE_ENUM(a);
+
+tcp_state_names
+
+#undef EM
+#undef EMe
+#define EM(a)         tcp_state_name(a),
+#define EMe(a)        tcp_state_name(a)
+
 #define tcp_state_name(state)	{ state, #state }
 #define show_tcp_state_name(val)			\
-	__print_symbolic(val,				\
-		tcp_state_name(TCP_ESTABLISHED),	\
-		tcp_state_name(TCP_SYN_SENT),		\
-		tcp_state_name(TCP_SYN_RECV),		\
-		tcp_state_name(TCP_FIN_WAIT1),		\
-		tcp_state_name(TCP_FIN_WAIT2),		\
-		tcp_state_name(TCP_TIME_WAIT),		\
-		tcp_state_name(TCP_CLOSE),		\
-		tcp_state_name(TCP_CLOSE_WAIT),		\
-		tcp_state_name(TCP_LAST_ACK),		\
-		tcp_state_name(TCP_LISTEN),		\
-		tcp_state_name(TCP_CLOSING),		\
-		tcp_state_name(TCP_NEW_SYN_RECV))
+	__print_symbolic(val, tcp_state_names)
 
 /*
  * tcp event with arguments sk and skb
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint
From: Yafang Shao @ 2017-12-20  3:12 UTC (permalink / raw)
  To: songliubraving, davem, marcelo.leitner, rostedt
  Cc: bgregg, netdev, linux-kernel, Yafang Shao
In-Reply-To: <1513739574-3345-1-git-send-email-laoar.shao@gmail.com>

As sk_state is a common field for struct sock, so the state
transition tracepoint should not be a TCP specific feature.
Currently it traces all AF_INET state transition, so I rename this
tracepoint to inet_sock_set_state tracepoint with some minor changes and move it
into trace/events/sock.h.
We dont need to create a file named trace/events/inet_sock.h for this one single
tracepoint.

Two helpers are introduced to trace sk_state transition
    - void inet_sk_state_store(struct sock *sk, int newstate);
    - void inet_sk_set_state(struct sock *sk, int state);
As trace header should not be included in other header files,
so they are defined in sock.c.

The protocol such as SCTP maybe compiled as a ko, hence export
inet_sk_set_state().

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/net/inet_sock.h         |   2 +
 include/trace/events/sock.h     | 107 ++++++++++++++++++++++++++++++++++++++++
 include/trace/events/tcp.h      |  31 ------------
 net/ipv4/af_inet.c              |  14 ++++++
 net/ipv4/inet_connection_sock.c |   6 +--
 net/ipv4/inet_hashtables.c      |   2 +-
 net/ipv4/tcp.c                  |   6 +--
 7 files changed, 128 insertions(+), 40 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 39efb96..a3431a4 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -290,6 +290,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 #endif
 
 int inet_sk_rebuild_header(struct sock *sk);
+void inet_sk_set_state(struct sock *sk, int state);
+void inet_sk_state_store(struct sock *sk, int newstate);
 
 static inline unsigned int __inet_ehashfn(const __be32 laddr,
 					  const __u16 lport,
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index ec4dade..3b9094a 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -6,7 +6,50 @@
 #define _TRACE_SOCK_H
 
 #include <net/sock.h>
+#include <net/ipv6.h>
 #include <linux/tracepoint.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+/* The protocol traced by sock_set_state */
+#define inet_protocol_names		\
+		EM(IPPROTO_TCP)			\
+		EM(IPPROTO_DCCP)		\
+		EMe(IPPROTO_SCTP)
+
+#define tcp_state_names			\
+		EM(TCP_ESTABLISHED)		\
+		EM(TCP_SYN_SENT)		\
+		EM(TCP_SYN_RECV)		\
+		EM(TCP_FIN_WAIT1)		\
+		EM(TCP_FIN_WAIT2)		\
+		EM(TCP_TIME_WAIT)		\
+		EM(TCP_CLOSE)			\
+		EM(TCP_CLOSE_WAIT)		\
+		EM(TCP_LAST_ACK)		\
+		EM(TCP_LISTEN)			\
+		EM(TCP_CLOSING)			\
+		EMe(TCP_NEW_SYN_RECV)
+
+/* enums need to be exported to user space */
+#undef EM
+#undef EMe
+#define EM(a)       TRACE_DEFINE_ENUM(a);
+#define EMe(a)      TRACE_DEFINE_ENUM(a);
+
+inet_protocol_names
+tcp_state_names
+
+#undef EM
+#undef EMe
+#define EM(a)       { a, #a },
+#define EMe(a)      { a, #a }
+
+#define show_inet_protocol_name(val)    \
+	__print_symbolic(val, inet_protocol_names)
+
+#define show_tcp_state_name(val)        \
+	__print_symbolic(val, tcp_state_names)
 
 TRACE_EVENT(sock_rcvqueue_full,
 
@@ -63,6 +106,70 @@
 		__entry->rmem_alloc)
 );
 
+TRACE_EVENT(inet_sock_set_state,
+
+	TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
+
+	TP_ARGS(sk, oldstate, newstate),
+
+	TP_STRUCT__entry(
+		__field(const void *, skaddr)
+		__field(int, oldstate)
+		__field(int, newstate)
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u8, protocol)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->skaddr = sk;
+		__entry->oldstate = oldstate;
+		__entry->newstate = newstate;
+
+		__entry->protocol = sk->sk_protocol;
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 =  inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (sk->sk_family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = sk->sk_v6_rcv_saddr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = sk->sk_v6_daddr;
+		} else
+#endif
+		{
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+		}
+	),
+
+	TP_printk("protocol=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4"
+			"saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
+			show_inet_protocol_name(__entry->protocol),
+			__entry->sport, __entry->dport,
+			__entry->saddr, __entry->daddr,
+			__entry->saddr_v6, __entry->daddr_v6,
+			show_tcp_state_name(__entry->oldstate),
+			show_tcp_state_name(__entry->newstate))
+);
+
 #endif /* _TRACE_SOCK_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index ec52fb3..8e88a16 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -9,37 +9,6 @@
 #include <linux/tracepoint.h>
 #include <net/ipv6.h>
 
-#define tcp_state_names         \
-		EM(TCP_ESTABLISHED)     \
-		EM(TCP_SYN_SENT)        \
-		EM(TCP_SYN_RECV)        \
-		EM(TCP_FIN_WAIT1)       \
-		EM(TCP_FIN_WAIT2)       \
-		EM(TCP_TIME_WAIT)       \
-		EM(TCP_CLOSE)           \
-		EM(TCP_CLOSE_WAIT)      \
-		EM(TCP_LAST_ACK)        \
-		EM(TCP_LISTEN)          \
-		EM(TCP_CLOSING)         \
-		EMe(TCP_NEW_SYN_RECV)   \
-
-/* enums need to be exported to user space */
-#undef EM
-#undef EMe
-#define EM(a)         TRACE_DEFINE_ENUM(a);
-#define EMe(a)        TRACE_DEFINE_ENUM(a);
-
-tcp_state_names
-
-#undef EM
-#undef EMe
-#define EM(a)         tcp_state_name(a),
-#define EMe(a)        tcp_state_name(a)
-
-#define tcp_state_name(state)	{ state, #state }
-#define show_tcp_state_name(val)			\
-	__print_symbolic(val, tcp_state_names)
-
 /*
  * tcp event with arguments sk and skb
  *
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f00499a..bab98a4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -121,6 +121,7 @@
 #endif
 #include <net/l3mdev.h>
 
+#include <trace/events/sock.h>
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
@@ -1220,6 +1221,19 @@ int inet_sk_rebuild_header(struct sock *sk)
 }
 EXPORT_SYMBOL(inet_sk_rebuild_header);
 
+void inet_sk_set_state(struct sock *sk, int state)
+{
+	trace_inet_sock_set_state(sk, sk->sk_state, state);
+	sk->sk_state = state;
+}
+EXPORT_SYMBOL(inet_sk_set_state);
+
+void inet_sk_state_store(struct sock *sk, int newstate)
+{
+	trace_inet_sock_set_state(sk, sk->sk_state, newstate);
+	smp_store_release(&sk->sk_state, newstate);
+}
+
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features)
 {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4ca46dc..f460fc0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -783,7 +783,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
 	if (newsk) {
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 
-		newsk->sk_state = TCP_SYN_RECV;
+		inet_sk_set_state(newsk, TCP_SYN_RECV);
 		newicsk->icsk_bind_hash = NULL;
 
 		inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
@@ -877,7 +877,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 	 * It is OK, because this socket enters to hash table only
 	 * after validation is complete.
 	 */
-	sk_state_store(sk, TCP_LISTEN);
+	inet_sk_state_store(sk, TCP_LISTEN);
 	if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
 		inet->inet_sport = htons(inet->inet_num);
 
@@ -888,7 +888,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 			return 0;
 	}
 
-	sk->sk_state = TCP_CLOSE;
+	inet_sk_set_state(sk, TCP_CLOSE);
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f5810..37b7da0 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -544,7 +544,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	} else {
 		percpu_counter_inc(sk->sk_prot->orphan_count);
-		sk->sk_state = TCP_CLOSE;
+		inet_sk_set_state(sk, TCP_CLOSE);
 		sock_set_flag(sk, SOCK_DEAD);
 		inet_csk_destroy_sock(sk);
 	}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c470fec..d408fb4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,8 +283,6 @@
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
 
-#include <trace/events/tcp.h>
-
 struct percpu_counter tcp_orphan_count;
 EXPORT_SYMBOL_GPL(tcp_orphan_count);
 
@@ -2040,8 +2038,6 @@ void tcp_set_state(struct sock *sk, int state)
 {
 	int oldstate = sk->sk_state;
 
-	trace_tcp_set_state(sk, oldstate, state);
-
 	switch (state) {
 	case TCP_ESTABLISHED:
 		if (oldstate != TCP_ESTABLISHED)
@@ -2065,7 +2061,7 @@ void tcp_set_state(struct sock *sk, int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk_state_store(sk, state);
+	inet_sk_state_store(sk, state);
 
 #ifdef STATE_TRACE
 	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v3 net-next 3/5] net: sock: replace sk_state_load with inet_sk_state_load and remove sk_state_store
From: Yafang Shao @ 2017-12-20  3:12 UTC (permalink / raw)
  To: songliubraving, davem, marcelo.leitner, rostedt
  Cc: bgregg, netdev, linux-kernel, Yafang Shao
In-Reply-To: <1513739574-3345-1-git-send-email-laoar.shao@gmail.com>

sk_state_load is only used by AF_INET/AF_INET6, so rename it to
inet_sk_state_load and move it into inet_sock.h.

sk_state_store is removed as it is not used any more.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/net/inet_sock.h         | 25 ++++++++++++++++++++++++-
 include/net/sock.h              | 25 -------------------------
 net/ipv4/inet_connection_sock.c |  2 +-
 net/ipv4/tcp.c                  |  4 ++--
 net/ipv4/tcp_diag.c             |  2 +-
 net/ipv4/tcp_ipv4.c             |  2 +-
 net/ipv6/tcp_ipv6.c             |  2 +-
 7 files changed, 30 insertions(+), 32 deletions(-)

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a3431a4..0a671c3 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -290,9 +290,32 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 #endif
 
 int inet_sk_rebuild_header(struct sock *sk);
-void inet_sk_set_state(struct sock *sk, int state);
+
+/**
+ * inet_sk_state_load - read sk->sk_state for lockless contexts
+ * @sk: socket pointer
+ *
+ * Paired with inet_sk_state_store(). Used in places we don't hold socket lock:
+ * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
+ */
+static inline int inet_sk_state_load(const struct sock *sk)
+{
+	/* state change might impact lockless readers. */
+	return smp_load_acquire(&sk->sk_state);
+}
+
+/**
+ * inet_sk_state_store - update sk->sk_state
+ * @sk: socket pointer
+ * @newstate: new state
+ *
+ * Paired with inet_sk_state_load(). Should be used in contexts where
+ * state change might impact lockless readers.
+ */
 void inet_sk_state_store(struct sock *sk, int newstate);
 
+void inet_sk_set_state(struct sock *sk, int state);
+
 static inline unsigned int __inet_ehashfn(const __be32 laddr,
 					  const __u16 lport,
 					  const __be32 faddr,
diff --git a/include/net/sock.h b/include/net/sock.h
index 9a90472..4fd211b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2332,31 +2332,6 @@ static inline bool sk_listener(const struct sock *sk)
 	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
 }
 
-/**
- * sk_state_load - read sk->sk_state for lockless contexts
- * @sk: socket pointer
- *
- * Paired with sk_state_store(). Used in places we do not hold socket lock :
- * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
- */
-static inline int sk_state_load(const struct sock *sk)
-{
-	return smp_load_acquire(&sk->sk_state);
-}
-
-/**
- * sk_state_store - update sk->sk_state
- * @sk: socket pointer
- * @newstate: new state
- *
- * Paired with sk_state_load(). Should be used in contexts where
- * state change might impact lockless readers.
- */
-static inline void sk_state_store(struct sock *sk, int newstate)
-{
-	smp_store_release(&sk->sk_state, newstate);
-}
-
 void sock_enable_timestamp(struct sock *sk, int flag);
 int sock_get_timestamp(struct sock *, struct timeval __user *);
 int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f460fc0..12410ec 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -685,7 +685,7 @@ static void reqsk_timer_handler(struct timer_list *t)
 	int max_retries, thresh;
 	u8 defer_accept;
 
-	if (sk_state_load(sk_listener) != TCP_LISTEN)
+	if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
 		goto drop;
 
 	max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d408fb4..67d39b7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -502,7 +502,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 
-	state = sk_state_load(sk);
+	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
@@ -2916,7 +2916,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	if (sk->sk_type != SOCK_STREAM)
 		return;
 
-	info->tcpi_state = sk_state_load(sk);
+	info->tcpi_state = inet_sk_state_load(sk);
 
 	/* Report meaningful fields for all TCP states, including listeners */
 	rate = READ_ONCE(sk->sk_pacing_rate);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index abbf0ed..81148f7 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -24,7 +24,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 {
 	struct tcp_info *info = _info;
 
-	if (sk_state_load(sk) == TCP_LISTEN) {
+	if (inet_sk_state_load(sk) == TCP_LISTEN) {
 		r->idiag_rqueue = sk->sk_ack_backlog;
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
 	} else if (sk->sk_type == SOCK_STREAM) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77ea45d..67ef303 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2281,7 +2281,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 		timer_expires = jiffies;
 	}
 
-	state = sk_state_load(sk);
+	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
 		rx_queue = sk->sk_ack_backlog;
 	else
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1f04ec0..af2b2a2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1795,7 +1795,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		timer_expires = jiffies;
 	}
 
-	state = sk_state_load(sp);
+	state = inet_sk_state_load(sp);
 	if (state == TCP_LISTEN)
 		rx_queue = sp->sk_ack_backlog;
 	else
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v3 net-next 4/5] net: tracepoint: using sock_set_state tracepoint to trace DCCP state transition
From: Yafang Shao @ 2017-12-20  3:12 UTC (permalink / raw)
  To: songliubraving, davem, marcelo.leitner, rostedt
  Cc: bgregg, netdev, linux-kernel, Yafang Shao
In-Reply-To: <1513739574-3345-1-git-send-email-laoar.shao@gmail.com>

With changes in inet_ files, DCCP state transitions are traced with
inet_sock_set_state tracepoint.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 net/dccp/proto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9d43c1f..7a75a1d 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -110,7 +110,7 @@ void dccp_set_state(struct sock *sk, const int state)
 	/* Change state AFTER socket is unhashed to avoid closed
 	 * socket sitting in hash tables.
 	 */
-	sk->sk_state = state;
+	inet_sk_set_state(sk, state);
 }
 
 EXPORT_SYMBOL_GPL(dccp_set_state);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH v3 net-next 5/5] net: tracepoint: using sock_set_state tracepoint to trace SCTP state transition
From: Yafang Shao @ 2017-12-20  3:12 UTC (permalink / raw)
  To: songliubraving, davem, marcelo.leitner, rostedt
  Cc: bgregg, netdev, linux-kernel, Yafang Shao
In-Reply-To: <1513739574-3345-1-git-send-email-laoar.shao@gmail.com>

With changes in inet_ files, SCTP state transitions are traced with
inet_sock_set_state tracepoint.
As SCTP state names, i.e. SCTP_SS_CLOSED, SCTP_SS_ESTABLISHED,
have the same value with TCP state names. So the output info still print
the TCP state names, that makes the code easy.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 net/sctp/endpointola.c   |  2 +-
 net/sctp/sm_sideeffect.c |  4 ++--
 net/sctp/socket.c        | 12 ++++++------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index ee1e601..8b31468 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -232,7 +232,7 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
 {
 	ep->base.dead = true;
 
-	ep->base.sk->sk_state = SCTP_SS_CLOSED;
+	inet_sk_set_state(ep->base.sk, SCTP_SS_CLOSED);
 
 	/* Unlink this endpoint, so we can't find it again! */
 	sctp_unhash_endpoint(ep);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 8adde71..c0c3ec6 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -878,12 +878,12 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
 		 * successfully completed a connect() call.
 		 */
 		if (sctp_state(asoc, ESTABLISHED) && sctp_sstate(sk, CLOSED))
-			sk->sk_state = SCTP_SS_ESTABLISHED;
+			inet_sk_set_state(sk, SCTP_SS_ESTABLISHED);
 
 		/* Set the RCV_SHUTDOWN flag when a SHUTDOWN is received. */
 		if (sctp_state(asoc, SHUTDOWN_RECEIVED) &&
 		    sctp_sstate(sk, ESTABLISHED)) {
-			sk->sk_state = SCTP_SS_CLOSING;
+			inet_sk_set_state(sk, SCTP_SS_CLOSING);
 			sk->sk_shutdown |= RCV_SHUTDOWN;
 		}
 	}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7eec0a0..59b5689 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1544,7 +1544,7 @@ static void sctp_close(struct sock *sk, long timeout)
 
 	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 	sk->sk_shutdown = SHUTDOWN_MASK;
-	sk->sk_state = SCTP_SS_CLOSING;
+	inet_sk_set_state(sk, SCTP_SS_CLOSING);
 
 	ep = sctp_sk(sk)->ep;
 
@@ -4653,7 +4653,7 @@ static void sctp_shutdown(struct sock *sk, int how)
 	if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
 		struct sctp_association *asoc;
 
-		sk->sk_state = SCTP_SS_CLOSING;
+		inet_sk_set_state(sk, SCTP_SS_CLOSING);
 		asoc = list_entry(ep->asocs.next,
 				  struct sctp_association, asocs);
 		sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@ -7509,13 +7509,13 @@ static int sctp_listen_start(struct sock *sk, int backlog)
 	 * sockets.
 	 *
 	 */
-	sk->sk_state = SCTP_SS_LISTENING;
+	inet_sk_set_state(sk, SCTP_SS_LISTENING);
 	if (!ep->base.bind_addr.port) {
 		if (sctp_autobind(sk))
 			return -EAGAIN;
 	} else {
 		if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
-			sk->sk_state = SCTP_SS_CLOSED;
+			inet_sk_set_state(sk, SCTP_SS_CLOSED);
 			return -EADDRINUSE;
 		}
 	}
@@ -8538,10 +8538,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	 * is called, set RCV_SHUTDOWN flag.
 	 */
 	if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
-		newsk->sk_state = SCTP_SS_CLOSED;
+		inet_sk_set_state(newsk, SCTP_SS_CLOSED);
 		newsk->sk_shutdown |= RCV_SHUTDOWN;
 	} else {
-		newsk->sk_state = SCTP_SS_ESTABLISHED;
+		inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED);
 	}
 
 	release_sock(newsk);
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH] selftests/bpf: remove the DEBUG macro for test_dev_cgroup
From: Chen Rong @ 2017-12-20  3:15 UTC (permalink / raw)
  Cc: chenr.fnst, Alexei Starovoitov, Daniel Borkmann, Shuah Khan,
	netdev, linux-kernel, linux-kselftest

The test may fail if not enable DEBUG macro in dev_cgroup.c
  # ./test_dev_cgroup
  libbpf: load bpf program failed: Operation not permitted
  libbpf: failed to load program 'cgroup/dev'
  libbpf: failed to load object './dev_cgroup.o'
  Failed to load DEV_CGROUP program

Removing the DEBUG macro makes the test always pass.

Signed-off-by: Chen Rong <chenr.fnst@cn.fujitsu.com>
---
 tools/testing/selftests/bpf/dev_cgroup.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c
index ce41a34..a167c6d 100644
--- a/tools/testing/selftests/bpf/dev_cgroup.c
+++ b/tools/testing/selftests/bpf/dev_cgroup.c
@@ -13,7 +13,6 @@ SEC("cgroup/dev")
 int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
 {
 	short type = ctx->access_type & 0xFFFF;
-#ifdef DEBUG
 	short access = ctx->access_type >> 16;
 	char fmt[] = "  %d:%d    \n";
 
@@ -39,7 +38,6 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
 		fmt[10] = 'm';
 
 	bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
-#endif
 
 	/* Allow access to /dev/zero and /dev/random.
 	 * Forbid everything else.
-- 
2.5.0

^ permalink raw reply related

* [PATCH net-next v3] netdevsim: correctly check return value of debugfs_create_dir
From: Prashant Bhole @ 2017-12-20  3:18 UTC (permalink / raw)
  To: David S . Miller; +Cc: Prashant Bhole, netdev, Jakub Kicinski

- Checking return value with IS_ERROR_OR_NULL
- Added error handling where it was not handled

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v3: nit-pick: directly returning error instead of going to label

 drivers/net/netdevsim/bpf.c    | 8 ++++----
 drivers/net/netdevsim/netdev.c | 6 ++++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 078d2c37a6c1..aeb429428cc5 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -201,7 +201,6 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 {
 	struct nsim_bpf_bound_prog *state;
 	char name[16];
-	int err;
 
 	state = kzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state)
@@ -214,10 +213,9 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 	/* Program id is not populated yet when we create the state. */
 	sprintf(name, "%u", ns->prog_id_gen++);
 	state->ddir = debugfs_create_dir(name, ns->ddir_bpf_bound_progs);
-	if (IS_ERR(state->ddir)) {
-		err = PTR_ERR(state->ddir);
+	if (IS_ERR_OR_NULL(state->ddir)) {
 		kfree(state);
-		return err;
+		return -ENOMEM;
 	}
 
 	debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
@@ -349,6 +347,8 @@ int nsim_bpf_init(struct netdevsim *ns)
 			   &ns->bpf_bind_verifier_delay);
 	ns->ddir_bpf_bound_progs =
 		debugfs_create_dir("bpf_bound_progs", ns->ddir);
+	if (IS_ERR_OR_NULL(ns->ddir_bpf_bound_progs))
+		return -ENOMEM;
 
 	ns->bpf_tc_accept = true;
 	debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index eb8c679fca9f..56d7ea93a983 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -151,6 +151,8 @@ static int nsim_init(struct net_device *dev)
 
 	ns->netdev = dev;
 	ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir);
+	if (IS_ERR_OR_NULL(ns->ddir))
+		return -ENOMEM;
 
 	err = nsim_bpf_init(ns);
 	if (err)
@@ -469,8 +471,8 @@ static int __init nsim_module_init(void)
 	int err;
 
 	nsim_ddir = debugfs_create_dir(DRV_NAME, NULL);
-	if (IS_ERR(nsim_ddir))
-		return PTR_ERR(nsim_ddir);
+	if (IS_ERR_OR_NULL(nsim_ddir))
+		return -ENOMEM;
 
 	err = bus_register(&nsim_bus);
 	if (err)
-- 
2.13.6

^ permalink raw reply related

* [PATCH bpf-next 0/8] bpf: offload: report device back to user space (take 2)
From: Jakub Kicinski @ 2017-12-20  4:09 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski

Hi!

This series is a redo of reporting offload device information to
user space after the first attempt did not take into account name
spaces.  As requested by Kirill offloads are now protected by an
r/w sem.  This allows us to remove the workqueue and free the
offload state fully when device is removed (suggested by Alexei).

Net namespace is reported with a device/inode pair.

The accompanying bpftool support is placed in common code because
maps will have very similar info.  Note that the UAPI information
can't be nicely encapsulated into a struct, because in case we
need to grow the device information the new fields will have to
be added at the end of struct bpf_prog_info, we can't grow
structures in the middle of bpf_prog_info.


Jakub Kicinski (8):
  bpf: offload: don't require rtnl for dev list manipulation
  bpf: offload: don't use prog->aux->offload as boolean
  bpf: offload: allow netdev to disappear while verifier is running
  bpf: offload: free prog->aux->offload when device disappears
  bpf: offload: free program id when device disappears
  bpf: offload: report device information for offloaded programs
  tools: bpftool: report device information for offloaded programs
  selftests/bpf: test device info reporting for bound progs

 drivers/net/ethernet/netronome/nfp/bpf/main.h     |   2 +-
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |   2 +-
 drivers/net/netdevsim/bpf.c                       |   2 +-
 fs/nsfs.c                                         |   2 +-
 include/linux/bpf.h                               |  16 ++-
 include/linux/bpf_verifier.h                      |  16 +--
 include/linux/netdevice.h                         |   4 +-
 include/linux/proc_ns.h                           |   1 +
 include/uapi/linux/bpf.h                          |   3 +
 kernel/bpf/offload.c                              | 114 ++++++++++++++++------
 kernel/bpf/syscall.c                              |  19 +++-
 kernel/bpf/verifier.c                             |  20 ++--
 tools/bpf/bpftool/common.c                        |  52 ++++++++++
 tools/bpf/bpftool/main.h                          |   2 +
 tools/bpf/bpftool/prog.c                          |   3 +
 tools/include/uapi/linux/bpf.h                    |   3 +
 tools/testing/selftests/bpf/test_offload.py       | 107 +++++++++++++++++---
 17 files changed, 287 insertions(+), 81 deletions(-)

-- 
2.15.1

^ permalink raw reply

* [PATCH bpf-next 1/8] bpf: offload: don't require rtnl for dev list manipulation
From: Jakub Kicinski @ 2017-12-20  4:09 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

We only need to hold rtnl_lock() around ndo calls.  The device
offload initialization doesn't require it.  Neither will soon-
-to-come querying the offload info.  Use struct rw_semaphore
because map offload will require sleeping with the semaphore
held for read.

Suggested-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 kernel/bpf/offload.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..b88e5ebdc61d 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -20,8 +20,12 @@
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/rtnetlink.h>
+#include <linux/rwsem.h>
 
-/* protected by RTNL */
+/* Protects bpf_prog_offload_devs and offload members of all progs.
+ * RTNL lock cannot be taken when holding this lock.
+ */
+static struct rw_semaphore bpf_devs_lock;
 static LIST_HEAD(bpf_prog_offload_devs);
 
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
@@ -43,17 +47,21 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	offload->prog = prog;
 	init_waitqueue_head(&offload->verifier_done);
 
-	rtnl_lock();
+	/* Our UNREGISTER notifier will grab bpf_devs_lock, so we are safe
+	 * to assume the netdev doesn't get unregistered as long as we hold
+	 * bpf_devs_lock.
+	 */
+	down_write(&bpf_devs_lock);
 	offload->netdev = __dev_get_by_index(net, attr->prog_ifindex);
 	if (!offload->netdev) {
-		rtnl_unlock();
+		up_write(&bpf_devs_lock);
 		kfree(offload);
 		return -EINVAL;
 	}
 
 	prog->aux->offload = offload;
 	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
-	rtnl_unlock();
+	up_write(&bpf_devs_lock);
 
 	return 0;
 }
@@ -126,7 +134,9 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 	wake_up(&offload->verifier_done);
 
 	rtnl_lock();
+	down_write(&bpf_devs_lock);
 	__bpf_prog_offload_destroy(prog);
+	up_write(&bpf_devs_lock);
 	rtnl_unlock();
 
 	kfree(offload);
@@ -181,11 +191,13 @@ static int bpf_offload_notification(struct notifier_block *notifier,
 		if (netdev->reg_state != NETREG_UNREGISTERING)
 			break;
 
+		down_write(&bpf_devs_lock);
 		list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
 					 offloads) {
 			if (offload->netdev == netdev)
 				__bpf_prog_offload_destroy(offload->prog);
 		}
+		up_write(&bpf_devs_lock);
 		break;
 	default:
 		break;
@@ -199,6 +211,7 @@ static struct notifier_block bpf_offload_notifier = {
 
 static int __init bpf_offload_init(void)
 {
+	init_rwsem(&bpf_devs_lock);
 	register_netdevice_notifier(&bpf_offload_notifier);
 	return 0;
 }
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 2/8] bpf: offload: don't use prog->aux->offload as boolean
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

We currently use aux->offload to indicate that program is bound
to a specific device.  This forces us to keep the offload structure
around even after the device is gone.  Add a bool member to
struct bpf_prog_aux to indicate if offload was requested.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h  | 3 ++-
 kernel/bpf/syscall.c | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da54ef644fcd..838eee10e979 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -201,6 +201,7 @@ struct bpf_prog_aux {
 	u32 stack_depth;
 	u32 id;
 	u32 func_cnt;
+	bool offload_requested;
 	struct bpf_prog **func;
 	void *jit_data; /* JIT specific data. arch dependent */
 	struct latch_tree_node ksym_tnode;
@@ -529,7 +530,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
 static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
 {
-	return aux->offload;
+	return aux->offload_requested;
 }
 #else
 static inline int bpf_prog_offload_init(struct bpf_prog *prog,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e2e1c78ce1dc..1143db61584c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1151,6 +1151,8 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (!prog)
 		return -ENOMEM;
 
+	prog->aux->offload_requested = !!attr->prog_ifindex;
+
 	err = security_bpf_prog_alloc(prog->aux);
 	if (err)
 		goto free_prog_nouncharge;
@@ -1172,7 +1174,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	atomic_set(&prog->aux->refcnt, 1);
 	prog->gpl_compatible = is_gpl ? 1 : 0;
 
-	if (attr->prog_ifindex) {
+	if (bpf_prog_is_dev_bound(prog->aux)) {
 		err = bpf_prog_offload_init(prog, attr);
 		if (err)
 			goto free_prog;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 3/8] bpf: offload: allow netdev to disappear while verifier is running
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

To allow verifier instruction callbacks without any extra locking
NETDEV_UNREGISTER notification would wait on a waitqueue for verifier
to finish.  This design decision was made when rtnl lock was providing
all the locking.  Use the read/write lock instead and remove the
workqueue.

Verifier will now call into the offload code, so dev_ops are moved
to offload structure.  Since verifier calls are all under
bpf_prog_is_dev_bound() we no longer need static inline implementations
to please builds with CONFIG_NET=n.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.h     |  2 +-
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |  2 +-
 drivers/net/netdevsim/bpf.c                       |  2 +-
 include/linux/bpf.h                               |  9 +++++--
 include/linux/bpf_verifier.h                      | 16 ++----------
 include/linux/netdevice.h                         |  4 +--
 kernel/bpf/offload.c                              | 30 ++++++++++++-----------
 kernel/bpf/verifier.c                             | 20 ++++++---------
 8 files changed, 37 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index aae1be9ed056..89a9b6393882 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -238,7 +238,7 @@ struct nfp_bpf_vnic {
 
 int nfp_bpf_jit(struct nfp_prog *prog);
 
-extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
+extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
 
 struct netdev_bpf;
 struct nfp_app;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 9c2608445bd8..d8870c2f11f3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -260,6 +260,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	return 0;
 }
 
-const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
 	.insn_hook = nfp_verify_insn,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index c977fece64a3..e363658405ee 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -66,7 +66,7 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
 	return 0;
 }
 
-static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = {
+static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
 	.insn_hook = nsim_bpf_verify_insn,
 };
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 838eee10e979..669549f7e3e8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -17,6 +17,7 @@
 #include <linux/numa.h>
 #include <linux/wait.h>
 
+struct bpf_verifier_env;
 struct perf_event;
 struct bpf_prog;
 struct bpf_map;
@@ -184,14 +185,18 @@ struct bpf_verifier_ops {
 				  struct bpf_prog *prog, u32 *target_size);
 };
 
+struct bpf_prog_offload_ops {
+	int (*insn_hook)(struct bpf_verifier_env *env,
+			 int insn_idx, int prev_insn_idx);
+};
+
 struct bpf_dev_offload {
 	struct bpf_prog		*prog;
 	struct net_device	*netdev;
 	void			*dev_priv;
 	struct list_head	offloads;
 	bool			dev_state;
-	bool			verifier_running;
-	wait_queue_head_t	verifier_done;
+	const struct bpf_prog_offload_ops *dev_ops;
 };
 
 struct bpf_prog_aux {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index aaac589e490c..02ede122d35b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -166,12 +166,6 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
 	return log->len_used >= log->len_total - 1;
 }
 
-struct bpf_verifier_env;
-struct bpf_ext_analyzer_ops {
-	int (*insn_hook)(struct bpf_verifier_env *env,
-			 int insn_idx, int prev_insn_idx);
-};
-
 #define BPF_MAX_SUBPROGS 256
 
 /* single container for all structs
@@ -185,7 +179,6 @@ struct bpf_verifier_env {
 	bool strict_alignment;		/* perform strict pointer alignment checks */
 	struct bpf_verifier_state *cur_state; /* current verifier state */
 	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
-	const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
 	u32 id_gen;			/* used to generate unique reg IDs */
@@ -205,13 +198,8 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
 	return cur->frame[cur->curframe]->regs;
 }
 
-#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
-#else
-static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
-{
-	return -EOPNOTSUPP;
-}
-#endif
+int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
+				 int insn_idx, int prev_insn_idx);
 
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc4ce7456e38..0a1a4a111546 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -804,7 +804,7 @@ enum bpf_netdev_command {
 	BPF_OFFLOAD_DESTROY,
 };
 
-struct bpf_ext_analyzer_ops;
+struct bpf_prog_offload_ops;
 struct netlink_ext_ack;
 
 struct netdev_bpf {
@@ -826,7 +826,7 @@ struct netdev_bpf {
 		/* BPF_OFFLOAD_VERIFIER_PREP */
 		struct {
 			struct bpf_prog *prog;
-			const struct bpf_ext_analyzer_ops *ops; /* callee set */
+			const struct bpf_prog_offload_ops *ops; /* callee set */
 		} verifier;
 		/* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
 		struct {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index b88e5ebdc61d..cda2d8350fe1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -45,7 +45,6 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 		return -ENOMEM;
 
 	offload->prog = prog;
-	init_waitqueue_head(&offload->verifier_done);
 
 	/* Our UNREGISTER notifier will grab bpf_devs_lock, so we are safe
 	 * to assume the netdev doesn't get unregistered as long as we hold
@@ -95,15 +94,28 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
 	if (err)
 		goto exit_unlock;
 
-	env->dev_ops = data.verifier.ops;
-
+	env->prog->aux->offload->dev_ops = data.verifier.ops;
 	env->prog->aux->offload->dev_state = true;
-	env->prog->aux->offload->verifier_running = true;
 exit_unlock:
 	rtnl_unlock();
 	return err;
 }
 
+int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
+				 int insn_idx, int prev_insn_idx)
+{
+	struct bpf_dev_offload *offload;
+	int ret = -ENODEV;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload->netdev)
+		ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
+	up_read(&bpf_devs_lock);
+
+	return ret;
+}
+
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_dev_offload *offload = prog->aux->offload;
@@ -115,9 +127,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 
 	data.offload.prog = prog;
 
-	if (offload->verifier_running)
-		wait_event(offload->verifier_done, !offload->verifier_running);
-
 	if (offload->dev_state)
 		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
 
@@ -130,9 +139,6 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_dev_offload *offload = prog->aux->offload;
 
-	offload->verifier_running = false;
-	wake_up(&offload->verifier_done);
-
 	rtnl_lock();
 	down_write(&bpf_devs_lock);
 	__bpf_prog_offload_destroy(prog);
@@ -144,15 +150,11 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog)
 
 static int bpf_prog_offload_translate(struct bpf_prog *prog)
 {
-	struct bpf_dev_offload *offload = prog->aux->offload;
 	struct netdev_bpf data = {};
 	int ret;
 
 	data.offload.prog = prog;
 
-	offload->verifier_running = false;
-	wake_up(&offload->verifier_done);
-
 	rtnl_lock();
 	ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data);
 	rtnl_unlock();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48b2901cf483..6b95efad5828 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4341,15 +4341,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	return 0;
 }
 
-static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
-				  int insn_idx, int prev_insn_idx)
-{
-	if (env->dev_ops && env->dev_ops->insn_hook)
-		return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
-
-	return 0;
-}
-
 static int do_check(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state;
@@ -4431,9 +4422,12 @@ static int do_check(struct bpf_verifier_env *env)
 				       env->allow_ptr_leaks);
 		}
 
-		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
-		if (err)
-			return err;
+		if (bpf_prog_is_dev_bound(env->prog->aux)) {
+			err = bpf_prog_offload_verify_insn(env, insn_idx,
+							   prev_insn_idx);
+			if (err)
+				return err;
+		}
 
 		regs = cur_regs(env);
 		env->insn_aux_data[insn_idx].seen = true;
@@ -5341,7 +5335,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		env->strict_alignment = true;
 
-	if (env->prog->aux->offload) {
+	if (bpf_prog_is_dev_bound(env->prog->aux)) {
 		ret = bpf_prog_offload_verifier_prep(env);
 		if (ret)
 			goto err_unlock;
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 4/8] bpf: offload: free prog->aux->offload when device disappears
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

All bpf offload operations should now be under bpf_devs_lock,
it's safe to free and clear the entire offload structure,
not only the netdev pointer.

__bpf_prog_offload_destroy() will no longer be called multiple
times.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/offload.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index cda2d8350fe1..9988dc4038e6 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -68,12 +68,14 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
 			     struct netdev_bpf *data)
 {
-	struct net_device *netdev = prog->aux->offload->netdev;
+	struct bpf_dev_offload *offload = prog->aux->offload;
+	struct net_device *netdev;
 
 	ASSERT_RTNL();
 
-	if (!netdev)
+	if (!offload)
 		return -ENODEV;
+	netdev = offload->netdev;
 	if (!netdev->netdev_ops->ndo_bpf)
 		return -EOPNOTSUPP;
 
@@ -109,7 +111,7 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 
 	down_read(&bpf_devs_lock);
 	offload = env->prog->aux->offload;
-	if (offload->netdev)
+	if (offload)
 		ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
 	up_read(&bpf_devs_lock);
 
@@ -121,31 +123,24 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 	struct bpf_dev_offload *offload = prog->aux->offload;
 	struct netdev_bpf data = {};
 
-	/* Caution - if netdev is destroyed before the program, this function
-	 * will be called twice.
-	 */
-
 	data.offload.prog = prog;
 
 	if (offload->dev_state)
 		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
 
-	offload->dev_state = false;
 	list_del_init(&offload->offloads);
-	offload->netdev = NULL;
+	kfree(offload);
+	prog->aux->offload = NULL;
 }
 
 void bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
-	struct bpf_dev_offload *offload = prog->aux->offload;
-
 	rtnl_lock();
 	down_write(&bpf_devs_lock);
-	__bpf_prog_offload_destroy(prog);
+	if (prog->aux->offload)
+		__bpf_prog_offload_destroy(prog);
 	up_write(&bpf_devs_lock);
 	rtnl_unlock();
-
-	kfree(offload);
 }
 
 static int bpf_prog_offload_translate(struct bpf_prog *prog)
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 6/8] bpf: offload: report device information for offloaded programs
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel
  Cc: ktkhai, oss-drivers, Jakub Kicinski, Eric W . Biederman
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

Report to the user ifindex and namespace information of offloaded
programs.  If device has disappeared return -ENODEV.  Specify the
namespace using dev/inode combination.

CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 fs/nsfs.c                      |  2 +-
 include/linux/bpf.h            |  2 ++
 include/linux/proc_ns.h        |  1 +
 include/uapi/linux/bpf.h       |  3 +++
 kernel/bpf/offload.c           | 39 +++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  6 ++++++
 tools/include/uapi/linux/bpf.h |  3 +++
 7 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 7c6f76d29f56..e50628675935 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
 	ns->ops->put(ns);
 }
 
-static void *__ns_get_path(struct path *path, struct ns_common *ns)
+void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
 	struct dentry *dentry;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9a916ab34299..7810ae57b357 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+			       struct bpf_prog *prog);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..1733359cf713 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
 
 extern struct file *proc_ns_fget(int fd);
 #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
+extern void *__ns_get_path(struct path *path, struct ns_common *ns);
 extern void *ns_get_path(struct path *path, struct task_struct *task,
 			const struct proc_ns_operations *ns_ops);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d01f1cb3cfc0..72b37fc3bc0c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 1af94cb4f815..0543f24542ae 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,9 +16,11 @@
 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bug.h>
+#include <linux/kdev_t.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
+#include <linux/proc_ns.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
 
@@ -174,6 +176,43 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
 	return bpf_prog_offload_translate(prog);
 }
 
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+			       struct bpf_prog *prog)
+{
+	struct bpf_dev_offload *offload;
+	struct inode *ns_inode;
+	struct path ns_path;
+	struct net *net;
+	void *ptr;
+
+again:
+	down_read(&bpf_devs_lock);
+	offload = prog->aux->offload;
+	if (!offload) {
+		up_read(&bpf_devs_lock);
+		return -ENODEV;
+	}
+
+	net = dev_net(offload->netdev);
+	get_net(net); /* __ns_get_path() drops the reference */
+
+	ptr = __ns_get_path(&ns_path, &net->ns);
+	if (IS_ERR(ptr)) {
+		up_read(&bpf_devs_lock);
+		if (PTR_ERR(ptr) == -EAGAIN)
+			goto again;
+		return PTR_ERR(ptr);
+	}
+	ns_inode = ns_path.dentry->d_inode;
+
+	info->ifindex = offload->netdev->ifindex;
+	info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+	info->netns_ino = ns_inode->i_ino;
+	up_read(&bpf_devs_lock);
+
+	return 0;
+}
+
 const struct bpf_prog_ops bpf_offload_prog_ops = {
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d9f5b0f0e49..20444fd678d0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1624,6 +1624,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			return -EFAULT;
 	}
 
+	if (bpf_prog_is_dev_bound(prog->aux)) {
+		err = bpf_prog_offload_info_fill(&info, prog);
+		if (err)
+			return err;
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db1b0923a308..4e8c60acfa32 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
 	char name[BPF_OBJ_NAME_LEN];
+	__u32 ifindex;
+	__u64 netns_dev;
+	__u64 netns_ino;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 5/8] bpf: offload: free program id when device disappears
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

Bound programs are quite useless after their device disappears.
They are simply waiting for reference count to go to zero,
don't list them in BPF_PROG_GET_NEXT_ID by freeing their ID
early.

Note that orphaned offload programs will return -ENODEV on
BPF_OBJ_GET_INFO_BY_FD so user will never see ID 0.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h  | 2 ++
 kernel/bpf/offload.c | 3 +++
 kernel/bpf/syscall.c | 9 +++++++--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 669549f7e3e8..9a916ab34299 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -357,6 +357,8 @@ void bpf_prog_put(struct bpf_prog *prog);
 int __bpf_prog_charge(struct user_struct *user, u32 pages);
 void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
 
+void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 9988dc4038e6..1af94cb4f815 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -128,6 +128,9 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 	if (offload->dev_state)
 		WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
 
+	/* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
+	bpf_prog_free_id(prog, true);
+
 	list_del_init(&offload->offloads);
 	kfree(offload);
 	prog->aux->offload = NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1143db61584c..7d9f5b0f0e49 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -905,9 +905,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
 	return id > 0 ? 0 : id;
 }
 
-static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
+void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 {
-	/* cBPF to eBPF migrations are currently not in the idr store. */
+	/* cBPF to eBPF migrations are currently not in the idr store.
+	 * Offloaded programs are removed from the store when their device
+	 * disappears - even if someone grabs an fd to them they are unusable,
+	 * simply waiting for refcnt to drop to be freed.
+	 */
 	if (!prog->aux->id)
 		return;
 
@@ -917,6 +921,7 @@ static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
 		__acquire(&prog_idr_lock);
 
 	idr_remove(&prog_idr, prog->aux->id);
+	prog->aux->id = 0;
 
 	if (do_idr_lock)
 		spin_unlock_bh(&prog_idr_lock);
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 7/8] tools: bpftool: report device information for offloaded programs
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

Print the just-exposed device information about device to which
program is bound.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/bpf/bpftool/common.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   |  2 ++
 tools/bpf/bpftool/prog.c   |  3 +++
 3 files changed, 57 insertions(+)

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b62c94e3997a..6601c95a9258 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -44,7 +44,9 @@
 #include <unistd.h>
 #include <linux/limits.h>
 #include <linux/magic.h>
+#include <net/if.h>
 #include <sys/mount.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
 
@@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
 		free(obj);
 	}
 }
+
+static char *
+ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
+{
+	struct stat st;
+	int err;
+
+	err = stat("/proc/self/ns/net", &st);
+	if (err) {
+		p_err("Can't stat /proc/self: %s", strerror(errno));
+		return NULL;
+	}
+
+	if (st.st_dev != ns_dev || st.st_ino != ns_ino)
+		return NULL;
+
+	return if_indextoname(ifindex, buf);
+}
+
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+	char name[IF_NAMESIZE];
+
+	if (!ifindex)
+		return;
+
+	printf(" dev ");
+	if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+		printf("%s", name);
+	else
+		printf("ifindex %u ns_dev %llu ns_ino %llu",
+		       ifindex, ns_dev, ns_inode);
+}
+
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+	char name[IF_NAMESIZE];
+
+	if (!ifindex)
+		return;
+
+	jsonw_name(json_wtr, "dev");
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "ifindex", ifindex);
+	jsonw_uint_field(json_wtr, "ns_dev", ns_dev);
+	jsonw_uint_field(json_wtr, "ns_inode", ns_inode);
+	if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+		jsonw_string_field(json_wtr, "ifname", name);
+	jsonw_end_object(json_wtr);
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 8f6d3cac0347..65b526fe6e7e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -96,6 +96,8 @@ struct pinned_obj {
 int build_pinned_obj_table(struct pinned_obj_table *table,
 			   enum bpf_obj_type type);
 void delete_pinned_obj_table(struct pinned_obj_table *tab);
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 
 struct cmd {
 	const char *cmd;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 037484ceaeaf..4ccf6301f0fe 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		     info->tag[0], info->tag[1], info->tag[2], info->tag[3],
 		     info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
 
+	print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
 	if (info->load_time) {
 		char buf[32];
 
@@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 
 	printf("tag ");
 	fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
+	print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
 	printf("\n");
 
 	if (info->load_time) {
-- 
2.15.1

^ permalink raw reply related

* [PATCH bpf-next 8/8] selftests/bpf: test device info reporting for bound progs
From: Jakub Kicinski @ 2017-12-20  4:10 UTC (permalink / raw)
  To: netdev, alexei.starovoitov, daniel; +Cc: ktkhai, oss-drivers, Jakub Kicinski
In-Reply-To: <20171220041006.25629-1-jakub.kicinski@netronome.com>

Check if bound programs report correct device info.  Test
in local namespace, in remote one, back to the local ns,
remove the device and check that information is cleared.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
---
 tools/testing/selftests/bpf/test_offload.py | 107 +++++++++++++++++++++++++---
 1 file changed, 96 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index c940505c2978..a581eb5b0f05 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -18,6 +18,8 @@ import argparse
 import json
 import os
 import pprint
+import random
+import string
 import subprocess
 import time
 
@@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
 pp = pprint.PrettyPrinter()
 devs = [] # devices we created for clean up
 files = [] # files to be removed
+netns = [] # net namespaces to be removed
 
 def log_get_sec(level=0):
     return "*" * (log_level + level)
@@ -128,22 +131,25 @@ files = [] # files to be removed
     if f in files:
         files.remove(f)
 
-def tool(name, args, flags, JSON=True, fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True):
     params = ""
     if JSON:
         params += "%s " % (flags["json"])
 
-    ret, out = cmd(name + " " + params + args, fail=fail)
+    if ns != "":
+        ns = "ip netns exec %s " % (ns)
+
+    ret, out = cmd(ns + name + " " + params + args, fail=fail)
     if JSON and len(out.strip()) != 0:
         return ret, json.loads(out)
     else:
         return ret, out
 
-def bpftool(args, JSON=True, fail=True):
-    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def bpftool(args, JSON=True, ns="", fail=True):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
 
-def bpftool_prog_list(expected=None):
-    _, progs = bpftool("prog show", JSON=True, fail=True)
+def bpftool_prog_list(expected=None, ns=""):
+    _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -158,13 +164,13 @@ files = [] # files to be removed
         time.sleep(0.05)
     raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
 
-def ip(args, force=False, JSON=True, fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True):
     if force:
         args = "-force " + args
-    return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail)
+    return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
 
-def tc(args, JSON=True, fail=True):
-    return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail)
+def tc(args, JSON=True, ns="", fail=True):
+    return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
 
 def ethtool(dev, opt, args, fail=True):
     return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -178,6 +184,15 @@ files = [] # files to be removed
 def bpf_bytecode(bytecode):
     return "bytecode \"%s\"" % (bytecode)
 
+def mknetns(n_retry=10):
+    for i in range(n_retry):
+        name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+        ret, _ = ip("netns add %s" % (name), fail=False)
+        if ret == 0:
+            netns.append(name)
+            return name
+    return None
+
 class DebugfsDir:
     """
     Class for accessing DebugFS directories as a dictionary.
@@ -237,6 +252,8 @@ files = [] # files to be removed
         self.dev = self._netdevsim_create()
         devs.append(self)
 
+        self.ns = ""
+
         self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
         self.dfs_refresh()
 
@@ -257,7 +274,7 @@ files = [] # files to be removed
 
     def remove(self):
         devs.remove(self)
-        ip("link del dev %s" % (self.dev["ifname"]))
+        ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
 
     def dfs_refresh(self):
         self.dfs = DebugfsDir(self.dfs_dir)
@@ -285,6 +302,11 @@ files = [] # files to be removed
             time.sleep(0.05)
         raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
 
+    def set_ns(self, ns):
+        name = "1" if ns == "" else ns
+        ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+        self.ns = ns
+
     def set_mtu(self, mtu, fail=True):
         return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
                   fail=fail)
@@ -372,6 +394,8 @@ files = [] # files to be removed
         dev.remove()
     for f in files:
         cmd("rm -f %s" % (f))
+    for ns in netns:
+        cmd("ip netns delete %s" % (ns))
 
 def pin_prog(file_name, idx=0):
     progs = bpftool_prog_list(expected=(idx + 1))
@@ -381,6 +405,30 @@ files = [] # files to be removed
 
     return file_name, bpf_pinned(file_name)
 
+def check_dev_info(other_ns, ns, removed=False):
+    if removed:
+        bpftool_prog_list()
+        return
+    progs = bpftool_prog_list(expected=int(not removed), ns=ns)
+    prog = progs[0]
+
+    fail("dev" not in prog.keys(), "Device parameters not reported")
+    dev = prog["dev"]
+    fail("ifindex" not in dev.keys(), "Device parameters not reported")
+    fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+    fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+    if not removed and not other_ns:
+        fail("ifname" not in dev.keys(), "Ifname not reported")
+        fail(dev["ifname"] != sim["ifname"],
+             "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+    else:
+        fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+        if removed:
+            fail(dev["ifindex"] != 0, "Device perameters not zero on removed")
+            fail(dev["ns_dev"] != 0, "Device perameters not zero on removed")
+            fail(dev["ns_inode"] != 0, "Device perameters not zero on removed")
+
 # Parse command line
 parser = argparse.ArgumentParser()
 parser.add_argument("--log", help="output verbose log to given file")
@@ -417,6 +465,12 @@ samples = ["sample_ret0.o"]
     skip(ret != 0, "sample %s/%s not found, please compile it" %
          (bpf_test_dir, s))
 
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
 try:
     obj = bpf_obj("sample_ret0.o")
     bytecode = bpf_bytecode("1,6 0 0 4294967295,")
@@ -549,6 +603,8 @@ samples = ["sample_ret0.o"]
     progs = bpftool_prog_list(expected=1)
     fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
          "Loaded program has wrong ID")
+    fail("dev" in progs[0].keys(),
+         "Device parameters reported for non-offloaded program")
 
     start_test("Test XDP prog replace with bad flags...")
     ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
@@ -673,6 +729,35 @@ samples = ["sample_ret0.o"]
     fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
          (time_diff, delay_sec))
 
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+    sim.set_xdp(obj, "offload")
+
+    start_test("Test bpftool bound info reporting (own ns)...")
+    check_dev_info(False, "")
+
+    start_test("Test bpftool bound info reporting (other ns)...")
+    ns = mknetns()
+    sim.set_ns(ns)
+    check_dev_info(True, "")
+
+    start_test("Test bpftool bound info reporting (remote ns)...")
+    check_dev_info(False, ns)
+
+    start_test("Test bpftool bound info reporting (back to own ns)...")
+    sim.set_ns("")
+    check_dev_info(False, "")
+
+    pin_prog("/sys/fs/bpf/tmp")
+    sim.remove()
+
+    start_test("Test bpftool bound info reporting (removed dev)...")
+    check_dev_info(True, "", removed=True)
+
     print("%s: OK" % (os.path.basename(__file__)))
 
 finally:
-- 
2.15.1

^ permalink raw reply related

* [PATCH net-next v4 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events
From: Masami Hiramatsu @ 2017-12-20  4:14 UTC (permalink / raw)
  To: Ingo Molnar, David S . Miller, Ian McDonald, Vlad Yasevich,
	Stephen Hemminger, Steven Rostedt
  Cc: Peter Zijlstra, Thomas Gleixner, LKML, H . Peter Anvin,
	Gerrit Renker, Neil Horman, dccp, netdev, linux-sctp,
	Stephen Rothwell, mhiramat

Hi,

This series is v4 of the replacement of jprobe usage with trace
events. This version is rebased on net-next, fixes a build warning
and moves a temporal variable definition in a block.

Previous version is here;
https://lkml.org/lkml/2017/12/19/153

Changes from v3:
  All: Rebased on net-next
  [3/6]: fixes a build warning for i386 by casting pointer unsigned
        long instead of __u64, and moves a temporal variable
         definition in a block.

Thank you,

---

Masami Hiramatsu (6):
      net: tcp: Add trace events for TCP congestion window tracing
      net: tcp: Remove TCP probe module
      net: sctp: Add SCTP ACK tracking trace event
      net: sctp: Remove debug SCTP probe module
      net: dccp: Add DCCP sendmsg trace event
      net: dccp: Remove dccpprobe module


 include/trace/events/sctp.h |   99 ++++++++++++++
 include/trace/events/tcp.h  |   80 +++++++++++
 net/Kconfig                 |   17 --
 net/dccp/Kconfig            |   17 --
 net/dccp/Makefile           |    2 
 net/dccp/probe.c            |  203 -----------------------------
 net/dccp/proto.c            |    5 +
 net/dccp/trace.h            |  105 +++++++++++++++
 net/ipv4/Makefile           |    1 
 net/ipv4/tcp_input.c        |    3 
 net/ipv4/tcp_probe.c        |  301 -------------------------------------------
 net/sctp/Kconfig            |   12 --
 net/sctp/Makefile           |    3 
 net/sctp/probe.c            |  244 -----------------------------------
 net/sctp/sm_statefuns.c     |    5 +
 15 files changed, 297 insertions(+), 800 deletions(-)
 create mode 100644 include/trace/events/sctp.h
 delete mode 100644 net/dccp/probe.c
 create mode 100644 net/dccp/trace.h
 delete mode 100644 net/ipv4/tcp_probe.c
 delete mode 100644 net/sctp/probe.c

--
Masami Hiramatsu (Linaro) <mhiramat@kernel.org>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox