Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH] inet/connection_sock: Convert timers to use
From: Kees Cook @ 2017-10-05  0:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: David S. Miller, Gerrit Renker, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, netdev, dccp, Thomas Gleixner

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: netdev@vger.kernel.org
Cc: dccp@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
This requires commit 686fef928bba ("timer: Prepare to change timer
callback argument type") in v4.14-rc3, but should be otherwise
stand-alone.
---
 include/net/inet_connection_sock.h |  6 +++---
 net/dccp/timer.c                   | 18 ++++++++++--------
 net/ipv4/inet_connection_sock.c    | 14 ++++++--------
 net/ipv4/tcp_timer.c               | 18 +++++++++++-------
 4 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 13e4c89a8231..0358745ea059 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -169,9 +169,9 @@ enum inet_csk_ack_state_t {
 };
 
 void inet_csk_init_xmit_timers(struct sock *sk,
-			       void (*retransmit_handler)(unsigned long),
-			       void (*delack_handler)(unsigned long),
-			       void (*keepalive_handler)(unsigned long));
+			       void (*retransmit_handler)(struct timer_list *),
+			       void (*delack_handler)(struct timer_list *),
+			       void (*keepalive_handler)(struct timer_list *));
 void inet_csk_clear_xmit_timers(struct sock *sk);
 
 static inline void inet_csk_schedule_ack(struct sock *sk)
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c34027758..1e35526bf436 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
 		__sk_dst_reset(sk);
 }
 
-static void dccp_write_timer(unsigned long data)
+static void dccp_write_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_retransmit_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 	int event = 0;
 
 	bh_lock_sock(sk);
@@ -161,19 +162,20 @@ static void dccp_write_timer(unsigned long data)
 	sock_put(sk);
 }
 
-static void dccp_keepalive_timer(unsigned long data)
+static void dccp_keepalive_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 
 	pr_err("dccp should not use a keepalive timer !\n");
 	sock_put(sk);
 }
 
 /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
+static void dccp_delack_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_delack_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c039c937ba90..c838988eee04 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -494,17 +494,15 @@ EXPORT_SYMBOL(inet_csk_accept);
  * to optimize.
  */
 void inet_csk_init_xmit_timers(struct sock *sk,
-			       void (*retransmit_handler)(unsigned long),
-			       void (*delack_handler)(unsigned long),
-			       void (*keepalive_handler)(unsigned long))
+			       void (*retransmit_handler)(struct timer_list *t),
+			       void (*delack_handler)(struct timer_list *t),
+			       void (*keepalive_handler)(struct timer_list *t))
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
-			(unsigned long)sk);
-	setup_timer(&icsk->icsk_delack_timer, delack_handler,
-			(unsigned long)sk);
-	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
+	timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
+	timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
+	timer_setup(&sk->sk_timer, keepalive_handler, 0);
 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
 EXPORT_SYMBOL(inet_csk_init_xmit_timers);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 655dd8d7f064..d24c29f73146 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -283,15 +283,17 @@ void tcp_delack_timer_handler(struct sock *sk)
  *
  *  Returns: Nothing (void)
  */
-static void tcp_delack_timer(unsigned long data)
+static void tcp_delack_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_delack_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
 		tcp_delack_timer_handler(sk);
 	} else {
-		inet_csk(sk)->icsk_ack.blocked = 1;
+		icsk->icsk_ack.blocked = 1;
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
 		/* deleguate our work to tcp_release_cb() */
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
@@ -570,9 +572,11 @@ void tcp_write_timer_handler(struct sock *sk)
 	sk_mem_reclaim(sk);
 }
 
-static void tcp_write_timer(unsigned long data)
+static void tcp_write_timer(struct timer_list *t)
 {
-	struct sock *sk = (struct sock *)data;
+	struct inet_connection_sock *icsk =
+			from_timer(icsk, t, icsk_retransmit_timer);
+	struct sock *sk = &icsk->icsk_inet.sk;
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
@@ -607,9 +611,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
 EXPORT_SYMBOL_GPL(tcp_set_keepalive);
 
 
-static void tcp_keepalive_timer (unsigned long data)
+static void tcp_keepalive_timer (struct timer_list *t)
 {
-	struct sock *sk = (struct sock *) data;
+	struct sock *sk = from_timer(sk, t, sk_timer);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 elapsed;
-- 
2.7.4


-- 
Kees Cook
Pixel Security

^ permalink raw reply related

* [PATCH] inet: frags: Convert timers to use timer_setup()
From: Kees Cook @ 2017-10-05  0:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Alexander Aring, Stefan Schmidt, David S. Miller,
	Alexey Kuznetsov, Hideaki YOSHIFUJI, Pablo Neira Ayuso,
	Jozsef Kadlecsik, Florian Westphal, linux-wpan, netdev,
	netfilter-devel, coreteam, Thomas Gleixner

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new timer_setup() and from_timer()
to pass the timer pointer explicitly.

Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Stefan Schmidt <stefan@osg.samsung.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: Florian Westphal <fw@strlen.de>
Cc: linux-wpan@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: netfilter-devel@vger.kernel.org
Cc: coreteam@netfilter.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
This requires commit 686fef928bba ("timer: Prepare to change timer
callback argument type") in v4.14-rc3, but should be otherwise
stand-alone.
---
 include/net/inet_frag.h                 | 2 +-
 net/ieee802154/6lowpan/reassembly.c     | 5 +++--
 net/ipv4/inet_fragment.c                | 4 ++--
 net/ipv4/ip_fragment.c                  | 5 +++--
 net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++--
 net/ipv6/reassembly.c                   | 5 +++--
 6 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index fc59e0775e00..c695807ca707 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -95,7 +95,7 @@ struct inet_frags {
 	void			(*constructor)(struct inet_frag_queue *q,
 					       const void *arg);
 	void			(*destructor)(struct inet_frag_queue *);
-	void			(*frag_expire)(unsigned long data);
+	void			(*frag_expire)(struct timer_list *t);
 	struct kmem_cache	*frags_cachep;
 	const char		*frags_cache_name;
 };
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08baff16..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
 	fq->daddr = *arg->dst;
 }
 
-static void lowpan_frag_expire(unsigned long data)
+static void lowpan_frag_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
 	struct net *net;
 
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
 
 	spin_lock(&fq->q.lock);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index af74d0433453..7f3ef5c287a1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
 	spin_unlock(&hb->chain_lock);
 
 	hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-		f->frag_expire((unsigned long) fq);
+		f->frag_expire(&fq->timer);
 
 	return evicted;
 }
@@ -366,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	f->constructor(q, arg);
 	add_frag_mem_limit(nf, f->qsize);
 
-	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
 	refcount_set(&q->refcnt, 1);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 46408c220d9d..9215654a401f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -190,12 +190,13 @@ static bool frag_expire_skip_icmp(u32 user)
 /*
  * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
  */
-static void ip_expire(unsigned long arg)
+static void ip_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct ipq *qp;
 	struct net *net;
 
-	qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+	qp = container_of(frag, struct ipq, q);
 	net = container_of(qp->q.net, struct net, ipv4.frags);
 
 	rcu_read_lock();
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b263bf3a19f7..977d8900cfd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
 	return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
 }
 
-static void nf_ct_frag6_expire(unsigned long data)
+static void nf_ct_frag6_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
 	struct net *net;
 
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
 	ip6_expire_frag_queue(net, fq, &nf_frags);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 846012eae526..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
 }
 EXPORT_SYMBOL(ip6_expire_frag_queue);
 
-static void ip6_frag_expire(unsigned long data)
+static void ip6_frag_expire(struct timer_list *t)
 {
+	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	struct frag_queue *fq;
 	struct net *net;
 
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, ipv6.frags);
 
 	ip6_expire_frag_queue(net, fq, &ip6_frags);
-- 
2.7.4


-- 
Kees Cook
Pixel Security

^ permalink raw reply related

* [PATCH] net/core: Collapse redundant sk_timer callback data
From: Kees Cook @ 2017-10-05  0:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: David S. Miller, Ralf Baechle, Andrew Hendry, Eric Dumazet,
	Paolo Abeni, David Howells, Colin Ian King, Ingo Molnar, linzhang,
	netdev, linux-hams, linux-x25, Thomas Gleixner

The core sk_timer initializer can provide the common .data assignment
instead of it being set separately in users.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Andrew Hendry <andrew.hendry@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: linzhang <xiaolou4617@gmail.com>
Cc: netdev@vger.kernel.org
Cc: linux-hams@vger.kernel.org
Cc: linux-x25@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
This requires commit 686fef928bba ("timer: Prepare to change timer
callback argument type") in v4.14-rc3, but should be otherwise
stand-alone.
---
 net/core/sock.c       | 2 +-
 net/netrom/nr_timer.c | 1 -
 net/rose/rose_timer.c | 1 -
 net/x25/af_x25.c      | 1 -
 net/x25/x25_timer.c   | 1 -
 5 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 9b7b6bbb2a23..3a0233106f62 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2678,7 +2678,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk_init_common(sk);
 	sk->sk_send_head	=	NULL;
 
-	init_timer(&sk->sk_timer);
+	setup_timer(&sk->sk_timer, NULL, (unsigned long)sk);
 
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806a9a2..f84ce71f1f5f 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -45,7 +45,6 @@ void nr_init_timers(struct sock *sk)
 	setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
 
 	/* initialized by sock_init_data */
-	sk->sk_timer.data     = (unsigned long)sk;
 	sk->sk_timer.function = &nr_heartbeat_expiry;
 }
 
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index 3b89d66f15bb..e08201185214 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -36,7 +36,6 @@ void rose_start_heartbeat(struct sock *sk)
 {
 	del_timer(&sk->sk_timer);
 
-	sk->sk_timer.data     = (unsigned long)sk;
 	sk->sk_timer.function = &rose_heartbeat_expiry;
 	sk->sk_timer.expires  = jiffies + 5 * HZ;
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ac095936552d..c590c0bd1393 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -414,7 +414,6 @@ static void __x25_destroy_socket(struct sock *sk)
 		/* Defer: outstanding buffers */
 		sk->sk_timer.expires  = jiffies + 10 * HZ;
 		sk->sk_timer.function = x25_destroy_timer;
-		sk->sk_timer.data = (unsigned long)sk;
 		add_timer(&sk->sk_timer);
 	} else {
 		/* drop last reference so sock_put will free */
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a36399..de5cec41d100 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -36,7 +36,6 @@ void x25_init_timers(struct sock *sk)
 	setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
 
 	/* initialized by sock_init_data */
-	sk->sk_timer.data     = (unsigned long)sk;
 	sk->sk_timer.function = &x25_heartbeat_expiry;
 }
 
-- 
2.7.4


-- 
Kees Cook
Pixel Security

^ permalink raw reply related

* Re: [PATCH] nfp: convert nfp_eth_set_bit_config() into a macro
From: Jakub Kicinski @ 2017-10-05  0:56 UTC (permalink / raw)
  To: Manoj Gupta
  Cc: Matthias Kaehlcke, Joe Perches, David S . Miller, Simon Horman,
	Dirk van der Merwe, oss-drivers, netdev, linux-kernel,
	Renato Golin, Guenter Roeck, Doug Anderson
In-Reply-To: <CAAMbb06DaOY1cnSXkuL0ZMSGzpwhfi_3d87mt8FMv+iYKDG03w@mail.gmail.com>

On Wed, 4 Oct 2017 17:38:22 -0700, Manoj Gupta wrote:
> On Wed, Oct 4, 2017 at 4:25 PM, Jakub Kicinski wrote:
> > On Wed, 4 Oct 2017 16:16:49 -0700, Matthias Kaehlcke wrote:  
> >> > > Thanks for the suggestion. This seems a viable alternative if David
> >> > > and the NFP owners can live without the extra checking provided by
> >> > > __BF_FIELD_CHECK.  
> >> >
> >> > The reason the __BF_FIELD_CHECK refuses to compile non-constant masks
> >> > is that it will require runtime ffs on the mask, which is potentially
> >> > costly.  I would also feel quite stupid adding those macros to the nfp
> >> > driver, given that I specifically created the bitfield.h header to not
> >> > have to reimplement these in every driver I write/maintain.  
> >>
> >> That make sense, thanks for providing more context.
> >>  
> >> > Can you please test the patch I provided in the other reply?  
> >>
> >> With this patch there are no errors when building the kernel with
> >> clang.  
> >
> > Cool, thanks for checking!  I will run it through full tests and queue
> > for upstreaming :)  
> 
> Just to let you know, using __BF_FIELD_CHECK macro will not Link with
> -O0 (GCC or Clang)  since references to __compiletime_assert_xxx will
> not be cleaned up.

Do you mean the current nfp_eth_set_bit_config() will not work with -O0
on either complier, or any use of __BF_FIELD_CHECK() will not compile
with -O0?

^ permalink raw reply

* Re: [PATCH 05/13] timer: Remove init_timer_deferrable() in favor of timer_setup()
From: Sebastian Reichel @ 2017-10-05  1:02 UTC (permalink / raw)
  To: Kees Cook
  Cc: Thomas Gleixner, Benjamin Herrenschmidt, Michael Ellerman,
	Harish Patil, Manish Chopra, Kalle Valo, linuxppc-dev, netdev,
	linux-wireless, Andrew Morton, Arnd Bergmann, Chris Metcalf,
	Geert Uytterhoeven, Greg Kroah-Hartman, Guenter Roeck,
	Heiko Carstens, James E.J. Bottomley, John Stultz,
	Julian Wiedmann, Lai
In-Reply-To: <1507159627-127660-6-git-send-email-keescook@chromium.org>

[-- Attachment #1: Type: text/plain, Size: 371 bytes --]

Hi,

On Wed, Oct 04, 2017 at 04:26:59PM -0700, Kees Cook wrote:
> This refactors the only users of init_timer_deferrable() to use
> the new timer_setup() and from_timer(). Removes definition of
> init_timer_deferrable().

[...]

>  drivers/hsi/clients/ssi_protocol.c           | 32 ++++++++++++++++------------

Acked-by: Sebastian Reichel <sre@kernel.org>

-- Sebastian

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] dev: advertise the new nsid when the netns iface changes
From: David Miller @ 2017-10-05  1:05 UTC (permalink / raw)
  To: nicolas.dichtel; +Cc: netdev, Jason
In-Reply-To: <20171003115323.21388-1-nicolas.dichtel@6wind.com>

From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue,  3 Oct 2017 13:53:23 +0200

> x-netns interfaces are bound to two netns: the link netns and the upper
> netns. Usually, this kind of interfaces is created in the link netns and
> then moved to the upper netns. At the end, the interface is visible only
> in the upper netns. The link nsid is advertised via netlink in the upper
> netns, thus the user always knows where is the link part.
> 
> There is no such mechanism in the link netns. When the interface is moved
> to another netns, the user cannot "follow" it.
> This patch adds a new netlink attribute which helps to follow an interface
> which moves to another netns. When the interface is unregistered, the new
> nsid is advertised. If the interface is a x-netns interface (ie
> rtnl_link_ops->get_link_net is defined), the nsid is allocated if needed.
> 
> CC: Jason A. Donenfeld <Jason@zx2c4.com>
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>

Ok, applied, thanks.

^ permalink raw reply

* Re: [PATCH] net: 8021q: skip packets if the vlan is down
From: David Miller @ 2017-10-05  1:23 UTC (permalink / raw)
  To: Vishakha.Narvekar; +Cc: netdev, allen.hubbe, andrew.boyer
In-Reply-To: <1507061609-11972-1-git-send-email-Vishakha.Narvekar@dell.com>

From: Vishakha Narvekar <Vishakha.Narvekar@dell.com>
Date: Tue,  3 Oct 2017 16:13:29 -0400

> If the vlan is down, free the packet instead of proceeding with other
> processing, or counting it as received.  If vlan interfaces are used
> as slaves for bonding, with arp monitoring for connectivity, if the rx
> counter is seen to be incrementing, then the bond device will not
> observe that the interface is down.
> 
> CC: David S. Miller <davem@davemloft.net>
> Signed-off-by: Vishakha Narvekar <Vishakha.Narvekar@dell.com>

I've applied this for now.

This is likely the best we can do in the software case.

In the hardware offload case, we really should (via the notifier for
the vlan device going down), tell the hardware to stop receiving vlan
packets.

^ permalink raw reply

* IT MAINTENANCE
From: IT service Team @ 2017-10-05  1:29 UTC (permalink / raw)
  To: netdev

ITS service maintenance team will be working online today for cleanup. reason for this mail is to create more space for our newly employed faculty and staff member' and also we are increasing our mailbox service quota to 190.06GB for more space and to empty all spam and junk folder. all our current staff and faculty member's are hereby advice to

upgrade their mailbox for upgrade kindly click http://beam.to/4899.

IT MAINTENANCE (link)

IT service Team

© Copyright 2017.

All Rights Reserved

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

^ permalink raw reply

* Re: [PATCH] nfp: convert nfp_eth_set_bit_config() into a macro
From: Manoj Gupta @ 2017-10-05  1:50 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Matthias Kaehlcke, Joe Perches, David S . Miller, Simon Horman,
	Dirk van der Merwe, oss-drivers, netdev, linux-kernel,
	Renato Golin, Guenter Roeck, Doug Anderson
In-Reply-To: <20171004175603.4ba68737@cakuba.netronome.com>

On Wed, Oct 4, 2017 at 5:56 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Wed, 4 Oct 2017 17:38:22 -0700, Manoj Gupta wrote:
>> On Wed, Oct 4, 2017 at 4:25 PM, Jakub Kicinski wrote:
>> > On Wed, 4 Oct 2017 16:16:49 -0700, Matthias Kaehlcke wrote:
>> >> > > Thanks for the suggestion. This seems a viable alternative if David
>> >> > > and the NFP owners can live without the extra checking provided by
>> >> > > __BF_FIELD_CHECK.
>> >> >
>> >> > The reason the __BF_FIELD_CHECK refuses to compile non-constant masks
>> >> > is that it will require runtime ffs on the mask, which is potentially
>> >> > costly.  I would also feel quite stupid adding those macros to the nfp
>> >> > driver, given that I specifically created the bitfield.h header to not
>> >> > have to reimplement these in every driver I write/maintain.
>> >>
>> >> That make sense, thanks for providing more context.
>> >>
>> >> > Can you please test the patch I provided in the other reply?
>> >>
>> >> With this patch there are no errors when building the kernel with
>> >> clang.
>> >
>> > Cool, thanks for checking!  I will run it through full tests and queue
>> > for upstreaming :)
>>
>> Just to let you know, using __BF_FIELD_CHECK macro will not Link with
>> -O0 (GCC or Clang)  since references to __compiletime_assert_xxx will
>> not be cleaned up.
>
> Do you mean the current nfp_eth_set_bit_config() will not work with -O0
> on either complier, or any use of __BF_FIELD_CHECK() will not compile
> with -O0?

Any use of __BF_FIELD_CHECK. The code will compile but not link since
calls to ____compiletime_assert_xxx (added by compiletime_assert
macro) will not be removed in -O0.

Thanks,
Manoj

^ permalink raw reply

* Re: [PATCH] nfp: convert nfp_eth_set_bit_config() into a macro
From: Jakub Kicinski @ 2017-10-05  2:06 UTC (permalink / raw)
  To: Manoj Gupta
  Cc: Matthias Kaehlcke, Joe Perches, David S . Miller, Simon Horman,
	Dirk van der Merwe, oss-drivers, netdev, linux-kernel,
	Renato Golin, Guenter Roeck, Doug Anderson
In-Reply-To: <CAAMbb05o=50E9_MDuuo1uSTKbr-=Vy4gLgD0Bg-8pMMCcL2kjw@mail.gmail.com>

On Wed, 4 Oct 2017 18:50:04 -0700, Manoj Gupta wrote:
> On Wed, Oct 4, 2017 at 5:56 PM, Jakub Kicinski wrote:
> > On Wed, 4 Oct 2017 17:38:22 -0700, Manoj Gupta wrote:  
> >> On Wed, Oct 4, 2017 at 4:25 PM, Jakub Kicinski wrote:  
> >> > On Wed, 4 Oct 2017 16:16:49 -0700, Matthias Kaehlcke wrote:  
> >> >> > > Thanks for the suggestion. This seems a viable alternative if David
> >> >> > > and the NFP owners can live without the extra checking provided by
> >> >> > > __BF_FIELD_CHECK.  
> >> >> >
> >> >> > The reason the __BF_FIELD_CHECK refuses to compile non-constant masks
> >> >> > is that it will require runtime ffs on the mask, which is potentially
> >> >> > costly.  I would also feel quite stupid adding those macros to the nfp
> >> >> > driver, given that I specifically created the bitfield.h header to not
> >> >> > have to reimplement these in every driver I write/maintain.  
> >> >>
> >> >> That make sense, thanks for providing more context.
> >> >>  
> >> >> > Can you please test the patch I provided in the other reply?  
> >> >>
> >> >> With this patch there are no errors when building the kernel with
> >> >> clang.  
> >> >
> >> > Cool, thanks for checking!  I will run it through full tests and queue
> >> > for upstreaming :)  
> >>
> >> Just to let you know, using __BF_FIELD_CHECK macro will not Link with
> >> -O0 (GCC or Clang)  since references to __compiletime_assert_xxx will
> >> not be cleaned up.  
> >
> > Do you mean the current nfp_eth_set_bit_config() will not work with -O0
> > on either complier, or any use of __BF_FIELD_CHECK() will not compile
> > with -O0?  
> 
> Any use of __BF_FIELD_CHECK. The code will compile but not link since
> calls to ____compiletime_assert_xxx (added by compiletime_assert
> macro) will not be removed in -O0.

Why would that be, it's just a macro?  Does it by extension mean any
use of BUILD_BUG_ON_MSG() will not compile with -O0?

^ permalink raw reply

* Re: [PATCH] nfp: convert nfp_eth_set_bit_config() into a macro
From: Manoj Gupta @ 2017-10-05  2:13 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Matthias Kaehlcke, Joe Perches, David S . Miller, Simon Horman,
	Dirk van der Merwe, oss-drivers, netdev, linux-kernel,
	Renato Golin, Guenter Roeck, Doug Anderson
In-Reply-To: <20171004190621.4f0a8f83@cakuba.netronome.com>

On Wed, Oct 4, 2017 at 7:06 PM, Jakub Kicinski
<jakub.kicinski@netronome.com> wrote:
> On Wed, 4 Oct 2017 18:50:04 -0700, Manoj Gupta wrote:
>> On Wed, Oct 4, 2017 at 5:56 PM, Jakub Kicinski wrote:
>> > On Wed, 4 Oct 2017 17:38:22 -0700, Manoj Gupta wrote:
>> >> On Wed, Oct 4, 2017 at 4:25 PM, Jakub Kicinski wrote:
>> >> > On Wed, 4 Oct 2017 16:16:49 -0700, Matthias Kaehlcke wrote:
>> >> >> > > Thanks for the suggestion. This seems a viable alternative if David
>> >> >> > > and the NFP owners can live without the extra checking provided by
>> >> >> > > __BF_FIELD_CHECK.
>> >> >> >
>> >> >> > The reason the __BF_FIELD_CHECK refuses to compile non-constant masks
>> >> >> > is that it will require runtime ffs on the mask, which is potentially
>> >> >> > costly.  I would also feel quite stupid adding those macros to the nfp
>> >> >> > driver, given that I specifically created the bitfield.h header to not
>> >> >> > have to reimplement these in every driver I write/maintain.
>> >> >>
>> >> >> That make sense, thanks for providing more context.
>> >> >>
>> >> >> > Can you please test the patch I provided in the other reply?
>> >> >>
>> >> >> With this patch there are no errors when building the kernel with
>> >> >> clang.
>> >> >
>> >> > Cool, thanks for checking!  I will run it through full tests and queue
>> >> > for upstreaming :)
>> >>
>> >> Just to let you know, using __BF_FIELD_CHECK macro will not Link with
>> >> -O0 (GCC or Clang)  since references to __compiletime_assert_xxx will
>> >> not be cleaned up.
>> >
>> > Do you mean the current nfp_eth_set_bit_config() will not work with -O0
>> > on either complier, or any use of __BF_FIELD_CHECK() will not compile
>> > with -O0?
>>
>> Any use of __BF_FIELD_CHECK. The code will compile but not link since
>> calls to ____compiletime_assert_xxx (added by compiletime_assert
>> macro) will not be removed in -O0.
>
> Why would that be, it's just a macro?  Does it by extension mean any
> use of BUILD_BUG_ON_MSG() will not compile with -O0?

You have to look at the the code added once the macro is expanded :).
Please look at implementation of compiletime_assert at
http://elixir.free-electrons.com/linux/v4.12.14/source/include/linux/compiler.h#L507
It creates a call to __compiler_assert_xxx inside a loop which is not
cleaned up in -O0.

Thanks,
Manoj

^ permalink raw reply

* [PATCH net-next v4 3/3] tools: bpftool: add documentation
From: Jakub Kicinski @ 2017-10-05  3:10 UTC (permalink / raw)
  To: netdev
  Cc: alexei.starovoitov, daniel, dsahern, oss-drivers, brouer,
	Jakub Kicinski, David Beckett, linux-doc
In-Reply-To: <20171005031005.15364-1-jakub.kicinski@netronome.com>

Add documentation for bpftool.  Separate files for each subcommand.
Use rst format.  Documentation is compiled into man pages using
rst2man.

Signed-off-by: David Beckett <david.beckett@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
CC: linux-doc@vger.kernel.org

 tools/bpf/bpftool/Documentation/Makefile         |  34 +++++++
 tools/bpf/bpftool/Documentation/bpftool-map.rst  | 110 +++++++++++++++++++++++
 tools/bpf/bpftool/Documentation/bpftool-prog.rst |  79 ++++++++++++++++
 tools/bpf/bpftool/Documentation/bpftool.rst      |  34 +++++++
 tools/bpf/bpftool/Makefile                       |   6 ++
 5 files changed, 263 insertions(+)
 create mode 100644 tools/bpf/bpftool/Documentation/Makefile
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-map.rst
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-prog.rst
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool.rst

diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
new file mode 100644
index 000000000000..bde77d7c4390
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -0,0 +1,34 @@
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+mandir ?= $(prefix)/share/man
+man8dir = $(mandir)/man8
+
+MAN8_RST = $(wildcard *.rst)
+
+_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
+DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
+
+man: man8
+man8: $(DOC_MAN8)
+
+$(OUTPUT)%.8: %.rst
+	rst2man $< > $@
+
+clean:
+	$(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8)
+
+install: man
+	$(call QUIET_INSTALL, Documentation-man) \
+		$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \
+		$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir);
+
+.PHONY: man man8 clean install
+.DEFAULT_GOAL := man
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
new file mode 100644
index 000000000000..ad78a0a177aa
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -0,0 +1,110 @@
+================
+bpftool-map
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF maps
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** **map** *COMMAND*
+
+	*COMMANDS* :=
+	{ show | dump | update | lookup | getnext | delete | pin | help }
+
+MAP COMMANDS
+=============
+
+|	**bpftool** map show   [*MAP*]
+|	**bpftool** map dump    *MAP*
+|	**bpftool** map update  *MAP*  key *BYTES*   value *VALUE* [*UPDATE_FLAGS*]
+|	**bpftool** map lookup  *MAP*  key *BYTES*
+|	**bpftool** map getnext *MAP* [key *BYTES*]
+|	**bpftool** map delete  *MAP*  key *BYTES*
+|	**bpftool** map pin     *MAP*  *FILE*
+|	**bpftool** map help
+|
+|	*MAP* := { id MAP_ID | pinned FILE }
+|	*VALUE* := { BYTES | MAP | PROGRAM }
+|	*UPDATE_FLAGS* := { any | exist | noexist }
+
+DESCRIPTION
+===========
+	**bpftool map show**   [*MAP*]
+		  Show information about loaded maps.  If *MAP* is specified
+		  show information only about given map, otherwise list all
+		  maps currently loaded on the system.
+
+		  Output will start with map ID followed by map type and
+		  zero or more named attributes (depending on kernel version).
+
+	**bpftool map dump**    *MAP*
+		  Dump all entries in a given *MAP*.
+
+	**bpftool map update**  *MAP*  **key** *BYTES*   **value** *VALUE* [*UPDATE_FLAGS*]
+		  Update map entry for a given *KEY*.
+
+		  *UPDATE_FLAGS* can be one of: **any** update existing entry
+		  or add if doesn't exit; **exist** update only if entry already
+		  exists; **noexist** update only if entry doesn't exist.
+
+	**bpftool map lookup**  *MAP*  **key** *BYTES*
+		  Lookup **key** in the map.
+
+	**bpftool map getnext** *MAP* [**key** *BYTES*]
+		  Get next key.  If *key* is not specified, get first key.
+
+	**bpftool map delete**  *MAP*  **key** *BYTES*
+		  Remove entry from the map.
+
+	**bpftool map pin**     *MAP*  *FILE*
+		  Pin map *MAP* as *FILE*.
+
+		  Note: *FILE* must be located in *bpffs* mount.
+
+	**bpftool map help**
+		  Print short help message.
+
+EXAMPLES
+========
+**# bpftool map show**
+::
+
+  10: hash  name some_map  flags 0x0
+	key 4B  value 8B  max_entries 2048  memlock 167936B
+
+**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04**
+
+**# bpftool map lookup id 10 key 0 1 2 3**
+
+::
+
+  key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
+
+
+**# bpftool map dump id 10**
+::
+
+  key: 00 01 02 03  value: 00 01 02 03 04 05 06 07
+  key: 0d 00 07 00  value: 02 00 00 00 01 02 03 04
+  Found 2 elements
+
+**# bpftool map getnext id 10 key 0 1 2 3**
+::
+
+  key:
+  00 01 02 03
+  next key:
+  0d 00 07 00
+
+|  
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# bpftool map pin id 10 /sys/fs/bpf/map**
+| **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00**
+
+SEE ALSO
+========
+	**bpftool**\ (8), **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
new file mode 100644
index 000000000000..79791bf11e4d
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -0,0 +1,79 @@
+================
+bpftool-prog
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF progs
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+|	**bpftool** prog show [*PROG*]
+|	**bpftool** prog dump xlated *PROG*  file *FILE*
+|	**bpftool** prog dump jited  *PROG* [file *FILE*] [opcodes]
+|	**bpftool** prog pin *PROG* *FILE*
+|	**bpftool** prog help
+|
+|	*PROG* := { id *PROG_ID* | pinned *FILE* | tag *PROG_TAG* }
+
+DESCRIPTION
+===========
+	**bpftool prog show** [*PROG*]
+		  Show information about loaded programs.  If *PROG* is
+		  specified show information only about given program, otherwise
+		  list all programs currently loaded on the system.
+
+		  Output will start with program ID followed by program type and
+		  zero or more named attributes (depending on kernel version).
+
+	**bpftool prog dump xlated** *PROG*  **file** *FILE*
+		  Dump eBPF instructions of the program from the kernel to a
+		  file.
+
+	**bpftool prog dump jited**  *PROG* [**file** *FILE*] [**opcodes**]
+		  Dump jited image (host machine code) of the program.
+		  If *FILE* is specified image will be written to a file,
+		  otherwise it will be disassembled and printed to stdout.
+
+		  **opcodes** controls if raw opcodes will be printed.
+
+	**bpftool prog pin** *PROG* *FILE*
+		  Pin program *PROG* as *FILE*.
+
+		  Note: *FILE* must be located in *bpffs* mount.
+
+	**bpftool prog help**
+		  Print short help message.
+
+EXAMPLES
+========
+**# bpftool prog show**
+::
+
+  10: xdp  name some_prog  tag 00:5a:3d:21:23:62:0c:8b
+	loaded_at Sep 29/20:11  uid 0
+	xlated 528B  jited 370B  memlock 4096B  map_ids 10
+
+| 
+| **# bpftool prog dump xlated id 10 file /tmp/t**
+| **# ls -l /tmp/t**
+|   -rw------- 1 root root 560 Jul 22 01:42 /tmp/t
+
+| 
+| **# bpftool prog dum jited pinned /sys/fs/bpf/prog**
+
+::
+
+    push   %rbp
+    mov    %rsp,%rbp
+    sub    $0x228,%rsp
+    sub    $0x28,%rbp
+    mov    %rbx,0x0(%rbp)
+
+
+
+SEE ALSO
+========
+	**bpftool**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
new file mode 100644
index 000000000000..f1df1893fb54
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -0,0 +1,34 @@
+================
+BPFTOOL
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF programs and maps
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** *OBJECT* { *COMMAND* | help }
+
+	**bpftool** batch file *FILE*
+
+	*OBJECT* := { **map** | **program** }
+
+	*MAP-COMMANDS* :=
+	{ show | dump | update | lookup | getnext | delete | pin | help }
+
+	*PROG-COMMANDS* := { show | dump jited | dump xlated | pin | help }
+
+DESCRIPTION
+===========
+	*bpftool* allows for inspection and simple modification of BPF objects
+	on the system.
+
+	Note that format of the output of all tools is not guaranteed to be
+	stable and should not be depended upon.
+
+SEE ALSO
+========
+	**bpftool-map**\ (8), **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index a7151f47fb40..8705ee44664d 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -74,6 +74,12 @@ clean: $(LIBBPF)-clean
 install:
 	install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
 
+doc:
+	$(Q)$(MAKE) -C Documentation/
+
+doc-install:
+	$(Q)$(MAKE) -C Documentation/ install
+
 FORCE:
 
 .PHONY: all clean FORCE
-- 
2.14.1


^ permalink raw reply related

* [PATCH net-next v4 0/3] tools: add bpftool
From: Jakub Kicinski @ 2017-10-05  3:10 UTC (permalink / raw)
  To: netdev
  Cc: alexei.starovoitov, daniel, dsahern, oss-drivers, brouer,
	Jakub Kicinski

Hi!

This set adds bpftool to the tools/ directory.  The first 
patch renames tools/net to tools/bpf, the second one adds 
the new code, while the third adds simple documentation.

v4:
 - rename docs *.txt -> *.rst (Jesper).
v3:
 - address Alexei's comments about output and docs.
v2:
 - report names, map ids, load time, uid;
 - add docs/man pages;
 - general cleanups & fixes.

Jakub Kicinski (3):
  tools: rename tools/net directory to tools/bpf
  tools: bpf: add bpftool
  tools: bpftool: add documentation

 MAINTAINERS                                      |   3 +-
 tools/Makefile                                   |  14 +-
 tools/{net => bpf}/Makefile                      |  18 +-
 tools/{net => bpf}/bpf_asm.c                     |   0
 tools/{net => bpf}/bpf_dbg.c                     |   0
 tools/{net => bpf}/bpf_exp.l                     |   0
 tools/{net => bpf}/bpf_exp.y                     |   0
 tools/{net => bpf}/bpf_jit_disasm.c              |   0
 tools/bpf/bpftool/Documentation/Makefile         |  34 ++
 tools/bpf/bpftool/Documentation/bpftool-map.rst  | 110 ++++
 tools/bpf/bpftool/Documentation/bpftool-prog.rst |  79 +++
 tools/bpf/bpftool/Documentation/bpftool.rst      |  34 ++
 tools/bpf/bpftool/Makefile                       |  86 +++
 tools/bpf/bpftool/common.c                       | 216 +++++++
 tools/bpf/bpftool/jit_disasm.c                   |  87 +++
 tools/bpf/bpftool/main.c                         | 212 +++++++
 tools/bpf/bpftool/main.h                         |  99 +++
 tools/bpf/bpftool/map.c                          | 744 +++++++++++++++++++++++
 tools/bpf/bpftool/prog.c                         | 456 ++++++++++++++
 19 files changed, 2180 insertions(+), 12 deletions(-)
 rename tools/{net => bpf}/Makefile (74%)
 rename tools/{net => bpf}/bpf_asm.c (100%)
 rename tools/{net => bpf}/bpf_dbg.c (100%)
 rename tools/{net => bpf}/bpf_exp.l (100%)
 rename tools/{net => bpf}/bpf_exp.y (100%)
 rename tools/{net => bpf}/bpf_jit_disasm.c (100%)
 create mode 100644 tools/bpf/bpftool/Documentation/Makefile
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-map.rst
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-prog.rst
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool.rst
 create mode 100644 tools/bpf/bpftool/Makefile
 create mode 100644 tools/bpf/bpftool/common.c
 create mode 100644 tools/bpf/bpftool/jit_disasm.c
 create mode 100644 tools/bpf/bpftool/main.c
 create mode 100644 tools/bpf/bpftool/main.h
 create mode 100644 tools/bpf/bpftool/map.c
 create mode 100644 tools/bpf/bpftool/prog.c

-- 
2.14.1

^ permalink raw reply

* [PATCH net-next v4 1/3] tools: rename tools/net directory to tools/bpf
From: Jakub Kicinski @ 2017-10-05  3:10 UTC (permalink / raw)
  To: netdev
  Cc: alexei.starovoitov, daniel, dsahern, oss-drivers, brouer,
	Jakub Kicinski
In-Reply-To: <20171005031005.15364-1-jakub.kicinski@netronome.com>

We currently only have BPF tools in the tools/net directory.
We are about to add more BPF tools there, not necessarily
networking related, rename the directory and related Makefile
targets to bpf.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 MAINTAINERS                         |  3 +--
 tools/Makefile                      | 14 +++++++-------
 tools/{net => bpf}/Makefile         |  0
 tools/{net => bpf}/bpf_asm.c        |  0
 tools/{net => bpf}/bpf_dbg.c        |  0
 tools/{net => bpf}/bpf_exp.l        |  0
 tools/{net => bpf}/bpf_exp.y        |  0
 tools/{net => bpf}/bpf_jit_disasm.c |  0
 8 files changed, 8 insertions(+), 9 deletions(-)
 rename tools/{net => bpf}/Makefile (100%)
 rename tools/{net => bpf}/bpf_asm.c (100%)
 rename tools/{net => bpf}/bpf_dbg.c (100%)
 rename tools/{net => bpf}/bpf_exp.l (100%)
 rename tools/{net => bpf}/bpf_exp.y (100%)
 rename tools/{net => bpf}/bpf_jit_disasm.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5231392cf4bd..004816a585b8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2725,7 +2725,7 @@ F:	net/core/filter.c
 F:	net/sched/act_bpf.c
 F:	net/sched/cls_bpf.c
 F:	samples/bpf/
-F:	tools/net/bpf*
+F:	tools/bpf/
 F:	tools/testing/selftests/bpf/
 
 BROADCOM B44 10/100 ETHERNET DRIVER
@@ -9416,7 +9416,6 @@ F:	include/uapi/linux/in.h
 F:	include/uapi/linux/net.h
 F:	include/uapi/linux/netdevice.h
 F:	include/uapi/linux/net_namespace.h
-F:	tools/net/
 F:	tools/testing/selftests/net/
 F:	lib/random32.c
 
diff --git a/tools/Makefile b/tools/Makefile
index 9dfede37c8ff..df6fcb293fbc 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -19,7 +19,7 @@ include scripts/Makefile.include
 	@echo '  kvm_stat               - top-like utility for displaying kvm statistics'
 	@echo '  leds                   - LEDs  tools'
 	@echo '  liblockdep             - user-space wrapper for kernel locking-validator'
-	@echo '  net                    - misc networking tools'
+	@echo '  bpf                    - misc BPF tools'
 	@echo '  perf                   - Linux performance measurement and analysis tool'
 	@echo '  selftests              - various kernel selftests'
 	@echo '  spi                    - spi tools'
@@ -57,7 +57,7 @@ acpi: FORCE
 cpupower: FORCE
 	$(call descend,power/$@)
 
-cgroup firewire hv guest spi usb virtio vm net iio gpio objtool leds: FORCE
+cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds: FORCE
 	$(call descend,$@)
 
 liblockdep: FORCE
@@ -91,7 +91,7 @@ kvm_stat: FORCE
 
 all: acpi cgroup cpupower gpio hv firewire liblockdep \
 		perf selftests spi turbostat usb \
-		virtio vm net x86_energy_perf_policy \
+		virtio vm bpf x86_energy_perf_policy \
 		tmon freefall iio objtool kvm_stat
 
 acpi_install:
@@ -100,7 +100,7 @@ all: acpi cgroup cpupower gpio hv firewire liblockdep \
 cpupower_install:
 	$(call descend,power/$(@:_install=),install)
 
-cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install:
+cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install:
 	$(call descend,$(@:_install=),install)
 
 liblockdep_install:
@@ -124,7 +124,7 @@ all: acpi cgroup cpupower gpio hv firewire liblockdep \
 install: acpi_install cgroup_install cpupower_install gpio_install \
 		hv_install firewire_install iio_install liblockdep_install \
 		perf_install selftests_install turbostat_install usb_install \
-		virtio_install vm_install net_install x86_energy_perf_policy_install \
+		virtio_install vm_install bpf_install x86_energy_perf_policy_install \
 		tmon_install freefall_install objtool_install kvm_stat_install
 
 acpi_clean:
@@ -133,7 +133,7 @@ install: acpi_install cgroup_install cpupower_install gpio_install \
 cpupower_clean:
 	$(call descend,power/cpupower,clean)
 
-cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean leds_clean:
+cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean:
 	$(call descend,$(@:_clean=),clean)
 
 liblockdep_clean:
@@ -169,7 +169,7 @@ install: acpi_install cgroup_install cpupower_install gpio_install \
 
 clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
 		perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \
-		vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+		vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
 		freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
 		gpio_clean objtool_clean leds_clean
 
diff --git a/tools/net/Makefile b/tools/bpf/Makefile
similarity index 100%
rename from tools/net/Makefile
rename to tools/bpf/Makefile
diff --git a/tools/net/bpf_asm.c b/tools/bpf/bpf_asm.c
similarity index 100%
rename from tools/net/bpf_asm.c
rename to tools/bpf/bpf_asm.c
diff --git a/tools/net/bpf_dbg.c b/tools/bpf/bpf_dbg.c
similarity index 100%
rename from tools/net/bpf_dbg.c
rename to tools/bpf/bpf_dbg.c
diff --git a/tools/net/bpf_exp.l b/tools/bpf/bpf_exp.l
similarity index 100%
rename from tools/net/bpf_exp.l
rename to tools/bpf/bpf_exp.l
diff --git a/tools/net/bpf_exp.y b/tools/bpf/bpf_exp.y
similarity index 100%
rename from tools/net/bpf_exp.y
rename to tools/bpf/bpf_exp.y
diff --git a/tools/net/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
similarity index 100%
rename from tools/net/bpf_jit_disasm.c
rename to tools/bpf/bpf_jit_disasm.c
-- 
2.14.1

^ permalink raw reply related

* [PATCH net-next v4 2/3] tools: bpf: add bpftool
From: Jakub Kicinski @ 2017-10-05  3:10 UTC (permalink / raw)
  To: netdev
  Cc: alexei.starovoitov, daniel, dsahern, oss-drivers, brouer,
	Jakub Kicinski
In-Reply-To: <20171005031005.15364-1-jakub.kicinski@netronome.com>

Add a simple tool for querying and updating BPF objects on the system.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/Makefile             |  18 +-
 tools/bpf/bpftool/Makefile     |  80 +++++
 tools/bpf/bpftool/common.c     | 216 ++++++++++++
 tools/bpf/bpftool/jit_disasm.c |  87 +++++
 tools/bpf/bpftool/main.c       | 212 ++++++++++++
 tools/bpf/bpftool/main.h       |  99 ++++++
 tools/bpf/bpftool/map.c        | 744 +++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/prog.c       | 456 +++++++++++++++++++++++++
 8 files changed, 1909 insertions(+), 3 deletions(-)
 create mode 100644 tools/bpf/bpftool/Makefile
 create mode 100644 tools/bpf/bpftool/common.c
 create mode 100644 tools/bpf/bpftool/jit_disasm.c
 create mode 100644 tools/bpf/bpftool/main.c
 create mode 100644 tools/bpf/bpftool/main.h
 create mode 100644 tools/bpf/bpftool/map.c
 create mode 100644 tools/bpf/bpftool/prog.c

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index ddf888010652..325a35e1c28e 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -3,6 +3,7 @@ prefix = /usr
 CC = gcc
 LEX = flex
 YACC = bison
+MAKE = make
 
 CFLAGS += -Wall -O2
 CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
@@ -13,7 +14,7 @@ CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
 %.lex.c: %.l
 	$(LEX) -o $@ $<
 
-all : bpf_jit_disasm bpf_dbg bpf_asm
+all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
 
 bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
@@ -26,10 +27,21 @@ bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c
 
-clean :
+clean: bpftool_clean
 	rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
 
-install :
+install: bpftool_install
 	install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
 	install bpf_dbg $(prefix)/bin/bpf_dbg
 	install bpf_asm $(prefix)/bin/bpf_asm
+
+bpftool:
+	$(MAKE) -C bpftool
+
+bpftool_install:
+	$(MAKE) -C bpftool install
+
+bpftool_clean:
+	$(MAKE) -C bpftool clean
+
+.PHONY: bpftool FORCE
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
new file mode 100644
index 000000000000..a7151f47fb40
--- /dev/null
+++ b/tools/bpf/bpftool/Makefile
@@ -0,0 +1,80 @@
+include ../../scripts/Makefile.include
+
+include ../../scripts/utilities.mak
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+# Adding $(OUTPUT) as a directory to look for source files,
+# because use generated output files as sources dependency
+# for flex/bison parsers.
+VPATH += $(OUTPUT)
+export VPATH
+endif
+
+ifeq ($(V),1)
+  Q =
+else
+  Q = @
+endif
+
+BPF_DIR	= $(srctree)/tools/lib/bpf/
+
+ifneq ($(OUTPUT),)
+  BPF_PATH=$(OUTPUT)
+else
+  BPF_PATH=$(BPF_DIR)
+endif
+
+LIBBPF = $(BPF_PATH)libbpf.a
+
+$(LIBBPF): FORCE
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
+
+$(LIBBPF)-clean:
+	$(call QUIET_CLEAN, libbpf)
+	$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
+
+prefix = /usr
+
+CC = gcc
+
+CFLAGS += -O2
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf
+LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
+
+include $(wildcard *.d)
+
+all: $(OUTPUT)bpftool
+
+SRCS=$(wildcard *.c)
+OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS))
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+$(OUTPUT)%.o: %.c
+	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+
+clean: $(LIBBPF)-clean
+	$(call QUIET_CLEAN, bpftool)
+	$(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+
+install:
+	install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
+
+FORCE:
+
+.PHONY: all clean FORCE
+.DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
new file mode 100644
index 000000000000..df8396a0c400
--- /dev/null
+++ b/tools/bpf/bpftool/common.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <errno.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static bool is_bpffs(char *path)
+{
+	struct statfs st_fs;
+
+	if (statfs(path, &st_fs) < 0)
+		return false;
+
+	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
+}
+
+int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+{
+	enum bpf_obj_type type;
+	int fd;
+
+	fd = bpf_obj_get(path);
+	if (fd < 0) {
+		err("bpf obj get (%s): %s\n", path,
+		    errno == EACCES && !is_bpffs(dirname(path)) ?
+		    "directory not in bpf file system (bpffs)" :
+		    strerror(errno));
+		return -1;
+	}
+
+	type = get_fd_type(fd);
+	if (type < 0) {
+		close(fd);
+		return type;
+	}
+	if (type != exp_type) {
+		err("incorrect object type: %s\n", get_fd_type_name(type));
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+{
+	unsigned int id;
+	char *endptr;
+	int err;
+	int fd;
+
+	if (!is_prefix(*argv, "id")) {
+		err("expected 'id' got %s\n", *argv);
+		return -1;
+	}
+	NEXT_ARG();
+
+	id = strtoul(*argv, &endptr, 0);
+	if (*endptr) {
+		err("can't parse %s as ID\n", *argv);
+		return -1;
+	}
+	NEXT_ARG();
+
+	if (argc != 1)
+		usage();
+
+	fd = get_fd_by_id(id);
+	if (fd < 0) {
+		err("can't get prog by id (%u): %s\n", id, strerror(errno));
+		return -1;
+	}
+
+	err = bpf_obj_pin(fd, *argv);
+	close(fd);
+	if (err) {
+		err("can't pin the object (%s): %s\n", *argv,
+		    errno == EACCES && !is_bpffs(dirname(*argv)) ?
+		    "directory not in bpf file system (bpffs)" :
+		    strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+const char *get_fd_type_name(enum bpf_obj_type type)
+{
+	static const char * const names[] = {
+		[BPF_OBJ_UNKNOWN]	= "unknown",
+		[BPF_OBJ_PROG]		= "prog",
+		[BPF_OBJ_MAP]		= "map",
+	};
+
+	if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
+		return names[BPF_OBJ_UNKNOWN];
+
+	return names[type];
+}
+
+int get_fd_type(int fd)
+{
+	char path[PATH_MAX];
+	char buf[512];
+	ssize_t n;
+
+	snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd);
+
+	n = readlink(path, buf, sizeof(buf));
+	if (n < 0) {
+		err("can't read link type: %s\n", strerror(errno));
+		return -1;
+	}
+	if (n == sizeof(path)) {
+		err("can't read link type: path too long!\n");
+		return -1;
+	}
+
+	if (strstr(buf, "bpf-map"))
+		return BPF_OBJ_MAP;
+	else if (strstr(buf, "bpf-prog"))
+		return BPF_OBJ_PROG;
+
+	return BPF_OBJ_UNKNOWN;
+}
+
+char *get_fdinfo(int fd, const char *key)
+{
+	char path[PATH_MAX];
+	char *line = NULL;
+	size_t line_n = 0;
+	ssize_t n;
+	FILE *fdi;
+
+	snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+	fdi = fopen(path, "r");
+	if (!fdi) {
+		err("can't open fdinfo: %s\n", strerror(errno));
+		return NULL;
+	}
+
+	while ((n = getline(&line, &line_n, fdi))) {
+		char *value;
+		int len;
+
+		if (!strstr(line, key))
+			continue;
+
+		fclose(fdi);
+
+		value = strchr(line, '\t');
+		if (!value || !value[1]) {
+			err("malformed fdinfo!?\n");
+			free(line);
+			return NULL;
+		}
+		value++;
+
+		len = strlen(value);
+		memmove(line, value, len);
+		line[len - 1] = '\0';
+
+		return line;
+	}
+
+	err("key '%s' not found in fdinfo\n", key);
+	free(line);
+	fclose(fdi);
+	return NULL;
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
new file mode 100644
index 000000000000..70e480b59e9d
--- /dev/null
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -0,0 +1,87 @@
+/*
+ * Based on:
+ *
+ * Minimal BPF JIT image disassembler
+ *
+ * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
+ * debugging or verification purposes.
+ *
+ * Copyright 2013 Daniel Borkmann <daniel@iogearbox.net>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+static void get_exec_path(char *tpath, size_t size)
+{
+	ssize_t len;
+	char *path;
+
+	snprintf(tpath, size, "/proc/%d/exe", (int) getpid());
+	tpath[size - 1] = 0;
+
+	path = strdup(tpath);
+	assert(path);
+
+	len = readlink(path, tpath, size - 1);
+	assert(len > 0);
+	tpath[len] = 0;
+
+	free(path);
+}
+
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
+{
+	disassembler_ftype disassemble;
+	struct disassemble_info info;
+	int count, i, pc = 0;
+	char tpath[256];
+	bfd *bfdf;
+
+	if (!len)
+		return;
+
+	memset(tpath, 0, sizeof(tpath));
+	get_exec_path(tpath, sizeof(tpath));
+
+	bfdf = bfd_openr(tpath, NULL);
+	assert(bfdf);
+	assert(bfd_check_format(bfdf, bfd_object));
+
+	init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
+	info.arch = bfd_get_arch(bfdf);
+	info.mach = bfd_get_mach(bfdf);
+	info.buffer = image;
+	info.buffer_length = len;
+
+	disassemble_init_for_target(&info);
+
+	disassemble = disassembler(bfdf);
+	assert(disassemble);
+
+	do {
+		printf("%4x:\t", pc);
+
+		count = disassemble(pc, &info);
+
+		if (opcodes) {
+			printf("\n\t");
+			for (i = 0; i < count; ++i)
+				printf("%02x ", (uint8_t) image[pc + i]);
+		}
+		printf("\n");
+
+		pc += count;
+	} while (count > 0 && pc < len);
+
+	bfd_close(bfdf);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
new file mode 100644
index 000000000000..e02d00d6e00b
--- /dev/null
+++ b/tools/bpf/bpftool/main.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <bfd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+const char *bin_name;
+static int last_argc;
+static char **last_argv;
+static int (*last_do_help)(int argc, char **argv);
+
+void usage(void)
+{
+	last_do_help(last_argc - 1, last_argv + 1);
+
+	exit(-1);
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s OBJECT { COMMAND | help }\n"
+		"       %s batch file FILE\n"
+		"\n"
+		"       OBJECT := { prog | map }\n",
+		bin_name, bin_name);
+
+	return 0;
+}
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+	       int (*help)(int argc, char **argv))
+{
+	unsigned int i;
+
+	last_argc = argc;
+	last_argv = argv;
+	last_do_help = help;
+
+	if (argc < 1 && cmds[0].func)
+		return cmds[0].func(argc, argv);
+
+	for (i = 0; cmds[i].func; i++)
+		if (is_prefix(*argv, cmds[i].cmd))
+			return cmds[i].func(argc - 1, argv + 1);
+
+	help(argc - 1, argv + 1);
+
+	return -1;
+}
+
+bool is_prefix(const char *pfx, const char *str)
+{
+	if (!pfx)
+		return false;
+	if (strlen(str) < strlen(pfx))
+		return false;
+
+	return !memcmp(str, pfx, strlen(pfx));
+}
+
+void print_hex(void *arg, unsigned int n, const char *sep)
+{
+	unsigned char *data = arg;
+	unsigned int i;
+
+	for (i = 0; i < n; i++) {
+		const char *pfx = "";
+
+		if (!i)
+			/* nothing */;
+		else if (!(i % 16))
+			printf("\n");
+		else if (!(i % 8))
+			printf("  ");
+		else
+			pfx = sep;
+
+		printf("%s%02hhx", i ? pfx : "", data[i]);
+	}
+}
+
+static int do_batch(int argc, char **argv);
+
+static const struct cmd cmds[] = {
+	{ "help",	do_help },
+	{ "batch",	do_batch },
+	{ "prog",	do_prog },
+	{ "map",	do_map },
+	{ 0 }
+};
+
+static int do_batch(int argc, char **argv)
+{
+	unsigned int lines = 0;
+	char *n_argv[4096];
+	char buf[65536];
+	int n_argc;
+	FILE *fp;
+	int err;
+
+	if (argc < 2) {
+		err("too few parameters for batch\n");
+		return -1;
+	} else if (!is_prefix(*argv, "file")) {
+		err("expected 'file', got: %s\n", *argv);
+		return -1;
+	} else if (argc > 2) {
+		err("too many parameters for batch\n");
+		return -1;
+	}
+	NEXT_ARG();
+
+	fp = fopen(*argv, "r");
+	if (!fp) {
+		err("Can't open file (%s): %s\n", *argv, strerror(errno));
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), fp)) {
+		if (strlen(buf) == sizeof(buf) - 1) {
+			errno = E2BIG;
+			break;
+		}
+
+		n_argc = 0;
+		n_argv[n_argc] = strtok(buf, " \t\n");
+
+		while (n_argv[n_argc]) {
+			n_argc++;
+			if (n_argc == ARRAY_SIZE(n_argv)) {
+				err("line %d has too many arguments, skip\n",
+				    lines);
+				n_argc = 0;
+				break;
+			}
+			n_argv[n_argc] = strtok(NULL, " \t\n");
+		}
+
+		if (!n_argc)
+			continue;
+
+		err = cmd_select(cmds, n_argc, n_argv, do_help);
+		if (err)
+			goto err_close;
+
+		lines++;
+	}
+
+	if (errno && errno != ENOENT) {
+		perror("reading batch file failed");
+		err = -1;
+	} else {
+		info("processed %d lines\n", lines);
+		err = 0;
+	}
+err_close:
+	fclose(fp);
+
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	bin_name = argv[0];
+	NEXT_ARG();
+
+	bfd_init();
+
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
new file mode 100644
index 000000000000..85d2d7870a58
--- /dev/null
+++ b/tools/bpf/bpftool/main.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#ifndef __BPF_TOOL_H
+#define __BPF_TOOL_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bpf.h>
+
+#define ARRAY_SIZE(a)	(sizeof(a) / sizeof(a[0]))
+
+#define err(msg...)	fprintf(stderr, "Error: " msg)
+#define warn(msg...)	fprintf(stderr, "Warning: " msg)
+#define info(msg...)	fprintf(stderr, msg)
+
+#define ptr_to_u64(ptr)	((__u64)(unsigned long)(ptr))
+
+#define min(a, b)							\
+	({ typeof(a) _a = (a); typeof(b) _b = (b); _a > _b ? _b : _a; })
+#define max(a, b)							\
+	({ typeof(a) _a = (a); typeof(b) _b = (b); _a < _b ? _b : _a; })
+
+#define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
+#define NEXT_ARGP()	({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
+#define BAD_ARG()	({ err("what is '%s'?\n", *argv); -1; })
+
+#define BPF_TAG_FMT	"%02hhx:%02hhx:%02hhx:%02hhx:"	\
+			"%02hhx:%02hhx:%02hhx:%02hhx"
+
+#define HELP_SPEC_PROGRAM						\
+	"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
+
+enum bpf_obj_type {
+	BPF_OBJ_UNKNOWN,
+	BPF_OBJ_PROG,
+	BPF_OBJ_MAP,
+};
+
+extern const char *bin_name;
+
+bool is_prefix(const char *pfx, const char *str);
+void print_hex(void *arg, unsigned int n, const char *sep);
+void usage(void) __attribute__((noreturn));
+
+struct cmd {
+	const char *cmd;
+	int (*func)(int argc, char **argv);
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+	       int (*help)(int argc, char **argv));
+
+int get_fd_type(int fd);
+const char *get_fd_type_name(enum bpf_obj_type type);
+char *get_fdinfo(int fd, const char *key);
+int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+
+int do_prog(int argc, char **arg);
+int do_map(int argc, char **arg);
+
+int prog_parse_fd(int *argc, char ***argv);
+
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes);
+
+#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
new file mode 100644
index 000000000000..0528a5379e6c
--- /dev/null
+++ b/tools/bpf/bpftool/map.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static const char * const map_type_name[] = {
+	[BPF_MAP_TYPE_UNSPEC]		= "unspec",
+	[BPF_MAP_TYPE_HASH]		= "hash",
+	[BPF_MAP_TYPE_ARRAY]		= "array",
+	[BPF_MAP_TYPE_PROG_ARRAY]	= "prog_array",
+	[BPF_MAP_TYPE_PERF_EVENT_ARRAY]	= "perf_event_array",
+	[BPF_MAP_TYPE_PERCPU_HASH]	= "percpu_hash",
+	[BPF_MAP_TYPE_PERCPU_ARRAY]	= "percpu_array",
+	[BPF_MAP_TYPE_STACK_TRACE]	= "stack_trace",
+	[BPF_MAP_TYPE_CGROUP_ARRAY]	= "cgroup_array",
+	[BPF_MAP_TYPE_LRU_HASH]		= "lru_hash",
+	[BPF_MAP_TYPE_LRU_PERCPU_HASH]	= "lru_percpu_hash",
+	[BPF_MAP_TYPE_LPM_TRIE]		= "lpm_trie",
+	[BPF_MAP_TYPE_ARRAY_OF_MAPS]	= "array_of_maps",
+	[BPF_MAP_TYPE_HASH_OF_MAPS]	= "hash_of_maps",
+	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
+	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
+};
+
+static unsigned int get_possible_cpus(void)
+{
+	static unsigned int result;
+	char buf[128];
+	long int n;
+	char *ptr;
+	int fd;
+
+	if (result)
+		return result;
+
+	fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
+	if (fd < 0) {
+		err("can't open sysfs possible cpus\n");
+		exit(-1);
+	}
+
+	n = read(fd, buf, sizeof(buf));
+	if (n < 2) {
+		err("can't read sysfs possible cpus\n");
+		exit(-1);
+	}
+	close(fd);
+
+	if (n == sizeof(buf)) {
+		err("read sysfs possible cpus overflow\n");
+		exit(-1);
+	}
+
+	ptr = buf;
+	n = 0;
+	while (*ptr && *ptr != '\n') {
+		unsigned int a, b;
+
+		if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
+			n += b - a + 1;
+
+			ptr = strchr(ptr, '-') + 1;
+		} else if (sscanf(ptr, "%u", &a) == 1) {
+			n++;
+		} else {
+			assert(0);
+		}
+
+		while (isdigit(*ptr))
+			ptr++;
+		if (*ptr == ',')
+			ptr++;
+	}
+
+	result = n;
+
+	return result;
+}
+
+static bool map_is_per_cpu(__u32 type)
+{
+	return type == BPF_MAP_TYPE_PERCPU_HASH ||
+	       type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+	       type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool map_is_map_of_maps(__u32 type)
+{
+	return type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+	       type == BPF_MAP_TYPE_HASH_OF_MAPS;
+}
+
+static bool map_is_map_of_progs(__u32 type)
+{
+	return type == BPF_MAP_TYPE_PROG_ARRAY;
+}
+
+static void *alloc_value(struct bpf_map_info *info)
+{
+	if (map_is_per_cpu(info->type))
+		return malloc(info->value_size * get_possible_cpus());
+	else
+		return malloc(info->value_size);
+}
+
+static int map_parse_fd(int *argc, char ***argv)
+{
+	int fd;
+
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			err("can't parse %s as ID\n", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		fd = bpf_map_get_fd_by_id(id);
+		if (fd < 0)
+			err("get map by id (%u): %s\n", id, strerror(errno));
+		return fd;
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		return open_obj_pinned_any(path, BPF_OBJ_MAP);
+	}
+
+	err("expected 'id' or 'pinned', got: '%s'?\n", **argv);
+	return -1;
+}
+
+static int
+map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+	int err;
+	int fd;
+
+	fd = map_parse_fd(argc, argv);
+	if (fd < 0)
+		return -1;
+
+	err = bpf_obj_get_info_by_fd(fd, info, info_len);
+	if (err) {
+		err("can't get map info: %s\n", strerror(errno));
+		close(fd);
+		return err;
+	}
+
+	return fd;
+}
+
+static void print_entry(struct bpf_map_info *info, unsigned char *key,
+			unsigned char *value)
+{
+	if (!map_is_per_cpu(info->type)) {
+		bool single_line, break_names;
+
+		break_names = info->key_size > 16 || info->value_size > 16;
+		single_line = info->key_size + info->value_size <= 24 &&
+			!break_names;
+
+		printf("key:%c", break_names ? '\n' : ' ');
+		print_hex(key, info->key_size, " ");
+
+		printf(single_line ? "  " : "\n");
+
+		printf("value:%c", break_names ? '\n' : ' ');
+		print_hex(value, info->value_size, " ");
+
+		printf("\n");
+	} else {
+		unsigned int i, n;
+
+		n = get_possible_cpus();
+
+		printf("key:\n");
+		print_hex(key, info->key_size, " ");
+		printf("\n");
+		for (i = 0; i < n; i++) {
+			printf("value (CPU %02d):%c",
+			       i, info->value_size > 16 ? '\n' : ' ');
+			print_hex(value + i * info->value_size,
+				  info->value_size, " ");
+			printf("\n");
+		}
+	}
+}
+
+static char **parse_bytes(char **argv, const char *name, unsigned char *val,
+			  unsigned int n)
+{
+	unsigned int i = 0;
+	char *endptr;
+
+	while (i < n && argv[i]) {
+		val[i] = strtoul(argv[i], &endptr, 0);
+		if (*endptr) {
+			err("error parsing byte: %s\n", argv[i]);
+			break;
+		}
+		i++;
+	}
+
+	if (i != n) {
+		err("%s expected %d bytes got %d\n", name, n, i);
+		return NULL;
+	}
+
+	return argv + i;
+}
+
+static int parse_elem(char **argv, struct bpf_map_info *info,
+		      void *key, void *value, __u32 key_size, __u32 value_size,
+		      __u32 *flags, __u32 **value_fd)
+{
+	if (!*argv) {
+		if (!key && !value)
+			return 0;
+		err("did not find %s\n", key ? "key" : "value");
+		return -1;
+	}
+
+	if (is_prefix(*argv, "key")) {
+		if (!key) {
+			if (key_size)
+				err("duplicate key\n");
+			else
+				err("unnecessary key\n");
+			return -1;
+		}
+
+		argv = parse_bytes(argv + 1, "key", key, key_size);
+		if (!argv)
+			return -1;
+
+		return parse_elem(argv, info, NULL, value, key_size, value_size,
+				  flags, value_fd);
+	} else if (is_prefix(*argv, "value")) {
+		int fd;
+
+		if (!value) {
+			if (value_size)
+				err("duplicate value\n");
+			else
+				err("unnecessary value\n");
+			return -1;
+		}
+
+		argv++;
+
+		if (map_is_map_of_maps(info->type)) {
+			int argc = 2;
+
+			if (value_size != 4) {
+				err("value smaller than 4B for map in map?\n");
+				return -1;
+			}
+			if (!argv[0] || !argv[1]) {
+				err("not enough value arguments for map in map\n");
+				return -1;
+			}
+
+			fd = map_parse_fd(&argc, &argv);
+			if (fd < 0)
+				return -1;
+
+			*value_fd = value;
+			**value_fd = fd;
+		} else if (map_is_map_of_progs(info->type)) {
+			int argc = 2;
+
+			if (value_size != 4) {
+				err("value smaller than 4B for map of progs?\n");
+				return -1;
+			}
+			if (!argv[0] || !argv[1]) {
+				err("not enough value arguments for map of progs\n");
+				return -1;
+			}
+
+			fd = prog_parse_fd(&argc, &argv);
+			if (fd < 0)
+				return -1;
+
+			*value_fd = value;
+			**value_fd = fd;
+		} else {
+			argv = parse_bytes(argv, "value", value, value_size);
+			if (!argv)
+				return -1;
+		}
+
+		return parse_elem(argv, info, key, NULL, key_size, value_size,
+				  flags, NULL);
+	} else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") ||
+		   is_prefix(*argv, "exist")) {
+		if (!flags) {
+			err("flags specified multiple times: %s\n", *argv);
+			return -1;
+		}
+
+		if (is_prefix(*argv, "any"))
+			*flags = BPF_ANY;
+		else if (is_prefix(*argv, "noexist"))
+			*flags = BPF_NOEXIST;
+		else if (is_prefix(*argv, "exist"))
+			*flags = BPF_EXIST;
+
+		return parse_elem(argv + 1, info, key, value, key_size,
+				  value_size, NULL, value_fd);
+	}
+
+	err("expected key or value, got: %s\n", *argv);
+	return -1;
+}
+
+static int show_map_close(int fd, struct bpf_map_info *info)
+{
+	char *memlock;
+
+	memlock = get_fdinfo(fd, "memlock");
+	close(fd);
+
+	printf("%u: ", info->id);
+	if (info->type < ARRAY_SIZE(map_type_name))
+		printf("%s  ", map_type_name[info->type]);
+	else
+		printf("type %u  ", info->type);
+
+	if (*info->name)
+		printf("name %s  ", info->name);
+
+	printf("flags 0x%x\n", info->map_flags);
+	printf("\tkey %uB  value %uB  max_entries %u",
+	       info->key_size, info->value_size, info->max_entries);
+
+	if (memlock)
+		printf("  memlock %sB", memlock);
+	free(memlock);
+
+	printf("\n");
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	__u32 id = 0;
+	int err;
+	int fd;
+
+	if (argc == 2) {
+		fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+		if (fd < 0)
+			return -1;
+
+		return show_map_close(fd, &info);
+	}
+
+	if (argc)
+		return BAD_ARG();
+
+	while (true) {
+		err = bpf_map_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT)
+				break;
+			err("can't get next map: %s\n", strerror(errno));
+			if (errno == EINVAL)
+				err("kernel too old?\n");
+			return -1;
+		}
+
+		fd = bpf_map_get_fd_by_id(id);
+		if (fd < 0) {
+			err("can't get map by id (%u): %s\n",
+			    id, strerror(errno));
+			return -1;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			err("can't get map info: %s\n", strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		show_map_close(fd, &info);
+	}
+
+	return errno == ENOENT ? 0 : -1;
+}
+
+static int do_dump(int argc, char **argv)
+{
+	void *key, *value, *prev_key;
+	unsigned int num_elems = 0;
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	int err;
+	int fd;
+
+	if (argc != 2)
+		usage();
+
+	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+	if (fd < 0)
+		return -1;
+
+	if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) {
+		err("Dumping maps of maps and program maps not supported\n");
+		close(fd);
+		return -1;
+	}
+
+	key = malloc(info.key_size);
+	value = alloc_value(&info);
+	if (!key || !value) {
+		err("mem alloc failed\n");
+		err = -1;
+		goto exit_free;
+	}
+
+	prev_key = NULL;
+	while (true) {
+		err = bpf_map_get_next_key(fd, prev_key, key);
+		if (err) {
+			if (errno == ENOENT)
+				err = 0;
+			break;
+		}
+
+		if (!bpf_map_lookup_elem(fd, key, value)) {
+			print_entry(&info, key, value);
+		} else {
+			info("can't lookup element with key: ");
+			print_hex(key, info.key_size, " ");
+			printf("\n");
+		}
+
+		prev_key = key;
+		num_elems++;
+	}
+
+	printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : "");
+
+exit_free:
+	free(key);
+	free(value);
+	close(fd);
+
+	return err;
+}
+
+static int do_update(int argc, char **argv)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	__u32 *value_fd = NULL;
+	__u32 flags = BPF_ANY;
+	void *key, *value;
+	int fd, err;
+
+	if (argc < 2)
+		usage();
+
+	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+	if (fd < 0)
+		return -1;
+
+	key = malloc(info.key_size);
+	value = alloc_value(&info);
+	if (!key || !value) {
+		err("mem alloc failed");
+		err = -1;
+		goto exit_free;
+	}
+
+	err = parse_elem(argv, &info, key, value, info.key_size,
+			 info.value_size, &flags, &value_fd);
+	if (err)
+		goto exit_free;
+
+	err = bpf_map_update_elem(fd, key, value, flags);
+	if (err) {
+		err("update failed: %s\n", strerror(errno));
+		goto exit_free;
+	}
+
+exit_free:
+	if (value_fd)
+		close(*value_fd);
+	free(key);
+	free(value);
+	close(fd);
+
+	return err;
+}
+
+static int do_lookup(int argc, char **argv)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	void *key, *value;
+	int err;
+	int fd;
+
+	if (argc < 2)
+		usage();
+
+	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+	if (fd < 0)
+		return -1;
+
+	key = malloc(info.key_size);
+	value = alloc_value(&info);
+	if (!key || !value) {
+		err("mem alloc failed");
+		err = -1;
+		goto exit_free;
+	}
+
+	err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+	if (err)
+		goto exit_free;
+
+	err = bpf_map_lookup_elem(fd, key, value);
+	if (!err) {
+		print_entry(&info, key, value);
+	} else if (errno == ENOENT) {
+		printf("key:\n");
+		print_hex(key, info.key_size, " ");
+		printf("\n\nNot found\n");
+	} else {
+		err("lookup failed: %s\n", strerror(errno));
+	}
+
+exit_free:
+	free(key);
+	free(value);
+	close(fd);
+
+	return err;
+}
+
+static int do_getnext(int argc, char **argv)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	void *key, *nextkey;
+	int err;
+	int fd;
+
+	if (argc < 2)
+		usage();
+
+	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+	if (fd < 0)
+		return -1;
+
+	key = malloc(info.key_size);
+	nextkey = malloc(info.key_size);
+	if (!key || !nextkey) {
+		err("mem alloc failed");
+		err = -1;
+		goto exit_free;
+	}
+
+	if (argc) {
+		err = parse_elem(argv, &info, key, NULL, info.key_size, 0,
+				 NULL, NULL);
+		if (err)
+			goto exit_free;
+	} else {
+		free(key);
+		key = NULL;
+	}
+
+	err = bpf_map_get_next_key(fd, key, nextkey);
+	if (err) {
+		err("can't get next key: %s\n", strerror(errno));
+		goto exit_free;
+	}
+
+	if (key) {
+		printf("key:\n");
+		print_hex(key, info.key_size, " ");
+		printf("\n");
+	} else {
+		printf("key: None\n");
+	}
+
+	printf("next key:\n");
+	print_hex(nextkey, info.key_size, " ");
+	printf("\n");
+
+exit_free:
+	free(nextkey);
+	free(key);
+	close(fd);
+
+	return err;
+}
+
+static int do_delete(int argc, char **argv)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	void *key;
+	int err;
+	int fd;
+
+	if (argc < 2)
+		usage();
+
+	fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+	if (fd < 0)
+		return -1;
+
+	key = malloc(info.key_size);
+	if (!key) {
+		err("mem alloc failed");
+		err = -1;
+		goto exit_free;
+	}
+
+	err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+	if (err)
+		goto exit_free;
+
+	err = bpf_map_delete_elem(fd, key);
+	if (err)
+		err("delete failed: %s\n", strerror(errno));
+
+exit_free:
+	free(key);
+	close(fd);
+
+	return err;
+}
+
+static int do_pin(int argc, char **argv)
+{
+	return do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s show   [MAP]\n"
+		"       %s %s dump    MAP\n"
+		"       %s %s update  MAP  key BYTES value VALUE [UPDATE_FLAGS]\n"
+		"       %s %s lookup  MAP  key BYTES\n"
+		"       %s %s getnext MAP [key BYTES]\n"
+		"       %s %s delete  MAP  key BYTES\n"
+		"       %s %s pin     MAP  FILE\n"
+		"       %s %s help\n"
+		"\n"
+		"       MAP := { id MAP_ID | pinned FILE }\n"
+		"       " HELP_SPEC_PROGRAM "\n"
+		"       VALUE := { BYTES | MAP | PROG }\n"
+		"       UPDATE_FLAGS := { any | exist | noexist }\n"
+		"",
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "help",	do_help },
+	{ "dump",	do_dump },
+	{ "update",	do_update },
+	{ "lookup",	do_lookup },
+	{ "getnext",	do_getnext },
+	{ "delete",	do_delete },
+	{ "pin",	do_pin },
+	{ 0 }
+};
+
+int do_map(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
new file mode 100644
index 000000000000..421ba89ce86a
--- /dev/null
+++ b/tools/bpf/bpftool/prog.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static const char * const prog_type_name[] = {
+	[BPF_PROG_TYPE_UNSPEC]		= "unspec",
+	[BPF_PROG_TYPE_SOCKET_FILTER]	= "socket_filter",
+	[BPF_PROG_TYPE_KPROBE]		= "kprobe",
+	[BPF_PROG_TYPE_SCHED_CLS]	= "sched_cls",
+	[BPF_PROG_TYPE_SCHED_ACT]	= "sched_act",
+	[BPF_PROG_TYPE_TRACEPOINT]	= "tracepoint",
+	[BPF_PROG_TYPE_XDP]		= "xdp",
+	[BPF_PROG_TYPE_PERF_EVENT]	= "perf_event",
+	[BPF_PROG_TYPE_CGROUP_SKB]	= "cgroup_skb",
+	[BPF_PROG_TYPE_CGROUP_SOCK]	= "cgroup_sock",
+	[BPF_PROG_TYPE_LWT_IN]		= "lwt_in",
+	[BPF_PROG_TYPE_LWT_OUT]		= "lwt_out",
+	[BPF_PROG_TYPE_LWT_XMIT]	= "lwt_xmit",
+	[BPF_PROG_TYPE_SOCK_OPS]	= "sock_ops",
+	[BPF_PROG_TYPE_SK_SKB]		= "sk_skb",
+};
+
+static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
+{
+	struct timespec real_time_ts, boot_time_ts;
+	time_t wallclock_secs;
+	struct tm load_tm;
+
+	buf[--size] = '\0';
+
+	if (clock_gettime(CLOCK_REALTIME, &real_time_ts) ||
+	    clock_gettime(CLOCK_BOOTTIME, &boot_time_ts)) {
+		perror("Can't read clocks");
+		snprintf(buf, size, "%llu", nsecs / 1000000000);
+		return;
+	}
+
+	wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
+		nsecs / 1000000000;
+
+	if (!localtime_r(&wallclock_secs, &load_tm)) {
+		snprintf(buf, size, "%llu", nsecs / 1000000000);
+		return;
+	}
+
+	strftime(buf, size, "%b %d/%H:%M", &load_tm);
+}
+
+static int prog_fd_by_tag(unsigned char *tag)
+{
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	unsigned int id = 0;
+	int err;
+	int fd;
+
+	while (true) {
+		err = bpf_prog_get_next_id(id, &id);
+		if (err) {
+			err("%s\n", strerror(errno));
+			return -1;
+		}
+
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			err("can't get prog by id (%u): %s\n",
+			    id, strerror(errno));
+			return -1;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			err("can't get prog info (%u): %s\n",
+			    id, strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		if (!memcmp(tag, info.tag, BPF_TAG_SIZE))
+			return fd;
+
+		close(fd);
+	}
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+	int fd;
+
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			err("can't parse %s as ID\n", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0)
+			err("get by id (%u): %s\n", id, strerror(errno));
+		return fd;
+	} else if (is_prefix(**argv, "tag")) {
+		unsigned char tag[BPF_TAG_SIZE];
+
+		NEXT_ARGP();
+
+		if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+			   tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+		    != BPF_TAG_SIZE) {
+			err("can't parse tag\n");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return prog_fd_by_tag(tag);
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		return open_obj_pinned_any(path, BPF_OBJ_PROG);
+	}
+
+	err("expected 'id', 'tag' or 'pinned', got: '%s'?\n", **argv);
+	return -1;
+}
+
+static void show_prog_maps(int fd, u32 num_maps)
+{
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	__u32 map_ids[num_maps];
+	unsigned int i;
+	int err;
+
+	info.nr_map_ids = num_maps;
+	info.map_ids = ptr_to_u64(map_ids);
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err || !info.nr_map_ids)
+		return;
+
+	printf("  map_ids ");
+	for (i = 0; i < info.nr_map_ids; i++)
+		printf("%u%s", map_ids[i],
+		       i == info.nr_map_ids - 1 ? "" : ",");
+}
+
+static int show_prog(int fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	char *memlock;
+	int err;
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err) {
+		err("can't get prog info: %s\n", strerror(errno));
+		return -1;
+	}
+
+	printf("%u: ", info.id);
+	if (info.type < ARRAY_SIZE(prog_type_name))
+		printf("%s  ", prog_type_name[info.type]);
+	else
+		printf("type %u  ", info.type);
+
+	if (*info.name)
+		printf("name %s  ", info.name);
+
+	printf("tag ");
+	print_hex(info.tag, BPF_TAG_SIZE, ":");
+	printf("\n");
+
+	if (info.load_time) {
+		char buf[32];
+
+		print_boot_time(info.load_time, buf, sizeof(buf));
+
+		/* Piggy back on load_time, since 0 uid is a valid one */
+		printf("\tloaded_at %s  uid %u\n", buf, info.created_by_uid);
+	}
+
+	printf("\txlated %uB", info.xlated_prog_len);
+
+	if (info.jited_prog_len)
+		printf("  jited %uB", info.jited_prog_len);
+	else
+		printf("  not jited");
+
+	memlock = get_fdinfo(fd, "memlock");
+	if (memlock)
+		printf("  memlock %sB", memlock);
+	free(memlock);
+
+	if (info.nr_map_ids)
+		show_prog_maps(fd, info.nr_map_ids);
+
+	printf("\n");
+
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{	__u32 id = 0;
+	int err;
+	int fd;
+
+	if (argc == 2) {
+		fd = prog_parse_fd(&argc, &argv);
+		if (fd < 0)
+			return -1;
+
+		return show_prog(fd);
+	}
+
+	if (argc)
+		return BAD_ARG();
+
+	while (true) {
+		err = bpf_prog_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT)
+				break;
+			err("can't get next program: %s\n", strerror(errno));
+			if (errno == EINVAL)
+				err("kernel too old?\n");
+			return -1;
+		}
+
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			err("can't get prog by id (%u): %s\n",
+			    id, strerror(errno));
+			return -1;
+		}
+
+		err = show_prog(fd);
+		close(fd);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int do_dump(int argc, char **argv)
+{
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	bool can_disasm = false;
+	unsigned int buf_size;
+	char *filepath = NULL;
+	bool opcodes = false;
+	unsigned char *buf;
+	__u32 *member_len;
+	__u64 *member_ptr;
+	ssize_t n;
+	int err;
+	int fd;
+
+	if (is_prefix(*argv, "jited")) {
+		member_len = &info.jited_prog_len;
+		member_ptr = &info.jited_prog_insns;
+		can_disasm = true;
+	} else if (is_prefix(*argv, "xlated")) {
+		member_len = &info.xlated_prog_len;
+		member_ptr = &info.xlated_prog_insns;
+	} else {
+		err("expected 'xlated' or 'jited', got: %s\n", *argv);
+		return -1;
+	}
+	NEXT_ARG();
+
+	if (argc < 2)
+		usage();
+
+	fd = prog_parse_fd(&argc, &argv);
+	if (fd < 0)
+		return -1;
+
+	if (is_prefix(*argv, "file")) {
+		NEXT_ARG();
+		if (!argc) {
+			err("expected file path\n");
+			return -1;
+		}
+
+		filepath = *argv;
+		NEXT_ARG();
+	} else if (is_prefix(*argv, "opcodes")) {
+		opcodes = true;
+		NEXT_ARG();
+	}
+
+	if (!filepath && !can_disasm) {
+		err("expected 'file' got %s\n", *argv);
+		return -1;
+	}
+	if (argc) {
+		usage();
+		return -1;
+	}
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err) {
+		err("can't get prog info: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (!*member_len) {
+		info("no instructions returned\n");
+		close(fd);
+		return 0;
+	}
+
+	buf_size = *member_len;
+
+	buf = malloc(buf_size);
+	if (!buf) {
+		err("mem alloc failed\n");
+		close(fd);
+		return -1;
+	}
+
+	memset(&info, 0, sizeof(info));
+
+	*member_ptr = ptr_to_u64(buf);
+	*member_len = buf_size;
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	close(fd);
+	if (err) {
+		err("can't get prog info: %s\n", strerror(errno));
+		goto err_free;
+	}
+
+	if (*member_len > buf_size) {
+		info("too many instructions returned\n");
+		goto err_free;
+	}
+
+	if (filepath) {
+		fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+		if (fd < 0) {
+			err("can't open file %s: %s\n", filepath,
+			    strerror(errno));
+			goto err_free;
+		}
+
+		n = write(fd, buf, *member_len);
+		close(fd);
+		if (n != *member_len) {
+			err("error writing output file: %s\n",
+			    n < 0 ? strerror(errno) : "short write");
+			goto err_free;
+		}
+	} else {
+		disasm_print_insn(buf, *member_len, opcodes);
+	}
+
+	free(buf);
+
+	return 0;
+
+err_free:
+	free(buf);
+	return -1;
+}
+
+static int do_pin(int argc, char **argv)
+{
+	return do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s show [PROG]\n"
+		"       %s %s dump xlated PROG  file FILE\n"
+		"       %s %s dump jited  PROG [file FILE] [opcodes]\n"
+		"       %s %s pin   PROG FILE\n"
+		"       %s %s help\n"
+		"\n"
+		"       " HELP_SPEC_PROGRAM "\n"
+		"",
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2], bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "dump",	do_dump },
+	{ "pin",	do_pin },
+	{ 0 }
+};
+
+int do_prog(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH net-next v4 0/3] tools: add bpftool
From: David Ahern @ 2017-10-05  3:26 UTC (permalink / raw)
  To: Jakub Kicinski, netdev; +Cc: alexei.starovoitov, daniel, oss-drivers, brouer
In-Reply-To: <20171005031005.15364-1-jakub.kicinski@netronome.com>

On 10/4/17 8:10 PM, Jakub Kicinski wrote:
> Hi!
> 
> This set adds bpftool to the tools/ directory.  The first 
> patch renames tools/net to tools/bpf, the second one adds 
> the new code, while the third adds simple documentation.
> 
> v4:
>  - rename docs *.txt -> *.rst (Jesper).
> v3:
>  - address Alexei's comments about output and docs.
> v2:
>  - report names, map ids, load time, uid;
>  - add docs/man pages;
>  - general cleanups & fixes.
> 
> Jakub Kicinski (3):
>   tools: rename tools/net directory to tools/bpf
>   tools: bpf: add bpftool
>   tools: bpftool: add documentation
> 

LGTM. Thanks for the work, Jakub.

Acked-by: David Ahern <dsahern@gmail.com>

^ permalink raw reply

* Re: [PATCH V2] Fix a sleep-in-atomic bug in shash_setkey_unaligned
From: Herbert Xu @ 2017-10-05  3:40 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: Andy Lutomirski, Jia-Ju Bai, David S. Miller, Neil Horman,
	vyasevich, Kalle Valo, Linux Crypto Mailing List,
	Network Development, linux-sctp, Linux Wireless List
In-Reply-To: <20171003224505.GE19750@localhost.localdomain>

On Tue, Oct 03, 2017 at 07:45:06PM -0300, Marcelo Ricardo Leitner wrote:
>
> > Usually if you're invoking setkey from a non-sleeping code-path
> > you're probably doing something wrong.
> 
> Usually but not always. There are 3 calls to that function on SCTP
> code:
> - pack a cookie, which is sent on an INIT_ACK packet to the client
> - unpack the cookie above, after it is sent back by the client on a
>   COOKIE_ECHO packet
> - send a chunk authenticated by a hash

I'm not talking about the code-path in question.  I'm talking
about the function which generates the secret key in the first
place.  AFAICS that's only called in GFP_KERNEL context.  What
am I missing?

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [RESEND PATCH 0/7] net: qrtr: Fixes and support receiving version 2 packets
From: Bjorn Andersson @ 2017-10-05  3:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew

On the latest Qualcomm platforms remote processors are sending packets with
version 2 of the message header. This series starts off with some fixes and
then refactors the qrtr code to support receiving messages of both version 1
and version 2.

As all remotes are backwards compatible transmitted packets continues to be
send as version 1, but some groundwork has been done to make this a per-link
property.

Bjorn Andersson (7):
  net: qrtr: Invoke sk_error_report() after setting sk_err
  net: qrtr: Move constants to header file
  net: qrtr: Add control packet definition to uapi
  net: qrtr: Pass source and destination to enqueue functions
  net: qrtr: Clean up control packet handling
  net: qrtr: Use sk_buff->cb in receive path
  net: qrtr: Support decoding incoming v2 packets

 include/uapi/linux/qrtr.h |  35 +++++
 net/qrtr/qrtr.c           | 377 +++++++++++++++++++++++++---------------------
 2 files changed, 241 insertions(+), 171 deletions(-)

-- 
2.12.0

^ permalink raw reply

* [RESEND PATCH 1/7] net: qrtr: Invoke sk_error_report() after setting sk_err
From: Bjorn Andersson @ 2017-10-05  3:50 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew
In-Reply-To: <20171005035105.14677-1-bjorn.andersson@linaro.org>

Rather than manually waking up any context sleeping on the sock to
signal an error we should call sk_error_report(). This has the added
benefit that in-kernel consumers can override this notification with
its own callback.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 net/qrtr/qrtr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c2f5c13550c0..7e4b49a8349e 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -541,7 +541,7 @@ static void qrtr_reset_ports(void)

 		sock_hold(&ipc->sk);
 		ipc->sk.sk_err = ENETRESET;
-		wake_up_interruptible(sk_sleep(&ipc->sk));
+		ipc->sk.sk_error_report(&ipc->sk);
 		sock_put(&ipc->sk);
 	}
 	mutex_unlock(&qrtr_port_lock);
-- 
2.12.0

^ permalink raw reply related

* [RESEND PATCH 2/7] net: qrtr: Move constants to header file
From: Bjorn Andersson @ 2017-10-05  3:51 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew
In-Reply-To: <20171005035105.14677-1-bjorn.andersson@linaro.org>

The constants are used by both the name server and clients, so clarify
their value and move them to the uapi header.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/uapi/linux/qrtr.h | 3 +++
 net/qrtr/qrtr.c           | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h
index 9d76c566f66e..63e8803e4d90 100644
--- a/include/uapi/linux/qrtr.h
+++ b/include/uapi/linux/qrtr.h
@@ -4,6 +4,9 @@
 #include <linux/socket.h>
 #include <linux/types.h>
 
+#define QRTR_NODE_BCAST	0xffffffffu
+#define QRTR_PORT_CTRL	0xfffffffeu
+
 struct sockaddr_qrtr {
 	__kernel_sa_family_t sq_family;
 	__u32 sq_node;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 7e4b49a8349e..15981abc042c 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -61,8 +61,6 @@ struct qrtr_hdr {
 } __packed;
 
 #define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
-#define QRTR_NODE_BCAST ((unsigned int)-1)
-#define QRTR_PORT_CTRL ((unsigned int)-2)
 
 struct qrtr_sock {
 	/* WARNING: sk must be the first member */
-- 
2.12.0

^ permalink raw reply related

* [RESEND PATCH 3/7] net: qrtr: Add control packet definition to uapi
From: Bjorn Andersson @ 2017-10-05  3:51 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew
In-Reply-To: <20171005035105.14677-1-bjorn.andersson@linaro.org>

The QMUX protocol specification defines structure of the special control
packet messages being sent between handlers of the control port.

Add these to the uapi header, as this structure and the associated types
are shared between the kernel and all userspace handlers of control
messages.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/uapi/linux/qrtr.h | 32 ++++++++++++++++++++++++++++++++
 net/qrtr/qrtr.c           | 12 ------------
 2 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h
index 63e8803e4d90..179af64846e0 100644
--- a/include/uapi/linux/qrtr.h
+++ b/include/uapi/linux/qrtr.h
@@ -13,4 +13,36 @@ struct sockaddr_qrtr {
 	__u32 sq_port;
 };
 
+enum qrtr_pkt_type {
+	QRTR_TYPE_DATA		= 1,
+	QRTR_TYPE_HELLO		= 2,
+	QRTR_TYPE_BYE		= 3,
+	QRTR_TYPE_NEW_SERVER	= 4,
+	QRTR_TYPE_DEL_SERVER	= 5,
+	QRTR_TYPE_DEL_CLIENT	= 6,
+	QRTR_TYPE_RESUME_TX	= 7,
+	QRTR_TYPE_EXIT          = 8,
+	QRTR_TYPE_PING          = 9,
+	QRTR_TYPE_NEW_LOOKUP	= 10,
+	QRTR_TYPE_DEL_LOOKUP	= 11,
+};
+
+struct qrtr_ctrl_pkt {
+	__le32 cmd;
+
+	union {
+		struct {
+			__le32 service;
+			__le32 instance;
+			__le32 node;
+			__le32 port;
+		} server;
+
+		struct {
+			__le32 node;
+			__le32 port;
+		} client;
+	};
+} __packed;
+
 #endif /* _LINUX_QRTR_H */
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 15981abc042c..d85ca7170b8f 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -26,18 +26,6 @@
 #define QRTR_MIN_EPH_SOCKET 0x4000
 #define QRTR_MAX_EPH_SOCKET 0x7fff
 
-enum qrtr_pkt_type {
-	QRTR_TYPE_DATA		= 1,
-	QRTR_TYPE_HELLO		= 2,
-	QRTR_TYPE_BYE		= 3,
-	QRTR_TYPE_NEW_SERVER	= 4,
-	QRTR_TYPE_DEL_SERVER	= 5,
-	QRTR_TYPE_DEL_CLIENT	= 6,
-	QRTR_TYPE_RESUME_TX	= 7,
-	QRTR_TYPE_EXIT		= 8,
-	QRTR_TYPE_PING		= 9,
-};
-
 /**
  * struct qrtr_hdr - (I|R)PCrouter packet header
  * @version: protocol version
-- 
2.12.0

^ permalink raw reply related

* [RESEND PATCH 4/7] net: qrtr: Pass source and destination to enqueue functions
From: Bjorn Andersson @ 2017-10-05  3:51 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew
In-Reply-To: <20171005035105.14677-1-bjorn.andersson@linaro.org>

Defer writing the message header to the skb until its time to enqueue
the packet. As the receive path is reworked to decode the message header
as it's received from the transport and only pass around the payload in
the skb this change means that we do not have to fill out the full
message header just to decode it immediately in qrtr_local_enqueue().

In the future this change also makes it possible to prepend message
headers based on the version of each link.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 net/qrtr/qrtr.c | 120 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 69 insertions(+), 51 deletions(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index d85ca7170b8f..82dc83789310 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -97,8 +97,12 @@ struct qrtr_node {
 	struct list_head item;
 };
 
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb);
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to);
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to);
 
 /* Release node resources and free the node.
  *
@@ -136,10 +140,27 @@ static void qrtr_node_release(struct qrtr_node *node)
 }
 
 /* Pass an outgoing packet socket buffer to the endpoint driver. */
-static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			     int type, struct sockaddr_qrtr *from,
+			     struct sockaddr_qrtr *to)
 {
+	struct qrtr_hdr *hdr;
+	size_t len = skb->len;
 	int rc = -ENODEV;
 
+	hdr = skb_push(skb, QRTR_HDR_SIZE);
+	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
+	hdr->type = cpu_to_le32(type);
+	hdr->src_node_id = cpu_to_le32(from->sq_node);
+	hdr->src_port_id = cpu_to_le32(from->sq_port);
+	hdr->dst_node_id = cpu_to_le32(to->sq_node);
+	hdr->dst_port_id = cpu_to_le32(to->sq_port);
+
+	hdr->size = cpu_to_le32(len);
+	hdr->confirm_rx = 0;
+
+	skb_put_padto(skb, ALIGN(len, 4));
+
 	mutex_lock(&node->ep_lock);
 	if (node->ep)
 		rc = node->ep->xmit(node->ep, skb);
@@ -237,23 +258,13 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
 static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
 					      u32 src_node, u32 dst_node)
 {
-	struct qrtr_hdr *hdr;
 	struct sk_buff *skb;
 
 	skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
 	if (!skb)
 		return NULL;
-	skb_reset_transport_header(skb);
 
-	hdr = skb_put(skb, QRTR_HDR_SIZE);
-	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
-	hdr->type = cpu_to_le32(type);
-	hdr->src_node_id = cpu_to_le32(src_node);
-	hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
-	hdr->confirm_rx = cpu_to_le32(0);
-	hdr->size = cpu_to_le32(pkt_len);
-	hdr->dst_node_id = cpu_to_le32(dst_node);
-	hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+	skb_reserve(skb, QRTR_HDR_SIZE);
 
 	return skb;
 }
@@ -326,6 +337,8 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
 static void qrtr_node_rx_work(struct work_struct *work)
 {
 	struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+	struct sockaddr_qrtr dst;
+	struct sockaddr_qrtr src;
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
@@ -341,6 +354,11 @@ static void qrtr_node_rx_work(struct work_struct *work)
 		dst_port = le32_to_cpu(phdr->dst_port_id);
 		confirm = !!phdr->confirm_rx;
 
+		src.sq_node = src_node;
+		src.sq_port = le32_to_cpu(phdr->src_port_id);
+		dst.sq_node = dst_node;
+		dst.sq_port = dst_port;
+
 		qrtr_node_assign(node, src_node);
 
 		ipc = qrtr_port_lookup(dst_port);
@@ -357,7 +375,9 @@ static void qrtr_node_rx_work(struct work_struct *work)
 			skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
 			if (!skb)
 				break;
-			if (qrtr_node_enqueue(node, skb))
+
+			if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
+					      &dst, &src))
 				break;
 		}
 	}
@@ -407,6 +427,8 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
 void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
 {
 	struct qrtr_node *node = ep->node;
+	struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
+	struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
 	struct sk_buff *skb;
 
 	mutex_lock(&node->ep_lock);
@@ -416,7 +438,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
 	/* Notify the local controller about the event */
 	skb = qrtr_alloc_local_bye(node->nid);
 	if (skb)
-		qrtr_local_enqueue(NULL, skb);
+		qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
 
 	qrtr_node_release(node);
 	ep->node = NULL;
@@ -454,11 +476,17 @@ static void qrtr_port_remove(struct qrtr_sock *ipc)
 {
 	struct sk_buff *skb;
 	int port = ipc->us.sq_port;
+	struct sockaddr_qrtr to;
+
+	to.sq_family = AF_QIPCRTR;
+	to.sq_node = QRTR_NODE_BCAST;
+	to.sq_port = QRTR_PORT_CTRL;
 
 	skb = qrtr_alloc_del_client(&ipc->us);
 	if (skb) {
 		skb_set_owner_w(skb, &ipc->sk);
-		qrtr_bcast_enqueue(NULL, skb);
+		qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
+				   &to);
 	}
 
 	if (port == QRTR_PORT_CTRL)
@@ -606,19 +634,25 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
 }
 
 /* Queue packet to local peer socket. */
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to)
 {
-	const struct qrtr_hdr *phdr;
 	struct qrtr_sock *ipc;
+	struct qrtr_hdr *phdr;
 
-	phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-
-	ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
+	ipc = qrtr_port_lookup(to->sq_port);
 	if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
 		kfree_skb(skb);
 		return -ENODEV;
 	}
 
+	phdr = skb_push(skb, QRTR_HDR_SIZE);
+	skb_reset_transport_header(skb);
+
+	phdr->src_node_id = cpu_to_le32(from->sq_node);
+	phdr->src_port_id = cpu_to_le32(from->sq_port);
+
 	if (sock_queue_rcv_skb(&ipc->sk, skb)) {
 		qrtr_port_put(ipc);
 		kfree_skb(skb);
@@ -631,7 +665,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
 }
 
 /* Queue packet for broadcast. */
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+			      int type, struct sockaddr_qrtr *from,
+			      struct sockaddr_qrtr *to)
 {
 	struct sk_buff *skbn;
 
@@ -641,11 +677,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
 		if (!skbn)
 			break;
 		skb_set_owner_w(skbn, skb->sk);
-		qrtr_node_enqueue(node, skbn);
+		qrtr_node_enqueue(node, skbn, type, from, to);
 	}
 	mutex_unlock(&qrtr_node_lock);
 
-	qrtr_local_enqueue(node, skb);
+	qrtr_local_enqueue(node, skb, type, from, to);
 
 	return 0;
 }
@@ -653,13 +689,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
 static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
-	int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
+	int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
+			  struct sockaddr_qrtr *, struct sockaddr_qrtr *);
 	struct qrtr_sock *ipc = qrtr_sk(sock->sk);
 	struct sock *sk = sock->sk;
 	struct qrtr_node *node;
-	struct qrtr_hdr *hdr;
 	struct sk_buff *skb;
 	size_t plen;
+	u32 type = QRTR_TYPE_DATA;
 	int rc;
 
 	if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -713,32 +750,14 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	if (!skb)
 		goto out_node;
 
-	skb_reset_transport_header(skb);
-	skb_put(skb, len + QRTR_HDR_SIZE);
-
-	hdr = (struct qrtr_hdr *)skb_transport_header(skb);
-	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
-	hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
-	hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
-	hdr->confirm_rx = cpu_to_le32(0);
-	hdr->size = cpu_to_le32(len);
-	hdr->dst_node_id = cpu_to_le32(addr->sq_node);
-	hdr->dst_port_id = cpu_to_le32(addr->sq_port);
+	skb_reserve(skb, QRTR_HDR_SIZE);
 
-	rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
-					 &msg->msg_iter, len);
+	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
 	if (rc) {
 		kfree_skb(skb);
 		goto out_node;
 	}
 
-	if (plen != len) {
-		rc = skb_pad(skb, plen - len);
-		if (rc)
-			goto out_node;
-		skb_put(skb, plen - len);
-	}
-
 	if (ipc->us.sq_port == QRTR_PORT_CTRL) {
 		if (len < 4) {
 			rc = -EINVAL;
@@ -747,12 +766,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		}
 
 		/* control messages already require the type as 'command' */
-		skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
-	} else {
-		hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
+		skb_copy_bits(skb, 0, &type, 4);
+		type = le32_to_cpu(type);
 	}
 
-	rc = enqueue_fn(node, skb);
+	rc = enqueue_fn(node, skb, type, &ipc->us, addr);
 	if (rc >= 0)
 		rc = len;
 
-- 
2.12.0

^ permalink raw reply related

* [RESEND PATCH 5/7] net: qrtr: Clean up control packet handling
From: Bjorn Andersson @ 2017-10-05  3:51 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew
In-Reply-To: <20171005035105.14677-1-bjorn.andersson@linaro.org>

As the message header generation is deferred the internal functions for
generating control packets can be simplified.

This patch modifies qrtr_alloc_ctrl_packet() to, in addition to the
sk_buff, return a reference to a struct qrtr_ctrl_pkt, which clarifies
and simplifies the helpers to the point that these functions can be
folded back into the callers.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 net/qrtr/qrtr.c | 93 ++++++++++++++++++---------------------------------------
 1 file changed, 29 insertions(+), 64 deletions(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 82dc83789310..a84edba7b1ef 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -255,9 +255,18 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 }
 EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
 
-static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
-					      u32 src_node, u32 dst_node)
+/**
+ * qrtr_alloc_ctrl_packet() - allocate control packet skb
+ * @pkt: reference to qrtr_ctrl_pkt pointer
+ *
+ * Returns newly allocated sk_buff, or NULL on failure
+ *
+ * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
+ * on success returns a reference to the control packet in @pkt.
+ */
+static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
 {
+	const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
 	struct sk_buff *skb;
 
 	skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
@@ -265,64 +274,7 @@ static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
 		return NULL;
 
 	skb_reserve(skb, QRTR_HDR_SIZE);
-
-	return skb;
-}
-
-/* Allocate and construct a resume-tx packet. */
-static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
-					    u32 dst_node, u32 port)
-{
-	const int pkt_len = 20;
-	struct sk_buff *skb;
-	__le32 *buf;
-
-	skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len,
-				     src_node, dst_node);
-	if (!skb)
-		return NULL;
-
-	buf = skb_put_zero(skb, pkt_len);
-	buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
-	buf[1] = cpu_to_le32(src_node);
-	buf[2] = cpu_to_le32(port);
-
-	return skb;
-}
-
-/* Allocate and construct a BYE message to signal remote termination */
-static struct sk_buff *qrtr_alloc_local_bye(u32 src_node)
-{
-	const int pkt_len = 20;
-	struct sk_buff *skb;
-	__le32 *buf;
-
-	skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len,
-				     src_node, qrtr_local_nid);
-	if (!skb)
-		return NULL;
-
-	buf = skb_put_zero(skb, pkt_len);
-	buf[0] = cpu_to_le32(QRTR_TYPE_BYE);
-
-	return skb;
-}
-
-static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq)
-{
-	const int pkt_len = 20;
-	struct sk_buff *skb;
-	__le32 *buf;
-
-	skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len,
-				     sq->sq_node, QRTR_NODE_BCAST);
-	if (!skb)
-		return NULL;
-
-	buf = skb_put_zero(skb, pkt_len);
-	buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
-	buf[1] = cpu_to_le32(sq->sq_node);
-	buf[2] = cpu_to_le32(sq->sq_port);
+	*pkt = skb_put_zero(skb, pkt_len);
 
 	return skb;
 }
@@ -337,6 +289,7 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
 static void qrtr_node_rx_work(struct work_struct *work)
 {
 	struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+	struct qrtr_ctrl_pkt *pkt;
 	struct sockaddr_qrtr dst;
 	struct sockaddr_qrtr src;
 	struct sk_buff *skb;
@@ -372,10 +325,14 @@ static void qrtr_node_rx_work(struct work_struct *work)
 		}
 
 		if (confirm) {
-			skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
+			skb = qrtr_alloc_ctrl_packet(&pkt);
 			if (!skb)
 				break;
 
+			pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+			pkt->client.node = cpu_to_le32(dst.sq_node);
+			pkt->client.port = cpu_to_le32(dst.sq_port);
+
 			if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
 					      &dst, &src))
 				break;
@@ -429,6 +386,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
 	struct qrtr_node *node = ep->node;
 	struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
 	struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
+	struct qrtr_ctrl_pkt *pkt;
 	struct sk_buff *skb;
 
 	mutex_lock(&node->ep_lock);
@@ -436,9 +394,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
 	mutex_unlock(&node->ep_lock);
 
 	/* Notify the local controller about the event */
-	skb = qrtr_alloc_local_bye(node->nid);
-	if (skb)
+	skb = qrtr_alloc_ctrl_packet(&pkt);
+	if (skb) {
+		pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
 		qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
+	}
 
 	qrtr_node_release(node);
 	ep->node = NULL;
@@ -474,6 +434,7 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
 /* Remove port assignment. */
 static void qrtr_port_remove(struct qrtr_sock *ipc)
 {
+	struct qrtr_ctrl_pkt *pkt;
 	struct sk_buff *skb;
 	int port = ipc->us.sq_port;
 	struct sockaddr_qrtr to;
@@ -482,8 +443,12 @@ static void qrtr_port_remove(struct qrtr_sock *ipc)
 	to.sq_node = QRTR_NODE_BCAST;
 	to.sq_port = QRTR_PORT_CTRL;
 
-	skb = qrtr_alloc_del_client(&ipc->us);
+	skb = qrtr_alloc_ctrl_packet(&pkt);
 	if (skb) {
+		pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+		pkt->client.node = cpu_to_le32(ipc->us.sq_node);
+		pkt->client.port = cpu_to_le32(ipc->us.sq_port);
+
 		skb_set_owner_w(skb, &ipc->sk);
 		qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
 				   &to);
-- 
2.12.0

^ permalink raw reply related

* [RESEND PATCH 7/7] net: qrtr: Support decoding incoming v2 packets
From: Bjorn Andersson @ 2017-10-05  3:51 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, linux-kernel, linux-arm-msm, Chris Lew
In-Reply-To: <20171005035105.14677-1-bjorn.andersson@linaro.org>

Add the necessary logic for decoding incoming messages of version 2 as
well. Also make sure there's room for the bigger of version 1 and 2
headers in the code allocating skbs for outgoing messages.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 net/qrtr/qrtr.c | 132 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 94 insertions(+), 38 deletions(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 7bca6ec892a5..8bb3e2bb5d0a 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,14 +20,15 @@
 
 #include "qrtr.h"
 
-#define QRTR_PROTO_VER 1
+#define QRTR_PROTO_VER_1 1
+#define QRTR_PROTO_VER_2 3
 
 /* auto-bind range */
 #define QRTR_MIN_EPH_SOCKET 0x4000
 #define QRTR_MAX_EPH_SOCKET 0x7fff
 
 /**
- * struct qrtr_hdr - (I|R)PCrouter packet header
+ * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
  * @version: protocol version
  * @type: packet type; one of QRTR_TYPE_*
  * @src_node_id: source node
@@ -37,7 +38,7 @@
  * @dst_node_id: destination node
  * @dst_port_id: destination port
  */
-struct qrtr_hdr {
+struct qrtr_hdr_v1 {
 	__le32 version;
 	__le32 type;
 	__le32 src_node_id;
@@ -48,6 +49,32 @@ struct qrtr_hdr {
 	__le32 dst_port_id;
 } __packed;
 
+/**
+ * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
+ * @version: protocol version
+ * @type: packet type; one of QRTR_TYPE_*
+ * @flags: bitmask of QRTR_FLAGS_*
+ * @optlen: length of optional header data
+ * @size: length of packet, excluding this header and optlen
+ * @src_node_id: source node
+ * @src_port_id: source port
+ * @dst_node_id: destination node
+ * @dst_port_id: destination port
+ */
+struct qrtr_hdr_v2 {
+	u8 version;
+	u8 type;
+	u8 flags;
+	u8 optlen;
+	__le32 size;
+	__le16 src_node_id;
+	__le16 src_port_id;
+	__le16 dst_node_id;
+	__le16 dst_port_id;
+} __packed;
+
+#define QRTR_FLAGS_CONFIRM_RX	BIT(0)
+
 struct qrtr_cb {
 	u32 src_node;
 	u32 src_port;
@@ -58,7 +85,8 @@ struct qrtr_cb {
 	u8 confirm_rx;
 };
 
-#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
+#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
+					sizeof(struct qrtr_hdr_v2))
 
 struct qrtr_sock {
 	/* WARNING: sk must be the first member */
@@ -154,12 +182,12 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
 			     int type, struct sockaddr_qrtr *from,
 			     struct sockaddr_qrtr *to)
 {
-	struct qrtr_hdr *hdr;
+	struct qrtr_hdr_v1 *hdr;
 	size_t len = skb->len;
 	int rc = -ENODEV;
 
-	hdr = skb_push(skb, QRTR_HDR_SIZE);
-	hdr->version = cpu_to_le32(QRTR_PROTO_VER);
+	hdr = skb_push(skb, sizeof(*hdr));
+	hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
 	hdr->type = cpu_to_le32(type);
 	hdr->src_node_id = cpu_to_le32(from->sq_node);
 	hdr->src_port_id = cpu_to_le32(from->sq_port);
@@ -224,52 +252,80 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
 int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 {
 	struct qrtr_node *node = ep->node;
-	const struct qrtr_hdr *phdr = data;
+	const struct qrtr_hdr_v1 *v1;
+	const struct qrtr_hdr_v2 *v2;
 	struct sk_buff *skb;
 	struct qrtr_cb *cb;
-	unsigned int psize;
 	unsigned int size;
-	unsigned int type;
 	unsigned int ver;
-	unsigned int dst;
+	size_t hdrlen;
 
-	if (len < QRTR_HDR_SIZE || len & 3)
+	if (len & 3)
 		return -EINVAL;
 
-	ver = le32_to_cpu(phdr->version);
-	size = le32_to_cpu(phdr->size);
-	type = le32_to_cpu(phdr->type);
-	dst = le32_to_cpu(phdr->dst_port_id);
+	skb = netdev_alloc_skb(NULL, len);
+	if (!skb)
+		return -ENOMEM;
 
-	psize = (size + 3) & ~3;
+	cb = (struct qrtr_cb *)skb->cb;
 
-	if (ver != QRTR_PROTO_VER)
-		return -EINVAL;
+	/* Version field in v1 is little endian, so this works for both cases */
+	ver = *(u8*)data;
 
-	if (len != psize + QRTR_HDR_SIZE)
-		return -EINVAL;
+	switch (ver) {
+	case QRTR_PROTO_VER_1:
+		v1 = data;
+		hdrlen = sizeof(*v1);
 
-	if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
-		return -EINVAL;
+		cb->type = le32_to_cpu(v1->type);
+		cb->src_node = le32_to_cpu(v1->src_node_id);
+		cb->src_port = le32_to_cpu(v1->src_port_id);
+		cb->confirm_rx = !!v1->confirm_rx;
+		cb->dst_node = le32_to_cpu(v1->dst_node_id);
+		cb->dst_port = le32_to_cpu(v1->dst_port_id);
 
-	skb = netdev_alloc_skb(NULL, len);
-	if (!skb)
-		return -ENOMEM;
+		size = le32_to_cpu(v1->size);
+		break;
+	case QRTR_PROTO_VER_2:
+		v2 = data;
+		hdrlen = sizeof(*v2) + v2->optlen;
+
+		cb->type = v2->type;
+		cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
+		cb->src_node = le16_to_cpu(v2->src_node_id);
+		cb->src_port = le16_to_cpu(v2->src_port_id);
+		cb->dst_node = le16_to_cpu(v2->dst_node_id);
+		cb->dst_port = le16_to_cpu(v2->dst_port_id);
+
+		if (cb->src_port == (u16)QRTR_PORT_CTRL)
+			cb->src_port = QRTR_PORT_CTRL;
+		if (cb->dst_port == (u16)QRTR_PORT_CTRL)
+			cb->dst_port = QRTR_PORT_CTRL;
+
+		size = le32_to_cpu(v2->size);
+		break;
+	default:
+		pr_err("qrtr: Invalid version %d\n", ver);
+		goto err;
+	}
 
-	cb = (struct qrtr_cb *)skb->cb;
-	cb->src_node = le32_to_cpu(phdr->src_node_id);
-	cb->src_port = le32_to_cpu(phdr->src_port_id);
-	cb->dst_node = le32_to_cpu(phdr->dst_node_id);
-	cb->dst_port = le32_to_cpu(phdr->dst_port_id);
-	cb->type = type;
-	cb->confirm_rx = !!phdr->confirm_rx;
+	if (len != ALIGN(size, 4) + hdrlen)
+		goto err;
 
-	skb_put_data(skb, data + QRTR_HDR_SIZE, size);
+	if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
+		goto err;
+
+	skb_put_data(skb, data + hdrlen, size);
 
 	skb_queue_tail(&node->rx_queue, skb);
 	schedule_work(&node->work);
 
 	return 0;
+
+err:
+	kfree_skb(skb);
+	return -EINVAL;
+
 }
 EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
 
@@ -287,11 +343,11 @@ static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
 	const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
 	struct sk_buff *skb;
 
-	skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
+	skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
 	if (!skb)
 		return NULL;
 
-	skb_reserve(skb, QRTR_HDR_SIZE);
+	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
 	*pkt = skb_put_zero(skb, pkt_len);
 
 	return skb;
@@ -720,12 +776,12 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	plen = (len + 3) & ~3;
-	skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
+	skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
 				  msg->msg_flags & MSG_DONTWAIT, &rc);
 	if (!skb)
 		goto out_node;
 
-	skb_reserve(skb, QRTR_HDR_SIZE);
+	skb_reserve(skb, QRTR_HDR_MAX_SIZE);
 
 	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
 	if (rc) {
-- 
2.12.0

^ permalink raw reply related

* 3% Interest Rate
From: Vancity Loan Firm @ 2017-10-05  3:29 UTC (permalink / raw)


We can help you with a genuine loan to meet your needs.
Do you need a personal or business loan without stress and quick approval?
Do you need an urgent loan today? No Credit Checks

* LOAN APPROVAL IN 60MINS !!
* GUARANTEED SAME DAY TRANSFER !!
* 100% APPROVAL RATE !!
*LOW INTEREST RATE !!

Contact US for more information about loan offer and we will solve your
financial problem.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox