Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 1/6] net/ipv4: Update ip_tunnel_metadata_cnt static key to modern api
From: Davidlohr Bueso @ 2018-05-08 16:06 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso
In-Reply-To: <20180508160703.8125-1-dave@stgolabs.net>

No changes in refcount semantics -- key init is false; replace

static_key_slow_inc|dec   with   static_branch_inc|dec
static_key_false          with   static_branch_unlikely

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 include/net/ip_tunnels.h  | 4 ++--
 net/ipv4/ip_tunnel_core.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 751646adc769..90ff430f5e9d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -477,12 +477,12 @@ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstat
 	return (struct ip_tunnel_info *)lwtstate->data;
 }
 
-extern struct static_key ip_tunnel_metadata_cnt;
+DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
 
 /* Returns > 0 if metadata should be collected */
 static inline int ip_tunnel_collect_metadata(void)
 {
-	return static_key_false(&ip_tunnel_metadata_cnt);
+	return static_branch_unlikely(&ip_tunnel_metadata_cnt);
 }
 
 void __init ip_tunnel_core_init(void);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 2f39479be92f..dde671e97829 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -423,17 +423,17 @@ void __init ip_tunnel_core_init(void)
 	lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
 }
 
-struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
 EXPORT_SYMBOL(ip_tunnel_metadata_cnt);
 
 void ip_tunnel_need_metadata(void)
 {
-	static_key_slow_inc(&ip_tunnel_metadata_cnt);
+	static_branch_inc(&ip_tunnel_metadata_cnt);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata);
 
 void ip_tunnel_unneed_metadata(void)
 {
-	static_key_slow_dec(&ip_tunnel_metadata_cnt);
+	static_branch_dec(&ip_tunnel_metadata_cnt);
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
-- 
2.13.6

^ permalink raw reply related

* [PATCH 2/6] net/sock: Update memalloc_socks static key to modern api
From: Davidlohr Bueso @ 2018-05-08 16:06 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso
In-Reply-To: <20180508160703.8125-1-dave@stgolabs.net>

No changes in refcount semantics -- key init is false; replace

static_key_slow_inc|dec   with   static_branch_inc|dec
static_key_false          with   static_branch_unlikely

Added a '_key' suffix to memalloc_socks, for better self
documentation.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 include/net/sock.h | 4 ++--
 net/core/sock.c    | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 3c568b36ee36..4f7c584e9765 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -808,10 +808,10 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)
 }
 
 #ifdef CONFIG_NET
-extern struct static_key memalloc_socks;
+DECLARE_STATIC_KEY_FALSE(memalloc_socks_key);
 static inline int sk_memalloc_socks(void)
 {
-	return static_key_false(&memalloc_socks);
+	return static_branch_unlikely(&memalloc_socks_key);
 }
 #else
 
diff --git a/net/core/sock.c b/net/core/sock.c
index e7d8b6c955c6..042cfc612660 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -327,8 +327,8 @@ EXPORT_SYMBOL(sysctl_optmem_max);
 
 int sysctl_tstamp_allow_data __read_mostly = 1;
 
-struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
-EXPORT_SYMBOL_GPL(memalloc_socks);
+DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
+EXPORT_SYMBOL_GPL(memalloc_socks_key);
 
 /**
  * sk_set_memalloc - sets %SOCK_MEMALLOC
@@ -342,7 +342,7 @@ void sk_set_memalloc(struct sock *sk)
 {
 	sock_set_flag(sk, SOCK_MEMALLOC);
 	sk->sk_allocation |= __GFP_MEMALLOC;
-	static_key_slow_inc(&memalloc_socks);
+	static_branch_inc(&memalloc_socks_key);
 }
 EXPORT_SYMBOL_GPL(sk_set_memalloc);
 
@@ -350,7 +350,7 @@ void sk_clear_memalloc(struct sock *sk)
 {
 	sock_reset_flag(sk, SOCK_MEMALLOC);
 	sk->sk_allocation &= ~__GFP_MEMALLOC;
-	static_key_slow_dec(&memalloc_socks);
+	static_branch_dec(&memalloc_socks_key);
 
 	/*
 	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
-- 
2.13.6

^ permalink raw reply related

* [PATCH 3/6] net: Update [e/in]gress_needed static key to modern api
From: Davidlohr Bueso @ 2018-05-08 16:07 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso
In-Reply-To: <20180508160703.8125-1-dave@stgolabs.net>

No changes in semantics -- key init is false; replace

static_key_slow_inc|dec   with   static_branch_inc|dec
static_key_false          with   static_branch_unlikely

Added a '_key' suffix to both ingress_needed and egress_needed,
for better self documentation.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 net/core/dev.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 019cb1fc1dde..64483afb7b90 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1755,33 +1755,33 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
 #ifdef CONFIG_NET_INGRESS
-static struct static_key ingress_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
 
 void net_inc_ingress_queue(void)
 {
-	static_key_slow_inc(&ingress_needed);
+	static_branch_inc(&ingress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
 
 void net_dec_ingress_queue(void)
 {
-	static_key_slow_dec(&ingress_needed);
+	static_branch_dec(&ingress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
 #endif
 
 #ifdef CONFIG_NET_EGRESS
-static struct static_key egress_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
 
 void net_inc_egress_queue(void)
 {
-	static_key_slow_inc(&egress_needed);
+	static_branch_inc(&egress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_inc_egress_queue);
 
 void net_dec_egress_queue(void)
 {
-	static_key_slow_dec(&egress_needed);
+	static_branch_dec(&egress_needed_key);
 }
 EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 #endif
@@ -3514,7 +3514,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_at_ingress = 0;
 # ifdef CONFIG_NET_EGRESS
-	if (static_key_false(&egress_needed)) {
+	if (static_branch_unlikely(&egress_needed_key)) {
 		skb = sch_handle_egress(skb, &rc, dev);
 		if (!skb)
 			goto out;
@@ -4548,7 +4548,7 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 skip_taps:
 #ifdef CONFIG_NET_INGRESS
-	if (static_key_false(&ingress_needed)) {
+	if (static_branch_unlikely(&ingress_needed_key)) {
 		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
 		if (!skb)
 			goto out;
-- 
2.13.6

^ permalink raw reply related

* [PATCH 4/6] net: Update netstamp_needed static key to modern api
From: Davidlohr Bueso @ 2018-05-08 16:07 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso
In-Reply-To: <20180508160703.8125-1-dave@stgolabs.net>

No changes in refcount semantics -- key init is false; replace

static_key_slow_inc|dec   with   static_branch_inc|dec
static_key_false          with   static_branch_unlikely

Added a '_key' suffix to netstamp_needed, for better self
documentation.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 net/core/dev.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 64483afb7b90..668be88b5308 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1786,7 +1786,7 @@ void net_dec_egress_queue(void)
 EXPORT_SYMBOL_GPL(net_dec_egress_queue);
 #endif
 
-static struct static_key netstamp_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
 #ifdef HAVE_JUMP_LABEL
 static atomic_t netstamp_needed_deferred;
 static atomic_t netstamp_wanted;
@@ -1797,9 +1797,9 @@ static void netstamp_clear(struct work_struct *work)
 
 	wanted = atomic_add_return(deferred, &netstamp_wanted);
 	if (wanted > 0)
-		static_key_enable(&netstamp_needed);
+		static_branch_enable(&netstamp_needed_key);
 	else
-		static_key_disable(&netstamp_needed);
+		static_branch_disable(&netstamp_needed_key);
 }
 static DECLARE_WORK(netstamp_work, netstamp_clear);
 #endif
@@ -1819,7 +1819,7 @@ void net_enable_timestamp(void)
 	atomic_inc(&netstamp_needed_deferred);
 	schedule_work(&netstamp_work);
 #else
-	static_key_slow_inc(&netstamp_needed);
+	static_branch_inc(&netstamp_needed_key);
 #endif
 }
 EXPORT_SYMBOL(net_enable_timestamp);
@@ -1839,7 +1839,7 @@ void net_disable_timestamp(void)
 	atomic_dec(&netstamp_needed_deferred);
 	schedule_work(&netstamp_work);
 #else
-	static_key_slow_dec(&netstamp_needed);
+	static_branch_dec(&netstamp_needed_key);
 #endif
 }
 EXPORT_SYMBOL(net_disable_timestamp);
@@ -1847,15 +1847,15 @@ EXPORT_SYMBOL(net_disable_timestamp);
 static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	skb->tstamp = 0;
-	if (static_key_false(&netstamp_needed))
+	if (static_branch_unlikely(&netstamp_needed_key))
 		__net_timestamp(skb);
 }
 
-#define net_timestamp_check(COND, SKB)			\
-	if (static_key_false(&netstamp_needed)) {		\
-		if ((COND) && !(SKB)->tstamp)	\
-			__net_timestamp(SKB);		\
-	}						\
+#define net_timestamp_check(COND, SKB)				\
+	if (static_branch_unlikely(&netstamp_needed_key)) {	\
+		if ((COND) && !(SKB)->tstamp)			\
+			__net_timestamp(SKB);			\
+	}							\
 
 bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
 {
-- 
2.13.6

^ permalink raw reply related

* [PATCH 5/6] net: Update generic_xdp_needed static key to modern api
From: Davidlohr Bueso @ 2018-05-08 16:07 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso
In-Reply-To: <20180508160703.8125-1-dave@stgolabs.net>

No changes in refcount semantics -- key init is false; replace

static_key_slow_inc|dec   with   static_branch_inc|dec
static_key_false          with   static_branch_unlikely

Added a '_key' suffix to generic_xdp_needed, for better self
documentation.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 net/core/dev.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 668be88b5308..1a8c0bb44e28 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4136,7 +4136,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 }
 EXPORT_SYMBOL_GPL(generic_xdp_tx);
 
-static struct static_key generic_xdp_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
 
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
 {
@@ -4176,7 +4176,7 @@ static int netif_rx_internal(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 
-	if (static_key_false(&generic_xdp_needed)) {
+	if (static_branch_unlikely(&generic_xdp_needed_key)) {
 		int ret;
 
 		preempt_disable();
@@ -4708,9 +4708,9 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 			bpf_prog_put(old);
 
 		if (old && !new) {
-			static_key_slow_dec(&generic_xdp_needed);
+			static_branch_dec(&generic_xdp_needed_key);
 		} else if (new && !old) {
-			static_key_slow_inc(&generic_xdp_needed);
+			static_branch_inc(&generic_xdp_needed_key);
 			dev_disable_lro(dev);
 			dev_disable_gro_hw(dev);
 		}
@@ -4738,7 +4738,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
-	if (static_key_false(&generic_xdp_needed)) {
+	if (static_branch_unlikely(&generic_xdp_needed_key)) {
 		int ret;
 
 		preempt_disable();
-- 
2.13.6

^ permalink raw reply related

* [PATCH 6/6] net/udp: Update udp_encap_needed static key to modern api
From: Davidlohr Bueso @ 2018-05-08 16:07 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, dave, Davidlohr Bueso
In-Reply-To: <20180508160703.8125-1-dave@stgolabs.net>

No changes in refcount semantics -- key init is false; replace

static_key_enable         with   static_branch_enable
static_key_slow_inc|dec   with   static_branch_inc|dec
static_key_false          with   static_branch_unlikely

Added a '_key' suffix to udp and udpv6 encap_needed, for better
self documentation.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 net/ipv4/udp.c | 8 ++++----
 net/ipv6/udp.c | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dd3102a37ef9..ea86d8832340 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1873,10 +1873,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	return 0;
 }
 
-static struct static_key udp_encap_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void)
 {
-	static_key_enable(&udp_encap_needed);
+	static_branch_enable(&udp_encap_needed_key);
 }
 EXPORT_SYMBOL(udp_encap_enable);
 
@@ -1900,7 +1900,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	nf_reset(skb);
 
-	if (static_key_false(&udp_encap_needed) && up->encap_type) {
+	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
 		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 
 		/*
@@ -2363,7 +2363,7 @@ void udp_destroy_sock(struct sock *sk)
 	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
 	unlock_sock_fast(sk, slow);
-	if (static_key_false(&udp_encap_needed) && up->encap_type) {
+	if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
 		void (*encap_destroy)(struct sock *sk);
 		encap_destroy = READ_ONCE(up->encap_destroy);
 		if (encap_destroy)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a34e28ac03a7..0056ae766d93 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -546,10 +546,10 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
 	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
 }
 
-static struct static_key udpv6_encap_needed __read_mostly;
+static DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
 void udpv6_encap_enable(void)
 {
-	static_key_enable(&udpv6_encap_needed);
+	static_branch_enable(&udpv6_encap_needed_key);
 }
 EXPORT_SYMBOL(udpv6_encap_enable);
 
@@ -561,7 +561,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto drop;
 
-	if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+	if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
 		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
 
 		/*
@@ -1427,7 +1427,7 @@ void udpv6_destroy_sock(struct sock *sk)
 	udp_v6_flush_pending_frames(sk);
 	release_sock(sk);
 
-	if (static_key_false(&udpv6_encap_needed) && up->encap_type) {
+	if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
 		void (*encap_destroy)(struct sock *sk);
 		encap_destroy = READ_ONCE(up->encap_destroy);
 		if (encap_destroy)
-- 
2.13.6

^ permalink raw reply related

* [PATCH 1/2] sunrpc: handle ENOMEM in rpcb_getport_async
From: bfields @ 2018-05-08 16:09 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com,
	syzkaller-bugs@googlegroups.com, anna.schumaker@netapp.com,
	davem@davemloft.net, linux-kernel@vger.kernel.org,
	linux-nfs@vger.kernel.org, jlayton@kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <1524002074.63751.5.camel@hammer.space>

From: "J. Bruce Fields" <bfields@redhat.com>

If we ignore the error we'll hit a null dereference a little later.

Reported-by: syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/rpcb_clnt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c526f8fb37c9..82c120e51d64 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -771,6 +771,12 @@ void rpcb_getport_async(struct rpc_task *task)
 	case RPCBVERS_3:
 		map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
 		map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
+		if (!map->r_addr) {
+			status = -ENOMEM;
+			dprintk("RPC: %5u %s: no memory available\n",
+				task->tk_pid, __func__);
+			goto bailout_free_args;
+		}
 		map->r_owner = "";
 		break;
 	case RPCBVERS_2:
@@ -793,6 +799,8 @@ void rpcb_getport_async(struct rpc_task *task)
 	rpc_put_task(child);
 	return;
 
+bailout_free_args:
+	kfree(map);
 bailout_release_client:
 	rpc_release_client(rpcb_clnt);
 bailout_nofree:
-- 
2.17.0

^ permalink raw reply related

* Re: general protection fault in encode_rpcb_string
From: bfields @ 2018-05-08 16:11 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com,
	syzkaller-bugs@googlegroups.com, anna.schumaker@netapp.com,
	davem@davemloft.net, linux-kernel@vger.kernel.org,
	linux-nfs@vger.kernel.org, jlayton@kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <1524002074.63751.5.camel@hammer.space>

From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 8 May 2018 11:47:03 -0400
Subject: [PATCH 2/2] sunrpc: convert unnecessary GFP_ATOMIC to GFP_NOFS

It's OK to sleep here, we just don't want to recurse into the filesystem
as this writeout could be waiting on this.

As a next step: the documentation for GFP_NOFS says "Please try to avoid
using this flag directly and instead use memalloc_nofs_{save,restore} to
mark the whole scope which cannot/shouldn't recurse into the FS layer
with a short explanation why. All allocation requests will inherit
GFP_NOFS implicitly."

But I'm not sure where to do this.  Should the workqueue could be
arranging that for us in the case of workqueues created with
WQ_MEM_RECLAIM?

Reported-by: Trond Myklebust <trondmy@hammer.space>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/rpcb_clnt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

On Tue, Apr 17, 2018 at 09:54:36PM +0000, Trond Myklebust wrote:
> Yes, and we can probably convert it, and the other GFP_ATOMIC
> allocations in the rpcbind client to use GFP_NOFS in order to improve
> reliability.

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 82c120e51d64..576e84a1adee 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -752,7 +752,7 @@ void rpcb_getport_async(struct rpc_task *task)
 		goto bailout_nofree;
 	}
 
-	map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
+	map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS);
 	if (!map) {
 		status = -ENOMEM;
 		dprintk("RPC: %5u %s: no memory available\n",
@@ -770,7 +770,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	case RPCBVERS_4:
 	case RPCBVERS_3:
 		map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
-		map->r_addr = rpc_sockaddr2uaddr(sap, GFP_ATOMIC);
+		map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS);
 		if (!map->r_addr) {
 			status = -ENOMEM;
 			dprintk("RPC: %5u %s: no memory available\n",
-- 
2.17.0

^ permalink raw reply related

* Re: general protection fault in encode_rpcb_string
From: bfields @ 2018-05-08 16:15 UTC (permalink / raw)
  To: Trond Myklebust
  Cc: syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com,
	syzkaller-bugs@googlegroups.com, anna.schumaker@netapp.com,
	davem@davemloft.net, linux-kernel@vger.kernel.org,
	linux-nfs@vger.kernel.org, jlayton@kernel.org,
	netdev@vger.kernel.org, Chuck Lever
In-Reply-To: <1524002074.63751.5.camel@hammer.space>

On Tue, Apr 17, 2018 at 09:54:36PM +0000, Trond Myklebust wrote:
> Yes, and we can probably convert it, and the other GFP_ATOMIC
> allocations in the rpcbind client to use GFP_NOFS in order to improve
> reliability.

Chuck, I think the GFP_ATOMIC is unnecessary here as well?

--b.

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e8adad33d0bb..de90c6c90cde 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -228,7 +228,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
 			/* XXX: Certain upper layer operations do
 			 *	not provide receive buffer pages.
 			 */
-			*ppages = alloc_page(GFP_ATOMIC);
+			*ppages = alloc_page(GFP_NOFS);
 			if (!*ppages)
 				return -EAGAIN;
 		}

^ permalink raw reply related

* Re: [RFC PATCH 1/3] arcnet: com20020: Add memory map of com20020
From: Rob Herring @ 2018-05-08 16:16 UTC (permalink / raw)
  To: Andrea Greco
  Cc: m.grzeschik, Andrea Greco, Mark Rutland, netdev, devicetree,
	linux-kernel
In-Reply-To: <20180505213448.8180-1-andrea.greco.gapmilano@gmail.com>

On Sat, May 05, 2018 at 11:34:45PM +0200, Andrea Greco wrote:
> From: Andrea Greco <a.greco@4sigma.it>
> 
> Add support for com20022I/com20020, memory mapped chip version.
> Support bus: Intel 80xx and Motorola 68xx.
> Bus size: Only 8 bit bus size is supported.
> Added related device tree bindings
> 
> Signed-off-by: Andrea Greco <a.greco@4sigma.it>
> ---
>  .../devicetree/bindings/net/smsc-com20020.txt      |  23 +++

Please split bindings to separate patch.

>  drivers/net/arcnet/Kconfig                         |  12 +-
>  drivers/net/arcnet/Makefile                        |   1 +
>  drivers/net/arcnet/arcdevice.h                     |  27 ++-
>  drivers/net/arcnet/com20020-membus.c               | 191 +++++++++++++++++++++
>  drivers/net/arcnet/com20020.c                      |   9 +-
>  6 files changed, 253 insertions(+), 10 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/net/smsc-com20020.txt
>  create mode 100644 drivers/net/arcnet/com20020-membus.c
> 
> diff --git a/Documentation/devicetree/bindings/net/smsc-com20020.txt b/Documentation/devicetree/bindings/net/smsc-com20020.txt
> new file mode 100644
> index 000000000000..39c5b19c55af
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/smsc-com20020.txt
> @@ -0,0 +1,23 @@
> +SMSC com20020, com20022I

What does this device do?

> +
> +timeout: Arcnet timeout, checkout datashet
> +clockp: Clock Prescaler, checkout datashet

s/datashet/datasheet/

> +clockm: Clock multiplier, checkout datasheet

Would these 3 properties be common for arcnet devices? If not, then they 
should have a vendor prefix.

> +
> +phy-reset-gpios: Chip reset ppin

Use 'reset-gpios' as that is standard.

> +phy-irq-gpios: Chip irq pin

Use 'interrupts'. Interrupt capable gpio controllers are also interrupt 
controllers.

> +
> +com20020_A@0 {

Node names should be generic based on the class of device. I don't think 
we have one defined, but how about 'arcnet'.

Unit addresses must have a corresponding reg property. How is this 
device accessed?

> +    compatible = "smsc,com20020";

Not documented.

> +
> +	timeout	= <0x3>;
> +	backplane = <0x0>;
> +
> +	clockp = <0x0>;
> +	clockm = <0x3>;
> +
> +	phy-reset-gpios = <&gpio3 21 GPIO_ACTIVE_LOW>;
> +	phy-irq-gpios = <&gpio2 10 GPIO_ACTIVE_LOW>;
> +
> +	status = "okay";

Don't should status in examples.

> +};

^ permalink raw reply

* Re: [net PATCH v2] net: sched, fix OOO packets with pfifo_fast
From: Paolo Abeni @ 2018-05-08 16:17 UTC (permalink / raw)
  To: John Fastabend, Cong Wang, Jamal Hadi Salim
  Cc: Eric Dumazet, Jiri Pirko, David Miller,
	Linux Kernel Network Developers
In-Reply-To: <36a89ed1-d6ff-ddad-c736-4e68909d61c4@gmail.com>

Hi all,

I'm still crashing my head on this item...

On Wed, 2018-04-18 at 09:44 -0700, John Fastabend wrote:
> There is a set of conditions
> that if met we can run without the lock. Possibly ONETXQUEUE and
> aligned cpu_map is sufficient. We could detect this case and drop
> the locking. For existing systems and high Gbps NICs I think (feel
> free to correct me) assuming a core per cpu is OK. At some point
> though we probably need to revisit this assumption.

I think we can improve measurably moving the __QDISC_STATE_RUNNING bit
fiddling around the __qdisc_run() call in the 'lockless' path, instead
of keeping it inside __qdisc_restart().

Currently, in the single sender, pkt rate below link-limit scenario we
hit the atomic bit overhead twice per xmitted packet: one for each
dequeue, plus another one for the next, failing, dequeue attempt. With
the wider scope we will hit it always only once.

After that change __QDISC_STATE_RUNNING usage will look a bit like
qdisc_lock(), for the dequeue part at least. So I'm wondering if we
could replace __QDISC_STATE_RUNNING with spin_trylock(qdisc_lock())
_and_ keep such lock held for the whole qdisc_run() !?! 

The comment above qdisc_restart() states clearly we can't, but I don't
see why !?! Acquiring qdisc_lock() and xmit lock always in the given
sequence looks safe to me. Can someone please explain? Is there some
possible deathlock condition I'm missing ?!?

It looks like the comment itself cames directly from the pre-bitkeeper
era (modulo locks name change).

Performance wise, acquiring the qdisc_lock only once per xmitted packet
should improve considerably 'locked' qdisc performance, both in the
contented and in the uncontended scenario (and some quick experiments
seems to confirm that).

Thanks,

Paolo

^ permalink raw reply

* [PATCH net-next] tun: Do SIOCGSKNS out of rtnl_lock()
From: Kirill Tkhai @ 2018-05-08 16:21 UTC (permalink / raw)
  To: davem, jasowang, edumazet, mst, brouer, peterpenkov96, sd, netdev,
	ktkhai

Since net ns of tun device is assigned on the device creation,
and it never changes, we do not need to use any lock to get it
from alive tun.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 drivers/net/tun.c |   18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d3c04ab9752a..44d4f3d25350 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2850,10 +2850,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg, int ifreq_len)
 {
 	struct tun_file *tfile = file->private_data;
+	struct net *net = sock_net(&tfile->sk);
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
-	struct net *net;
 	kuid_t owner;
 	kgid_t group;
 	int sndbuf;
@@ -2877,14 +2877,18 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		 */
 		return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
 				(unsigned int __user*)argp);
-	} else if (cmd == TUNSETQUEUE)
+	} else if (cmd == TUNSETQUEUE) {
 		return tun_set_queue(file, &ifr);
+	} else if (cmd == SIOCGSKNS) {
+		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+			return -EPERM;
+		return open_related_ns(&net->ns, get_net_ns);
+	}
 
 	ret = 0;
 	rtnl_lock();
 
 	tun = tun_get(tfile);
-	net = sock_net(&tfile->sk);
 	if (cmd == TUNSETIFF) {
 		ret = -EEXIST;
 		if (tun)
@@ -2914,14 +2918,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		tfile->ifindex = ifindex;
 		goto unlock;
 	}
-	if (cmd == SIOCGSKNS) {
-		ret = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto unlock;
-
-		ret = open_related_ns(&net->ns, get_net_ns);
-		goto unlock;
-	}
 
 	ret = -EBADFD;
 	if (!tun)

^ permalink raw reply related

* Re: general protection fault in encode_rpcb_string
From: Chuck Lever @ 2018-05-08 16:34 UTC (permalink / raw)
  To: Bruce Fields
  Cc: Trond Myklebust,
	syzbot+4b98281f2401ab849f4b@syzkaller.appspotmail.com,
	syzkaller-bugs@googlegroups.com, Anna Schumaker,
	davem@davemloft.net, linux-kernel@vger.kernel.org,
	Linux NFS Mailing List, jlayton@kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <20180508161529.GD6151@fieldses.org>



> On May 8, 2018, at 12:15 PM, bfields@fieldses.org wrote:
> 
> On Tue, Apr 17, 2018 at 09:54:36PM +0000, Trond Myklebust wrote:
>> Yes, and we can probably convert it, and the other GFP_ATOMIC
>> allocations in the rpcbind client to use GFP_NOFS in order to improve
>> reliability.
> 
> Chuck, I think the GFP_ATOMIC is unnecessary here as well?
> 
> --b.
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index e8adad33d0bb..de90c6c90cde 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -228,7 +228,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
> 			/* XXX: Certain upper layer operations do
> 			 *	not provide receive buffer pages.
> 			 */
> -			*ppages = alloc_page(GFP_ATOMIC);
> +			*ppages = alloc_page(GFP_NOFS);
> 			if (!*ppages)
> 				return -EAGAIN;
> 		}

This code can't sleep, as I understand it. Caller is holding
the transport write lock. This logic was copied from
xdr_partial_copy_from_skb, which uses GFP_ATOMIC.

Recall that this is here because of GETACL. As I've stated in
the past, the correct solution is to ensure that these pages
are provided in every case by the upper layer, making this
alloc_page call site unnecessary.


--
Chuck Lever
chucklever@gmail.com

^ permalink raw reply

* Performance regression between 4.13 and 4.14
From: Ben Greear @ 2018-05-08 16:44 UTC (permalink / raw)
  To: netdev

Hello,

I am trying to track down a performance regression that appears to be between 4.13
and 4.14.

I first saw the problem with a hacked version of pktgen on some ixgbe NICs.  4.13 can do
right at 10G bi-directional on two ports, and 4.14 and later can do only about 6Gbps.

I also tried with user-space UDP traffic on a stock kernel, and I can get about 3.2Gbps combined tx+rx
on 4.14 and about 4.4Gbps on 4.13.

Attempting to bisect seems to be triggering a weirdness in git, and also lots of commits
crash or do not bring up networking, which makes the bisect difficult.

Looking at perf top, it would appear that some lock is probably to blame.

Any ideas what might have been introduced during this interval that
would cause this?

Anyone else seen similar?

I'm going to attempt some more manual steps to try to find the commit that
introduces this...

Thanks,
Ben

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply

* Re: [PATCH] net: phy: DP83811: Add support for the phy
From: Andrew Lunn @ 2018-05-08 16:49 UTC (permalink / raw)
  To: Dan Murphy; +Cc: f.fainelli, netdev, linux-kernel
In-Reply-To: <1b6877db-61cf-101a-054e-9c1fbe95a813@ti.com>

On Tue, May 08, 2018 at 10:56:55AM -0500, Dan Murphy wrote:
> All
> 
> On 05/08/2018 09:11 AM, Dan Murphy wrote:
> > Add support for the DP83811 phy by extending
> > the DP83822 driver to recognize the PHY IDs.
> > 
> > The DP83811 supports both rgmii and sgmii interfaces.
> > There are 2 part numbers for this the DP83811R does not
> > reliably support the SGMII interface but the DP83811S will.
> > 
> > There is not a way to differentiate these parts from the
> > hardware or register set.  So this is controlled via the DT
> > to indicate which phy mode is required.  Or the part can be
> > strapped to a certain interface.
> > 
> > Data sheet can be found here:
> > http://www.ti.com/product/DP83TC811S-Q1/description
> > http://www.ti.com/product/DP83TC811R-Q1/description
> > 
> 
> I am withdrawing this patch for comment.
> Some of the future features have varying register definitions between the DP83811
> and DP83822

Hi Dan

It might be worth talking to the ASIC engineers and the
test/qualification engineers. There are sometime undocumented
registers for testing. You might be able to identify the exact device
from these registers.

     Andrew

^ permalink raw reply

* Re: [PATCH net-next] dt-bindings: dsa: Remove unnecessary #address/#size-cells
From: Rob Herring @ 2018-05-08 16:53 UTC (permalink / raw)
  To: Fabio Estevam
  Cc: davem, f.fainelli, andrew, netdev, devicetree, Fabio Estevam
In-Reply-To: <1525695471-19984-1-git-send-email-festevam@gmail.com>

On Mon, May 07, 2018 at 09:17:51AM -0300, Fabio Estevam wrote:
> From: Fabio Estevam <fabio.estevam@nxp.com>
> 
> If the example binding is used on a real dts file, the following DTC
> warning is seen with W=1:
>     
> arch/arm/boot/dts/imx6q-b450v3.dtb: Warning (avoid_unnecessary_addr_size): /mdio-gpio/switch@0: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property
> 
> Remove unnecessary #address-cells/#size-cells to improve the binding
> document examples.
> 
> Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
> ---
>  Documentation/devicetree/bindings/net/dsa/dsa.txt | 6 ------
>  1 file changed, 6 deletions(-)

Reviewed-by: Rob Herring <robh@kernel.org>

^ permalink raw reply

* Re: [net-next 2/6] fm10k: reduce duplicate fm10k_stat macro code
From: Joe Perches @ 2018-05-08 16:59 UTC (permalink / raw)
  To: Jeff Kirsher, davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180507144521.6979-3-jeffrey.t.kirsher@intel.com>

On Mon, 2018-05-07 at 07:45 -0700, Jeff Kirsher wrote:
> Share some of the code for setting up fm10k_stat macros by implementing
> an FM10K_STAT_FIELDS macro which we can use when setting up the type
> specific macros.
[]
> diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
[]
> @@ -11,12 +11,16 @@ struct fm10k_stats {
>  	int stat_offset;
>  };o 
>  
> -#define FM10K_NETDEV_STAT(_net_stat) { \
> -	.stat_string = #_net_stat, \
> -	.sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
> -	.stat_offset = offsetof(struct net_device_stats, _net_stat) \
> +#define FM10K_STAT_FIELDS(_type, _name, _stat) { \
> +	.stat_string = _name, \
> +	.sizeof_stat = FIELD_SIZEOF(_type, _stat), \
> +	.stat_offset = offsetof(_type, _stat) \
>  }
>  
> +/* netdevice statistics */
> +#define FM10K_NETDEV_STAT(_net_stat) \
> +	FM10K_STAT_FIELDS(struct net_device_stats, #_net_stat, _net_stat)

trivia:

It's somewhat unusual to use # in a macro argument.
Perhaps this would be slightly easier to understand using __stringify

#define FM10K_NETDEV_STAT(_net_stat) \
	FM10K_STAT_FIELDS(struct net_device_stats, __stringify(_net_stat), _net_stat)

^ permalink raw reply

* Re: [PATCH] selinux: add AF_UNSPEC and INADDR_ANY checks to selinux_socket_bind()
From: Paul Moore @ 2018-05-08 17:05 UTC (permalink / raw)
  To: Alexey Kodanev, Richard Haines
  Cc: selinux, Stephen Smalley, Eric Paris, linux-security-module,
	netdev
In-Reply-To: <1525788303-23244-1-git-send-email-alexey.kodanev@oracle.com>

On Tue, May 8, 2018 at 10:05 AM, Alexey Kodanev
<alexey.kodanev@oracle.com> wrote:
> Commit d452930fd3b9 ("selinux: Add SCTP support") breaks compatibility
> with the old programs that can pass sockaddr_in with AF_UNSPEC and
> INADDR_ANY to bind(). As a result, bind() returns EAFNOSUPPORT error.
> It was found with LTP/asapi_01 test.
>
> Similar to commit 29c486df6a20 ("net: ipv4: relax AF_INET check in
> bind()"), which relaxed AF_INET check for compatibility, add AF_UNSPEC
> case to AF_INET and make sure that the address is INADDR_ANY.
>
> Also, in the end of selinux_socket_bind(), instead of adding AF_UNSPEC
> to 'address->sa_family == AF_INET', verify AF_INET6 first.
>
> Fixes: d452930fd3b9 ("selinux: Add SCTP support")
> Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
> ---
>  security/selinux/hooks.c | 12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)

Thanks for finding and reporting this regression.

I think I would prefer to avoid having to duplicate the
AF_UNSPEC/INADDR_ANY checking logic in the SELinux hook, even though
it is a small bit of code and unlikely to change.  I'm wondering if it
would be better to check both the socket and sockaddr address family
in the main if conditional inside selinux_socket_bind(), what do you
think?  Another option would be to go back to just checking the
soackaddr address family; we moved away from that for a reason which
escapes at the moment (code cleanliness?), but perhaps that was a
mistake.

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4cafe6a19167..a3789b167667 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4577,6 +4577,7 @@ static int selinux_socket_bind(struct socket *sock, struc>
{
       struct sock *sk = sock->sk;
       u16 family;
+       u16 family_sa;
       int err;

       err = sock_has_perm(sk, SOCKET__BIND);
@@ -4585,7 +4586,9 @@ static int selinux_socket_bind(struct socket *sock, struc>

       /* If PF_INET or PF_INET6, check name_bind permission for the port. */
       family = sk->sk_family;
-       if (family == PF_INET || family == PF_INET6) {
+       family_sa = address->sa_family;
+       if ((family == PF_INET || family == PF_INET6) &&
+           (family_sa == PF_INET || family_sa == PF_INET6)) {
               char *addrp;
               struct sk_security_struct *sksec = sk->sk_security;
               struct common_audit_data ad;
@@ -4601,7 +4604,7 @@ static int selinux_socket_bind(struct socket *sock, struc>
                * need to check address->sa_family as it is possible to have
                * sk->sk_family = PF_INET6 with addr->sa_family = AF_INET.
                */
-               switch (address->sa_family) {
+               switch (family_sa) {
               case AF_INET:
                       if (addrlen < sizeof(struct sockaddr_in))
                               return -EINVAL;

> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 4cafe6a..649a3be 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -4602,10 +4602,16 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
>                  * sk->sk_family = PF_INET6 with addr->sa_family = AF_INET.
>                  */
>                 switch (address->sa_family) {
> +               case AF_UNSPEC:
>                 case AF_INET:
>                         if (addrlen < sizeof(struct sockaddr_in))
>                                 return -EINVAL;
>                         addr4 = (struct sockaddr_in *)address;
> +
> +                       if (address->sa_family == AF_UNSPEC &&
> +                           addr4->sin_addr.s_addr != htonl(INADDR_ANY))
> +                               return -EAFNOSUPPORT;
> +
>                         snum = ntohs(addr4->sin_port);
>                         addrp = (char *)&addr4->sin_addr.s_addr;
>                         break;
> @@ -4681,10 +4687,10 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
>                 ad.u.net->sport = htons(snum);
>                 ad.u.net->family = family;
>
> -               if (address->sa_family == AF_INET)
> -                       ad.u.net->v4info.saddr = addr4->sin_addr.s_addr;
> -               else
> +               if (address->sa_family == AF_INET6)
>                         ad.u.net->v6info.saddr = addr6->sin6_addr;
> +               else
> +                       ad.u.net->v4info.saddr = addr4->sin_addr.s_addr;
>
>                 err = avc_has_perm(&selinux_state,
>                                    sksec->sid, sid,
> --
> 1.8.3.1
>

-- 
paul moore
www.paul-moore.com

^ permalink raw reply related

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
From: Eric Dumazet @ 2018-05-08 17:07 UTC (permalink / raw)
  To: Alexander Duyck, Tom Herbert
  Cc: Eric Dumazet, Amritha Nambiar, netdev, David Miller,
	Alexander Duyck, Samudrala, Sridhar, Hannes Frederic Sowa
In-Reply-To: <CAKgT0UdtjZeY8-vw-BGKW-QRqqpkJSS83VUM9Dgueh312h1omg@mail.gmail.com>



On 05/08/2018 09:02 AM, Alexander Duyck wrote:
> On Tue, May 8, 2018 at 8:15 AM, Tom Herbert <tom@herbertland.com> wrote:
>> On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
>>> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
>>> wrote:
>>>
>>>> This patch series implements support for Tx queue selection based on
>>>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>>>> using sysfs attribute. If the user configuration for Rx queues does
>>>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>>>> finally to hashing.
>>>
>>>> XPS is refactored to support Tx queue selection based on either the
>>>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>>>> enabled. By default no receive queues are configured for the Tx queue.
>>>
>>>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>>>
>>>> This is to enable sending packets on the same Tx-Rx queue pair as this
>>>> is useful for busy polling multi-threaded workloads where it is not
>>>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>>>> patch for symmetric queueing via socket option:
>>>> https://www.spinics.net/lists/netdev/msg453106.html
>>>
>> I suspect this is an artifact of flow director which I believe
>> required queue pairs to be able to work (i.e. receive queue chose
>> hardware is determined send queue). But that was only required because
>> of hardware design, I don't see the rationale for introducing queue
>> pairs in the software stack. There's no need to correlate the transmit
>> path with receive path, no need to enforce a 1-1 mapping between RX
>> and TX queues, and the OOO mitigations should be sufficient when TX
>> queue changes for a flow.
>>
>> Tom
> 
> If I am not mistaken I think there are benefits to doing this sort of
> thing with polling as it keeps the Tx work locked into the same queue
> pair that a given application is polling on. So as a result you can
> keep the interrupts contained to the queue pair that is being busy
> polled on and if the application cleans up the packets during the busy
> poll it ends up being a net savings in terms of both latency and power
> since the Tx clean-up happens sooner, and it happens on the queue that
> is already busy polling instead of possibly triggering an interrupt on
> another CPU.
> 
> So for example in the case of routing and bridging workloads we
> already had code that would take the Rx queue and associate it to a Tx
> queue. One of the ideas behind doing this is to try and keep the CPU
> overhead low by having a 1:1 mapping. In the case of this code we
> allow for a little more flexibility in that you could have
> many-to-many mappings but the general idea and common use case is the
> same which is a 1:1 mapping.


I thought we had everything in place to be able to have this already.

Setting IRQ affinities and XPS is certainly something doable.

This is why I wanted a proper documentation of yet another way to reach the
same behavior.

^ permalink raw reply

* Re: Performance regression between 4.13 and 4.14
From: Eric Dumazet @ 2018-05-08 17:10 UTC (permalink / raw)
  To: Ben Greear, netdev
In-Reply-To: <9115910b-dd8b-e7f3-be53-f739b8382032@candelatech.com>



On 05/08/2018 09:44 AM, Ben Greear wrote:
> Hello,
> 
> I am trying to track down a performance regression that appears to be between 4.13
> and 4.14.
> 
> I first saw the problem with a hacked version of pktgen on some ixgbe NICs.  4.13 can do
> right at 10G bi-directional on two ports, and 4.14 and later can do only about 6Gbps.
> 
> I also tried with user-space UDP traffic on a stock kernel, and I can get about 3.2Gbps combined tx+rx
> on 4.14 and about 4.4Gbps on 4.13.
> 
> Attempting to bisect seems to be triggering a weirdness in git, and also lots of commits
> crash or do not bring up networking, which makes the bisect difficult.
> 
> Looking at perf top, it would appear that some lock is probably to blame.


perf record -a -g -e cycles:pp sleep 5
perf report

Then you'll be able to tell us which lock (or call graph) is killing your perf.

^ permalink raw reply

* Re: [PATCH] net: phy: DP83811: Add support for the phy
From: Dan Murphy @ 2018-05-08 17:13 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: f.fainelli, netdev, linux-kernel
In-Reply-To: <20180508164944.GE2888@lunn.ch>

Andrew

On 05/08/2018 11:49 AM, Andrew Lunn wrote:
> On Tue, May 08, 2018 at 10:56:55AM -0500, Dan Murphy wrote:
>> All
>>
>> On 05/08/2018 09:11 AM, Dan Murphy wrote:
>>> Add support for the DP83811 phy by extending
>>> the DP83822 driver to recognize the PHY IDs.
>>>
>>> The DP83811 supports both rgmii and sgmii interfaces.
>>> There are 2 part numbers for this the DP83811R does not
>>> reliably support the SGMII interface but the DP83811S will.
>>>
>>> There is not a way to differentiate these parts from the
>>> hardware or register set.  So this is controlled via the DT
>>> to indicate which phy mode is required.  Or the part can be
>>> strapped to a certain interface.
>>>
>>> Data sheet can be found here:
>>> http://www.ti.com/product/DP83TC811S-Q1/description
>>> http://www.ti.com/product/DP83TC811R-Q1/description
>>>
>>
>> I am withdrawing this patch for comment.
>> Some of the future features have varying register definitions between the DP83811
>> and DP83822
> 
> Hi Dan
> 
> It might be worth talking to the ASIC engineers and the
> test/qualification engineers. There are sometime undocumented
> registers for testing. You might be able to identify the exact device
> from these registers.
> 

Thanks.  I talked to them prior to submitting this patch about determining which part is on the board.
I will ping them again and poke a little harder.

It turns out that we will probably need a new driver for this part anyway as there are
additional features that need to be supported that the 811 just does not support.

They want to support Master/Slave configurations.
The 822 supports fiber and eee while the 811 does not.
The 811 supports the IEEE802.3bw specific fields in MMD1 and MMD3, which are not in the 822.
The 811 does not support auto-negotiation on the MDI so all the auto-neg registers are invalid for the 811 but are valid for the 822.

Our Customer engineers felt that combining these two devices into a single driver may confuse the customer.

Dan

>      Andrew
> 

-- 
------------------
Dan Murphy

^ permalink raw reply

* Re: [PATCH] net: phy: DP83811: Add support for the phy
From: Florian Fainelli @ 2018-05-08 17:14 UTC (permalink / raw)
  To: Dan Murphy, Andrew Lunn; +Cc: netdev, linux-kernel
In-Reply-To: <f4cfe325-f462-e965-3bd8-629acf460fbf@ti.com>

On 05/08/2018 10:13 AM, Dan Murphy wrote:
> Andrew
> 
> On 05/08/2018 11:49 AM, Andrew Lunn wrote:
>> On Tue, May 08, 2018 at 10:56:55AM -0500, Dan Murphy wrote:
>>> All
>>>
>>> On 05/08/2018 09:11 AM, Dan Murphy wrote:
>>>> Add support for the DP83811 phy by extending
>>>> the DP83822 driver to recognize the PHY IDs.
>>>>
>>>> The DP83811 supports both rgmii and sgmii interfaces.
>>>> There are 2 part numbers for this the DP83811R does not
>>>> reliably support the SGMII interface but the DP83811S will.
>>>>
>>>> There is not a way to differentiate these parts from the
>>>> hardware or register set.  So this is controlled via the DT
>>>> to indicate which phy mode is required.  Or the part can be
>>>> strapped to a certain interface.
>>>>
>>>> Data sheet can be found here:
>>>> http://www.ti.com/product/DP83TC811S-Q1/description
>>>> http://www.ti.com/product/DP83TC811R-Q1/description
>>>>
>>>
>>> I am withdrawing this patch for comment.
>>> Some of the future features have varying register definitions between the DP83811
>>> and DP83822
>>
>> Hi Dan
>>
>> It might be worth talking to the ASIC engineers and the
>> test/qualification engineers. There are sometime undocumented
>> registers for testing. You might be able to identify the exact device
>> from these registers.
>>
> 
> Thanks.  I talked to them prior to submitting this patch about determining which part is on the board.
> I will ping them again and poke a little harder.
> 
> It turns out that we will probably need a new driver for this part anyway as there are
> additional features that need to be supported that the 811 just does not support.
> 
> They want to support Master/Slave configurations.
> The 822 supports fiber and eee while the 811 does not.
> The 811 supports the IEEE802.3bw specific fields in MMD1 and MMD3, which are not in the 822.
> The 811 does not support auto-negotiation on the MDI so all the auto-neg registers are invalid for the 811 but are valid for the 822.
> 
> Our Customer engineers felt that combining these two devices into a single driver may confuse the customer.

Why? Having a single Kconfig option to enable and "automatically"
gaining support for new hardware is nice.
-- 
Florian

^ permalink raw reply

* Re: [net-next PATCH 0/3] Symmetric queue selection using XPS for Rx queues
From: Alexander Duyck @ 2018-05-08 17:19 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, Eric Dumazet, Amritha Nambiar, netdev, David Miller,
	Alexander Duyck, Samudrala, Sridhar, Hannes Frederic Sowa
In-Reply-To: <439f729f-8a1d-1d2b-b59a-3f13786aab77@gmail.com>

On Tue, May 8, 2018 at 10:07 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 05/08/2018 09:02 AM, Alexander Duyck wrote:
>> On Tue, May 8, 2018 at 8:15 AM, Tom Herbert <tom@herbertland.com> wrote:
>>> On Thu, Apr 19, 2018 at 7:41 PM, Eric Dumazet <edumazet@google.com> wrote:
>>>> On Thu, Apr 19, 2018 at 6:07 PM Amritha Nambiar <amritha.nambiar@intel.com>
>>>> wrote:
>>>>
>>>>> This patch series implements support for Tx queue selection based on
>>>>> Rx queue map. This is done by configuring Rx queue map per Tx-queue
>>>>> using sysfs attribute. If the user configuration for Rx queues does
>>>>> not apply, then the Tx queue selection falls back to XPS using CPUs and
>>>>> finally to hashing.
>>>>
>>>>> XPS is refactored to support Tx queue selection based on either the
>>>>> CPU map or the Rx-queue map. The config option CONFIG_XPS needs to be
>>>>> enabled. By default no receive queues are configured for the Tx queue.
>>>>
>>>>> - /sys/class/net/eth0/queues/tx-*/xps_rxqs
>>>>
>>>>> This is to enable sending packets on the same Tx-Rx queue pair as this
>>>>> is useful for busy polling multi-threaded workloads where it is not
>>>>> possible to pin the threads to a CPU. This is a rework of Sridhar's
>>>>> patch for symmetric queueing via socket option:
>>>>> https://www.spinics.net/lists/netdev/msg453106.html
>>>>
>>> I suspect this is an artifact of flow director which I believe
>>> required queue pairs to be able to work (i.e. receive queue chose
>>> hardware is determined send queue). But that was only required because
>>> of hardware design, I don't see the rationale for introducing queue
>>> pairs in the software stack. There's no need to correlate the transmit
>>> path with receive path, no need to enforce a 1-1 mapping between RX
>>> and TX queues, and the OOO mitigations should be sufficient when TX
>>> queue changes for a flow.
>>>
>>> Tom
>>
>> If I am not mistaken I think there are benefits to doing this sort of
>> thing with polling as it keeps the Tx work locked into the same queue
>> pair that a given application is polling on. So as a result you can
>> keep the interrupts contained to the queue pair that is being busy
>> polled on and if the application cleans up the packets during the busy
>> poll it ends up being a net savings in terms of both latency and power
>> since the Tx clean-up happens sooner, and it happens on the queue that
>> is already busy polling instead of possibly triggering an interrupt on
>> another CPU.
>>
>> So for example in the case of routing and bridging workloads we
>> already had code that would take the Rx queue and associate it to a Tx
>> queue. One of the ideas behind doing this is to try and keep the CPU
>> overhead low by having a 1:1 mapping. In the case of this code we
>> allow for a little more flexibility in that you could have
>> many-to-many mappings but the general idea and common use case is the
>> same which is a 1:1 mapping.
>
>
> I thought we had everything in place to be able to have this already.
>
> Setting IRQ affinities and XPS is certainly something doable.
>
> This is why I wanted a proper documentation of yet another way to reach the
> same behavior.

IRQ affinities and XPS work for pure NAPI setups, but the problem is
you have to also do application affinity in the case of busy polling
which can provide some additional challenges since then you have to
add code in your application to associate a given queue/CPU to a given
application thread. I believe this is a way of simplifying this.

I agree on the documentation aspect. The usage of this should be well
documented as well as the why of using it.

- Alex

^ permalink raw reply

* Re: [PATCH] net: phy: DP83811: Add support for the phy
From: Dan Murphy @ 2018-05-08 17:21 UTC (permalink / raw)
  To: Florian Fainelli, Andrew Lunn; +Cc: netdev, linux-kernel
In-Reply-To: <cc5acc63-bcb5-2bda-9c9d-fd18e0a7740a@gmail.com>

Florian

On 05/08/2018 12:14 PM, Florian Fainelli wrote:
> On 05/08/2018 10:13 AM, Dan Murphy wrote:
>> Andrew
>>
>> On 05/08/2018 11:49 AM, Andrew Lunn wrote:
>>> On Tue, May 08, 2018 at 10:56:55AM -0500, Dan Murphy wrote:
>>>> All
>>>>
>>>> On 05/08/2018 09:11 AM, Dan Murphy wrote:
>>>>> Add support for the DP83811 phy by extending
>>>>> the DP83822 driver to recognize the PHY IDs.
>>>>>
>>>>> The DP83811 supports both rgmii and sgmii interfaces.
>>>>> There are 2 part numbers for this the DP83811R does not
>>>>> reliably support the SGMII interface but the DP83811S will.
>>>>>
>>>>> There is not a way to differentiate these parts from the
>>>>> hardware or register set.  So this is controlled via the DT
>>>>> to indicate which phy mode is required.  Or the part can be
>>>>> strapped to a certain interface.
>>>>>
>>>>> Data sheet can be found here:
>>>>> http://www.ti.com/product/DP83TC811S-Q1/description
>>>>> http://www.ti.com/product/DP83TC811R-Q1/description
>>>>>
>>>>
>>>> I am withdrawing this patch for comment.
>>>> Some of the future features have varying register definitions between the DP83811
>>>> and DP83822
>>>
>>> Hi Dan
>>>
>>> It might be worth talking to the ASIC engineers and the
>>> test/qualification engineers. There are sometime undocumented
>>> registers for testing. You might be able to identify the exact device
>>> from these registers.
>>>
>>
>> Thanks.  I talked to them prior to submitting this patch about determining which part is on the board.
>> I will ping them again and poke a little harder.
>>
>> It turns out that we will probably need a new driver for this part anyway as there are
>> additional features that need to be supported that the 811 just does not support.
>>
>> They want to support Master/Slave configurations.
>> The 822 supports fiber and eee while the 811 does not.
>> The 811 supports the IEEE802.3bw specific fields in MMD1 and MMD3, which are not in the 822.
>> The 811 does not support auto-negotiation on the MDI so all the auto-neg registers are invalid for the 811 but are valid for the 822.
>>
>> Our Customer engineers felt that combining these two devices into a single driver may confuse the customer.
> 
> Why? Having a single Kconfig option to enable and "automatically"
> gaining support for new hardware is nice.
> 

I do agree that it is nice to have.  But with additional features or lack of features the driver source will get pretty messy
differentiating between devices.

What I added was just the basic if 811 & SGMII check.

Dan 

-- 
------------------
Dan Murphy

^ permalink raw reply

* [PATCH] hv_netvsc: Fix net device attach on older Windows hosts
From: Mohammed Gamal @ 2018-05-08 17:40 UTC (permalink / raw)
  To: netdev, sthemmin
  Cc: kys, haiyangz, devel, vkuznets, linux-kernel, Mohammed Gamal

On older windows hosts the net_device instance is returned to
the caller of rndis_filter_device_add() without having the presence
bit set first. This would cause any subsequent calls to network device
operations (e.g. MTU change, channel change) to fail after the device
is detached once, returning -ENODEV.

Make sure we explicitly call netif_device_attach() before returning
the net_device instance to make sure the presence bit is set

Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")

Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
---
 drivers/net/hyperv/rndis_filter.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 6b127be..09a3c1d 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1287,8 +1287,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 		   rndis_device->hw_mac_adr,
 		   rndis_device->link_state ? "down" : "up");
 
-	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) {
+		netif_device_attach(net);
 		return net_device;
+	}
 
 	rndis_filter_query_link_speed(rndis_device, net_device);
 
-- 
1.8.3.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox