All of lore.kernel.org
 help / color / mirror / Atom feed
From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: pabeni@redhat.com, steffen.klassert@secunet.com,
	davem@davemloft.net, Willem de Bruijn <willemb@google.com>
Subject: [PATCH net-next RFC 6/8] net: make gro configurable
Date: Fri, 14 Sep 2018 13:59:39 -0400	[thread overview]
Message-ID: <20180914175941.213950-7-willemdebruijn.kernel@gmail.com> (raw)
In-Reply-To: <20180914175941.213950-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Add net_offload flag NET_OFF_FLAG_GRO_OFF. If set, a net_offload will
not be used for gro receive processing.

Also add a sysctl helper, proc_do_net_offload, that toggles this flag,
and register the sysctls net.{core,ipv4,ipv6}.gro.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 drivers/net/vxlan.c        |  8 +++++
 include/linux/netdevice.h  |  7 ++++-
 net/core/dev.c             |  1 +
 net/core/sysctl_net_core.c | 60 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c |  7 +++++
 net/ipv6/ip6_offload.c     | 10 +++++--
 net/ipv6/sysctl_net_ipv6.c |  8 +++++
 7 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e5d236595206..8cb8e02c8ab6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -572,6 +572,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
 					 struct list_head *head,
 					 struct sk_buff *skb)
 {
+	const struct net_offload *ops;
 	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct vxlanhdr *vh, *vh2;
@@ -606,6 +607,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
 			goto out;
 	}
 
+	rcu_read_lock();
+	ops = net_gro_receive(dev_offloads, ETH_P_TEB);
+	rcu_read_unlock();
+	if (!ops)
+		goto out;
+
 	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
 
 	list_for_each_entry(p, head, list) {
@@ -621,6 +628,7 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
 	}
 
 	pp = call_gro_receive(eth_gro_receive, head, skb);
+
 	flush = 0;
 
 out:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b9e671887fc2..93e8c9ade593 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2377,6 +2377,10 @@ struct net_offload {
 
 /* This should be set for any extension header which is compatible with GSO. */
 #define INET6_PROTO_GSO_EXTHDR	0x1
+#define NET_OFF_FLAG_GRO_OFF	0x2
+
+int proc_do_net_offload(struct ctl_table *ctl, int write, void __user *buffer,
+			size_t *lenp, loff_t *ppos);
 
 /* often modified stats are per-CPU, other are shared (netdev->stats) */
 struct pcpu_sw_netstats {
@@ -3583,7 +3587,8 @@ net_gro_receive(struct net_offload __rcu **offs, u16 type)
 
 	off = rcu_dereference(offs[net_offload_from_type(type)]);
 	if (off && off->callbacks.gro_receive &&
-	    (!off->type || off->type == type))
+	    (!off->type || off->type == type) &&
+	    !(off->flags & NET_OFF_FLAG_GRO_OFF))
 		return off;
 	else
 		return NULL;
diff --git a/net/core/dev.c b/net/core/dev.c
index 20d9552afd38..0fd5273bc931 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -154,6 +154,7 @@
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
 static DEFINE_SPINLOCK(ptype_lock);
+DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
 struct list_head ptype_all __read_mostly;	/* Taps */
 static struct list_head offload_base __read_mostly;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e38530..d2d72afdd9eb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -15,6 +15,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/bitmap.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
@@ -34,6 +35,85 @@ static int net_msg_warn;	/* Unused, but still a sysctl */
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+extern spinlock_t offload_lock;
+
+#define NET_OFF_TBL_LEN	256
+
+/**
+ * proc_do_net_offload - expose one net_offload flag as a sysctl bitmap
+ * @ctl: sysctl entry; extra1 holds the offload table, extra2 the flag value
+ * @write: nonzero when userspace writes the sysctl
+ * @buffer: userspace buffer, passed through to proc_do_large_bitmap()
+ * @lenp: buffer length, passed through to proc_do_large_bitmap()
+ * @ppos: file position, passed through to proc_do_large_bitmap()
+ *
+ * Bit i of the bitmap is set when entry i of the table has the flag set. A
+ * successful write sets or clears the flag on every registered entry to match
+ * the bitmap that results from the write.
+ *
+ * NOTE(review): assumes every table passed in extra1 has at least
+ * NET_OFF_TBL_LEN entries - confirm for dev_offloads.
+ *
+ * Return: the result of proc_do_large_bitmap().
+ */
+int proc_do_net_offload(struct ctl_table *ctl, int write, void __user *buffer,
+			size_t *lenp, loff_t *ppos)
+{
+	DECLARE_BITMAP(bitmap, NET_OFF_TBL_LEN);
+	/* proc_do_large_bitmap() expects ->data to point at a bitmap pointer */
+	unsigned long *bits = bitmap;
+	struct ctl_table tbl = { .maxlen = NET_OFF_TBL_LEN, .data = &bits };
+	unsigned long flag = (unsigned long) ctl->extra2;
+	struct net_offload __rcu **offs = ctl->extra1;
+	struct net_offload *off;
+	int i, ret;
+
+	bitmap_zero(bitmap, NET_OFF_TBL_LEN);
+
+	/* Snapshot the current flag state under the lock. */
+	spin_lock(&offload_lock);
+	for (i = 0; i < NET_OFF_TBL_LEN; i++) {
+		off = rcu_dereference_protected(offs[i],
+						lockdep_is_held(&offload_lock));
+		if (!off || !(off->flags & flag))
+			continue;
+		/* gro disable bit: only if can gro */
+		if (flag == NET_OFF_FLAG_GRO_OFF &&
+		    !off->callbacks.gro_receive &&
+		    !(off->flags & INET6_PROTO_GSO_EXTHDR))
+			continue;
+		/* bitmap is private to this call: non-atomic set suffices */
+		__set_bit(i, bitmap);
+	}
+	spin_unlock(&offload_lock);
+
+	/* Called without the spinlock held: it copies to/from userspace and
+	 * allocates with GFP_KERNEL on write, both of which may sleep.
+	 */
+	ret = proc_do_large_bitmap(&tbl, write, buffer, lenp, ppos);
+	if (!write || ret)
+		return ret;
+
+	spin_lock(&offload_lock);
+	for (i = 0; i < NET_OFF_TBL_LEN; i++) {
+		bool isset = test_bit(i, bitmap);
+
+		off = rcu_dereference_protected(offs[i],
+						lockdep_is_held(&offload_lock));
+		if (!off)	/* empty slot: no flags to update */
+			continue;
+		/* only store when the flag actually changes */
+		if (!isset && (off->flags & flag))
+			off->flags &= ~flag;
+		else if (isset && !(off->flags & flag))
+			off->flags |= flag;
+	}
+	spin_unlock(&offload_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(proc_do_net_offload);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -435,6 +488,13 @@ static struct ctl_table net_core_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one
 	},
+	{
+		.procname	= "gro",
+		.mode		= 0644,
+		.proc_handler	= proc_do_net_offload,
+		.extra1		= dev_offloads,
+		.extra2		= (void *) NET_OFF_FLAG_GRO_OFF,
+	},
 #ifdef CONFIG_RPS
 	{
 		.procname	= "rps_sock_flow_entries",
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b92f422f2fa8..7a525039afb2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -477,6 +477,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "gro",
+		.mode		= 0644,
+		.proc_handler	= proc_do_net_offload,
+		.extra1		= inet_offloads,
+		.extra2		= (void *) NET_OFF_FLAG_GRO_OFF,
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.procname	= "cipso_cache_enable",
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 2d0ea3f453f2..6be5adbd2ce7 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -20,7 +20,7 @@
 
 #include "ip6_offload.h"
 
-static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
+static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto, bool is_gro)
 {
 	for (;;) {
 		struct ipv6_opt_hdr *opth;
@@ -30,6 +30,10 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 					  INET6_PROTO_GSO_EXTHDR))
 			break;
 
+		if (is_gro && !net_offload_has_flag(inet6_offloads, proto,
+						    NET_OFF_FLAG_GRO_OFF))
+			break;
+
 		if (unlikely(!pskb_may_pull(skb, 8)))
 			break;
 
@@ -76,7 +80,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	__skb_pull(skb, sizeof(*ipv6h));
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
-	proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+	proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr, false);
 
 	if (skb->encapsulation &&
 	    skb_shinfo(skb)->gso_type & (SKB_GSO_IPXIP4 | SKB_GSO_IPXIP6))
@@ -188,7 +192,7 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
 	if (!ops) {
 		__pskb_pull(skb, skb_gro_offset(skb));
 		skb_gro_frag0_invalidate(skb);
-		proto = ipv6_gso_pull_exthdrs(skb, proto);
+		proto = ipv6_gso_pull_exthdrs(skb, proto, true);
 		skb_gro_pull(skb, -skb_transport_offset(skb));
 		skb_reset_transport_header(skb);
 		__skb_push(skb, skb_gro_offset(skb));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index e15cd37024fd..83f14962a909 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -20,6 +20,7 @@
 #ifdef CONFIG_NETLABEL
 #include <net/calipso.h>
 #endif
+#include <net/protocol.h>
 
 static int zero;
 static int one = 1;
@@ -178,6 +179,13 @@ static struct ctl_table ipv6_rotable[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &one
 	},
+	{
+		.procname	= "gro",
+		.mode		= 0644,
+		.proc_handler	= proc_do_net_offload,
+		.extra1		= inet6_offloads,
+		.extra2		= (void *) NET_OFF_FLAG_GRO_OFF,
+	},
 #ifdef CONFIG_NETLABEL
 	{
 		.procname	= "calipso_cache_enable",
-- 
2.19.0.397.gdd90340f6a-goog

  parent reply	other threads:[~2018-09-14 23:15 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-14 17:59 [PATCH net-next RFC 0/8] udp and configurable gro Willem de Bruijn
2018-09-14 17:59 ` [PATCH net-next RFC 1/8] gro: convert device offloads to net_offload Willem de Bruijn
2018-09-14 17:59 ` [PATCH net-next RFC 2/8] gro: deduplicate gro_complete Willem de Bruijn
2018-09-14 17:59 ` [PATCH net-next RFC 3/8] gro: add net_gro_receive Willem de Bruijn
2018-09-14 17:59 ` [PATCH net-next RFC 4/8] ipv6: remove offload exception for hopopts Willem de Bruijn
2018-09-14 17:59 ` [PATCH net-next RFC 5/8] net: deconstify net_offload Willem de Bruijn
2018-09-15  3:30   ` Subash Abhinov Kasiviswanathan
2018-09-16 18:12     ` Willem de Bruijn
2018-09-14 17:59 ` Willem de Bruijn [this message]
2018-09-14 18:38   ` [PATCH net-next RFC 6/8] net: make gro configurable Stephen Hemminger
2018-09-14 22:50     ` Willem de Bruijn
2018-09-14 23:09       ` Willem de Bruijn
2018-09-14 23:14   ` Willem de Bruijn
2018-09-14 17:59 ` [PATCH net-next RFC 7/8] udp: gro behind static key Willem de Bruijn
2018-09-15  3:37   ` Subash Abhinov Kasiviswanathan
2018-09-16 18:10     ` Willem de Bruijn
2018-09-17  9:03   ` Steffen Klassert
2018-09-17 14:10     ` Willem de Bruijn
2018-09-17 10:24   ` Paolo Abeni
2018-09-17 14:12     ` Willem de Bruijn
2018-09-17 10:37   ` Steffen Klassert
2018-09-17 14:19     ` Willem de Bruijn
2018-09-18 10:59       ` Steffen Klassert
2018-09-14 17:59 ` [PATCH net-next RFC 8/8] udp: add gro Willem de Bruijn
2018-10-05 13:53 ` [PATCH net-next RFC 0/8] udp and configurable gro Paolo Abeni
2018-10-05 14:41   ` Willem de Bruijn
2018-10-05 15:30     ` Paolo Abeni
2018-10-05 15:45       ` Willem de Bruijn
2018-10-05 16:05         ` Paolo Abeni
2018-10-05 16:12           ` Willem de Bruijn
2018-10-08 11:27     ` Steffen Klassert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180914175941.213950-7-willemdebruijn.kernel@gmail.com \
    --to=willemdebruijn.kernel@gmail.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=steffen.klassert@secunet.com \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.