netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled.
@ 2012-10-17 15:46 Vlad Yasevich
  2012-10-17 16:13 ` Eric Dumazet
  0 siblings, 1 reply; 6+ messages in thread
From: Vlad Yasevich @ 2012-10-17 15:46 UTC (permalink / raw)
  To: netdev; +Cc: davem, Vlad Yasevich

Changes:
  - Fix spelling in description
  - Removed obsolete stub definition from ipv.h

When a guest using virtio is attempting to use IPv6 on a host where
the IPv6 module is administratively disabled, the tcp bulk transfer
rate drops down to 10kbps.  The problem is that when IPv6 is disabled,
none of the protocol handlers are registered.  When a guest attempts
to send an IPv6 packet with GSO (which is the default), this packet
is dropped by the host stack because there are no gso handlers
registered.  A retransmit is triggered for every GSO packet thus
reducing the transfer rate.

This patch attempts to solve this by enabling just enough code so GSO
is correctly processed.  However, I should point out that if IPv6 is
simply blacklisted or not built for the kernel, this problem will
still persist.

This problem also exists for AF_PACKET sockets, so it is not limited
to guests using virtio.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
---
 include/linux/ipv6.h  |    1 +
 net/ipv6/af_inet6.c   |   69 +++++++++++++++++++++++++++++++++++++++++++++---
 net/ipv6/ip6_input.c  |    5 +++
 net/ipv6/reassembly.c |   11 +++++--
 net/ipv6/tcp_ipv6.c   |    9 +++++-
 net/ipv6/udp.c        |    6 +++-
 6 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0b94e91..1d5e061 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -176,6 +176,7 @@ struct ipv6_devconf {
 };
 
 struct ipv6_params {
+	__s32 disable_ipv6_mod;
 	__s32 disable_ipv6;
 	__s32 autoconf;
 };
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a974247..1fd0d73 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -74,13 +74,13 @@ static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
 struct ipv6_params ipv6_defaults = {
+	.disable_ipv6_mod = 0,
 	.disable_ipv6 = 0,
 	.autoconf = 1,
 };
 
-static int disable_ipv6_mod;
 
-module_param_named(disable, disable_ipv6_mod, int, 0444);
+module_param_named(disable, ipv6_defaults.disable_ipv6_mod, int, 0444);
 MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional");
 
 module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444);
@@ -1048,6 +1048,53 @@ static struct pernet_operations inet6_net_ops = {
 	.exit = inet6_net_exit,
 };
 
+/* This function is called when IPv6 is administratively disabled.
+ * Its sole purpose is to set up enough of IPv6 that GSO/TSO will work
+ * for any outgoing IPv6 packets that may have been generated by
+ * guest or AF_PACKET sockets.
+ */
+static int inet6_init_disabled(void)
+{
+	int err = 0;
+
+	/* Init v6 extension headers. */
+	err = ipv6_exthdrs_init();
+	if (err)
+		goto out;
+
+	err = ipv6_frag_init();
+	if (err)
+		goto ipv6_frag_fail;
+
+	/* Init v6 transport protocols. UDP-light doesn't support GSO so
+	 * it is omitted
+	 */
+	err = udpv6_init();
+	if (err)
+		goto udpv6_fail;
+
+	err = tcpv6_init();
+	if (err)
+		goto tcpv6_fail;
+
+	err = ipv6_packet_init();
+	if (err)
+		goto ipv6_packet_fail;
+
+	return 0;
+
+ipv6_packet_fail:
+	tcpv6_exit();
+tcpv6_fail:
+	udpv6_exit();
+udpv6_fail:
+	ipv6_frag_exit();
+ipv6_frag_fail:
+	ipv6_exthdrs_exit();
+out:
+	return err;
+}
+	
 static int __init inet6_init(void)
 {
 	struct sk_buff *dummy_skb;
@@ -1060,8 +1107,9 @@ static int __init inet6_init(void)
 	for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
 		INIT_LIST_HEAD(r);
 
-	if (disable_ipv6_mod) {
+	if (ipv6_defaults.disable_ipv6_mod) {
 		pr_info("Loaded, but administratively disabled, reboot required to enable\n");
+		err = inet6_init_disabled();
 		goto out;
 	}
 
@@ -1238,11 +1286,22 @@ out_unregister_tcp_proto:
 }
 module_init(inet6_init);
 
+static void __exit inet6_exit_disabled(void)
+{
+	ipv6_packet_cleanup();
+	ipv6_frag_exit();
+	ipv6_exthdrs_exit();
+	udpv6_exit();
+	tcpv6_exit();
+}
+
 static void __exit inet6_exit(void)
 {
-	if (disable_ipv6_mod)
+	if (ipv6_defaults.disable_ipv6_mod) {
+		inet6_exit_disabled();
 		return;
-
+	}
+		
 	/* First of all disallow new sockets creation. */
 	sock_unregister(PF_INET6);
 	/* Disallow any further netlink messages */
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a52d864..3e86ec3 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -74,6 +74,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		return NET_RX_DROP;
 	}
 
+	if (ipv6_defaults.disable_ipv6_mod) {
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
 	rcu_read_lock();
 
 	idev = __in6_dev_get(skb->dev);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index da8a4e3..71a0e61 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -705,6 +705,9 @@ int __init ipv6_frag_init(void)
 	if (ret)
 		goto out;
 
+	if (ipv6_defaults.disable_ipv6_mod)
+		goto out;
+
 	ret = ip6_frags_sysctl_register();
 	if (ret)
 		goto err_sysctl;
@@ -734,8 +737,10 @@ err_sysctl:
 
 void ipv6_frag_exit(void)
 {
-	inet_frags_fini(&ip6_frags);
-	ip6_frags_sysctl_unregister();
-	unregister_pernet_subsys(&ip6_frags_ops);
+	if (!ipv6_defaults.disable_ipv6_mod) {
+		inet_frags_fini(&ip6_frags);
+		ip6_frags_sysctl_unregister();
+		unregister_pernet_subsys(&ip6_frags_ops);
+	}
 	inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 49c8903..ebb297c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2109,6 +2109,9 @@ int __init tcpv6_init(void)
 	if (ret)
 		goto out;
 
+	if (ipv6_defaults.disable_ipv6_mod)
+		goto out;
+
 	/* register inet6 protocol */
 	ret = inet6_register_protosw(&tcpv6_protosw);
 	if (ret)
@@ -2129,7 +2132,9 @@ out_tcpv6_protosw:
 
 void tcpv6_exit(void)
 {
-	unregister_pernet_subsys(&tcpv6_net_ops);
-	inet6_unregister_protosw(&tcpv6_protosw);
+	if (!ipv6_defaults.disable_ipv6_mod) {
+		unregister_pernet_subsys(&tcpv6_net_ops);
+		inet6_unregister_protosw(&tcpv6_protosw);
+	}
 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index fc99972..2e8dddb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1569,6 +1569,9 @@ int __init udpv6_init(void)
 	if (ret)
 		goto out;
 
+	if (ipv6_defaults.disable_ipv6_mod)
+		goto out;
+
 	ret = inet6_register_protosw(&udpv6_protosw);
 	if (ret)
 		goto out_udpv6_protocol;
@@ -1582,6 +1585,7 @@ out_udpv6_protocol:
 
 void udpv6_exit(void)
 {
-	inet6_unregister_protosw(&udpv6_protosw);
+	if (!ipv6_defaults.disable_ipv6_mod)
+		inet6_unregister_protosw(&udpv6_protosw);
 	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
 }
-- 
1.7.7.6

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled.
  2012-10-17 15:46 [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled Vlad Yasevich
@ 2012-10-17 16:13 ` Eric Dumazet
  2012-10-17 16:50   ` Ben Hutchings
  2012-10-17 19:11   ` Vlad Yasevich
  0 siblings, 2 replies; 6+ messages in thread
From: Eric Dumazet @ 2012-10-17 16:13 UTC (permalink / raw)
  To: Vlad Yasevich; +Cc: netdev, davem

On Wed, 2012-10-17 at 11:46 -0400, Vlad Yasevich wrote:

> This patch attempts to solve this by enabling just enough code so GSO
> is correctly processed.  However, I should point out that if IPv6 is
> simply blacklisted or not built for the kernel, this problem will
> still persist.

So I guess this should be done in a different way ?

We currently use a single structure (struct packet_type) to hold 
pointers to different methods. (The .func() field, and the gso/gro
stuff)

We probably need to split it in two parts, and make one part linked into
kernel, even if CONFIG_IPV6=n, so that GRO/GSO is fully IPv4/IPv6
functional.

By the way, do we really need a hash table for this ?
It seems we only have IPv4 (ETH_P_IP) and IPv6 (ETH_P_IPV6) to take care
of ?

This would remove some tests we currently have in GRO stack, and some
RCU stuff as well.
(GRO is slower if we have many af_packet sockets)

list_for_each_entry_rcu(ptype, head, list) {
    if (ptype->type != type || ptype->dev || !ptype->gro_receive)
        continue;
...

Adding a hook/test in ipv6_rcv() is ugly.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled.
  2012-10-17 16:13 ` Eric Dumazet
@ 2012-10-17 16:50   ` Ben Hutchings
  2012-10-17 19:03     ` Vlad Yasevich
  2012-10-17 19:11   ` Vlad Yasevich
  1 sibling, 1 reply; 6+ messages in thread
From: Ben Hutchings @ 2012-10-17 16:50 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Vlad Yasevich, netdev, davem

On Wed, 2012-10-17 at 18:13 +0200, Eric Dumazet wrote:
> On Wed, 2012-10-17 at 11:46 -0400, Vlad Yasevich wrote:
> 
> > This patch attempts to solve this by enabling just enough code so GSO
> > is correctly processed.  However, I should point out that if IPv6 is
> > simply blacklisted or not built for the kernel, this problem will
> > still persist.
> 
> So I guess this should be done in a different way ?
> 
> We currently use a single structure (struct packet_type) to hold 
> pointers to different methods. (The .func() field, and the gso/gro
> stuff)
> 
> We probably need to split it in two parts, and make one part linked into
> kernel, even if CONFIG_IPV6=n, so that GRO/GSO is fully IPv4/IPv6
> functional.
[...]

Either that or make sure that we don't advertise IPv6 GSO when IPv6 is
disabled.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled.
  2012-10-17 16:50   ` Ben Hutchings
@ 2012-10-17 19:03     ` Vlad Yasevich
  0 siblings, 0 replies; 6+ messages in thread
From: Vlad Yasevich @ 2012-10-17 19:03 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Eric Dumazet, netdev, davem

On 10/17/2012 12:50 PM, Ben Hutchings wrote:
> On Wed, 2012-10-17 at 18:13 +0200, Eric Dumazet wrote:
>> On Wed, 2012-10-17 at 11:46 -0400, Vlad Yasevich wrote:
>>
>>> This patch attempts to solve this by enabling just enough code so GSO
>>> is correctly processed.  However, I should point out that if IPv6 is
>>> simply blacklisted or not built for the kernel, this problem will
>>> still persist.
>>
>> So I guess this should be done in a different way ?
>>
>> We currently use a single structure (struct packet_type) to hold
>> pointers to different methods. (The .func() field, and the gso/gro
>> stuff)
>>
>> We probably need to split it in two parts, and make one part linked into
>> kernel, even if CONFIG_IPV6=n, so that GRO/GSO is fully IPv4/IPv6
>> functional.
> [...]
>
> Either that or make sure that we don't advertise IPv6 GSO when IPv6 is
> disabled.
>
> Ben.
>

this becomes a problem when migration or save/restore is used.  since 
offload features may be different between systesm, we may not be able to 
migrate.

-vlad

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled.
  2012-10-17 16:13 ` Eric Dumazet
  2012-10-17 16:50   ` Ben Hutchings
@ 2012-10-17 19:11   ` Vlad Yasevich
  2012-10-17 19:33     ` Eric Dumazet
  1 sibling, 1 reply; 6+ messages in thread
From: Vlad Yasevich @ 2012-10-17 19:11 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem

On 10/17/2012 12:13 PM, Eric Dumazet wrote:
> On Wed, 2012-10-17 at 11:46 -0400, Vlad Yasevich wrote:
>
>> This patch attempts to solve this by enabling just enough code so GSO
>> is correctly processed.  However, I should point out that if IPv6 is
>> simply blacklisted or not built for the kernel, this problem will
>> still persist.
>
> So I guess this should be done in a different way ?
>
> We currently use a single structure (struct packet_type) to hold
> pointers to different methods. (The .func() field, and the gso/gro
> stuff)
>
> We probably need to split it in two parts, and make one part linked into
> kernel, even if CONFIG_IPV6=n, so that GRO/GSO is fully IPv4/IPv6
> functional.

The thing about this approach is that if there are any other protocols 
that could have to provide their own segmentation functionality, such 
functionality would always have to be part of the kernel.  I wasn't sure 
how much I liked that.

>
> By the way, do we really need a hash table for this ?
> It seems we only have IPv4 (ETH_P_IP) and IPv6 (ETH_P_IPV6) to take care
> of ?

Which hash are you talking about?  I didn't add any hashes.

>
> This would remove some tests we currently have in GRO stack, and some
> RCU stuff as well.
> (GRO is slower if we have many af_packet sockets)
>
> list_for_each_entry_rcu(ptype, head, list) {
>      if (ptype->type != type || ptype->dev || !ptype->gro_receive)
>          continue;
> ...
>
> Adding a hook/test in ipv6_rcv() is ugly.
>

There already is one for device.  I original had a stub ipv6_rcv that 
just dropped the packet.  I could resurrect that if that's better...

-vlad

>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled.
  2012-10-17 19:11   ` Vlad Yasevich
@ 2012-10-17 19:33     ` Eric Dumazet
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Dumazet @ 2012-10-17 19:33 UTC (permalink / raw)
  To: vyasevic; +Cc: netdev, davem

On Wed, 2012-10-17 at 15:11 -0400, Vlad Yasevich wrote:
> On 10/17/2012 12:13 PM, Eric Dumazet wrote:
> > On Wed, 2012-10-17 at 11:46 -0400, Vlad Yasevich wrote:
> >
> >> This patch attempts to solve this by enabling just enough code so GSO
> >> is correctly processed.  However, I should point out that if IPv6 is
> >> simply blacklisted or not built for the kernel, this problem will
> >> still persist.
> >
> > So I guess this should be done in a different way ?
> >
> > We currently use a single structure (struct packet_type) to hold
> > pointers to different methods. (The .func() field, and the gso/gro
> > stuff)
> >
> > We probably need to split it in two parts, and make one part linked into
> > kernel, even if CONFIG_IPV6=n, so that GRO/GSO is fully IPv4/IPv6
> > functional.
> 
> The thing about this approach is that if there are any other protocols 
> that could have to provide their own segmentation functionality, such 
> functionality would always have to be part of the kernel.  I wasn't sure 
> how much I liked that.

Well, an hypervisor probably has to handle IPv6, at least to a certain
extent.

Make this part a module of its own, or statically linked into vmlinux,
instead of adding some stubs in IPv6 'module'


> 
> >
> > By the way, do we really need a hash table for this ?
> > It seems we only have IPv4 (ETH_P_IP) and IPv6 (ETH_P_IPV6) to take care
> > of ?
> 
> Which hash are you talking about?  I didn't add any hashes.

I suspect you didnt really understood what I said then.

packet_type structures are hashed into ptype_base[]

struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];

Now let say we split the structure into 2 parts.

ptype_base[] would be used for pre GRO/GSO stuff (packet handlers)

So we would need to add a second hash for the GRO/GSO stuff.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-10-17 19:34 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-10-17 15:46 [PATCHv3] ipv6: Enable enough of the code to handle GSO when disabled Vlad Yasevich
2012-10-17 16:13 ` Eric Dumazet
2012-10-17 16:50   ` Ben Hutchings
2012-10-17 19:03     ` Vlad Yasevich
2012-10-17 19:11   ` Vlad Yasevich
2012-10-17 19:33     ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).