Netdev List
 help / color / mirror / Atom feed
* [PATCHv6] usbnet: Resubmit interrupt URB if device is open
From: Paul Stewart @ 2011-04-19 17:44 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: stern-nwvwT67g6+6dFdvTe/nMLpVzexx5G7lz,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, oliver-GvhC2dPhHPQdnm+yROfE0A,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q, bhutchings-s/n/eUQHGBpZroRs9YW3xA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <Pine.LNX.4.44L0.1104221129530.1877-100000-IYeN2dnnYyZXsRXLowluHWD2FQJk+8+b@public.gmane.org>

Resubmit interrupt URB if device is open.  Use a flag set in
usbnet_open() to determine this state.  Also kill and free
interrupt URB in usbnet_disconnect().

Signed-off-by: Paul Stewart <pstew-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
---
 drivers/net/usb/usbnet.c   |   11 +++++++++++
 include/linux/usb/usbnet.h |    1 +
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 02d25c7..b4572c3 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -643,6 +643,7 @@ static int usbnet_open (struct net_device *net)
 		}
 	}
 
+	set_bit(EVENT_DEV_OPEN, &dev->flags);
 	netif_start_queue (net);
 	if (netif_msg_ifup (dev)) {
 		char	*framing;
@@ -1105,6 +1106,9 @@ void usbnet_disconnect (struct usb_interface *intf)
 	if (dev->driver_info->unbind)
 		dev->driver_info->unbind (dev, intf);
 
+	usb_kill_urb(dev->interrupt);
+	usb_free_urb(dev->interrupt);
+
 	free_netdev(net);
 	usb_put_dev (xdev);
 }
@@ -1285,6 +1289,9 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message)
 		 * wake the device
 		 */
 		netif_device_attach (dev->net);
+
+		/* Stop interrupt URBs */
+		usb_kill_urb(dev->interrupt);
 	}
 	return 0;
 }
@@ -1297,6 +1304,10 @@ int usbnet_resume (struct usb_interface *intf)
 	if (!--dev->suspend_count)
 		tasklet_schedule (&dev->bh);
 
+		/* resume interrupt URBs */
+		if (test_bit(EVENT_DEV_OPEN, &dev->flags))
+			usb_submit_urb(dev->interrupt, GFP_NOIO);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usbnet_resume);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index ba09fe8..d148cca 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -64,6 +64,7 @@ struct usbnet {
 #		define EVENT_RX_MEMORY	2
 #		define EVENT_STS_SPLIT	3
 #		define EVENT_LINK_RESET	4
+#		define EVENT_DEV_OPEN	5
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCHv5] usbnet: Resubmit interrupt URB once if halted
From: Paul Stewart @ 2011-04-19 17:44 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: stern-nwvwT67g6+6dFdvTe/nMLpVzexx5G7lz,
	linux-usb-u79uwXL29TY76Z2rM5mHXA, oliver-GvhC2dPhHPQdnm+yROfE0A,
	davemloft.net-hpIqsD4AKlfQT0dZR+AlfA,
	bhutchings-s/n/eUQHGBpZroRs9YW3xA,
	linux-usb-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <Pine.LNX.4.44L0.1104211436540.1939-100000-IYeN2dnnYyZXsRXLowluHWD2FQJk+8+b@public.gmane.org>

Resubmit interrupt URB if device is open.  Use a flag set in
usbnet_open() to determine this state.  Also kill and free
interrupt URB in usbnet_disconnect().

Signed-off-by: Paul Stewart <pstew-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
---
 drivers/net/usb/usbnet.c   |   14 ++++++++++++++
 include/linux/usb/usbnet.h |    1 +
 2 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 02d25c7..c7cf4af 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -643,6 +643,7 @@ static int usbnet_open (struct net_device *net)
 		}
 	}
 
+	set_bit(EVENT_DEV_OPEN, &dev->flags);
 	netif_start_queue (net);
 	if (netif_msg_ifup (dev)) {
 		char	*framing;
@@ -1105,6 +1106,11 @@ void usbnet_disconnect (struct usb_interface *intf)
 	if (dev->driver_info->unbind)
 		dev->driver_info->unbind (dev, intf);
 
+	if (dev->interrupt) {
+		usb_kill_urb(dev->interrupt);
+		usb_free_urb(dev->interrupt);
+	}
+
 	free_netdev(net);
 	usb_put_dev (xdev);
 }
@@ -1285,6 +1291,10 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message)
 		 * wake the device
 		 */
 		netif_device_attach (dev->net);
+
+		/* Stop interrupt URBs */
+		if (dev->interrupt)
+			usb_kill_urb(dev->interrupt);
 	}
 	return 0;
 }
@@ -1297,6 +1307,10 @@ int usbnet_resume (struct usb_interface *intf)
 	if (!--dev->suspend_count)
 		tasklet_schedule (&dev->bh);
 
+		/* resume interrupt URBs */
+		if (dev->interrupt && test_bit(EVENT_DEV_OPEN, &dev->flags))
+			usb_submit_urb(dev->interrupt, GFP_NOIO);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usbnet_resume);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index ba09fe8..d148cca 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -64,6 +64,7 @@ struct usbnet {
 #		define EVENT_RX_MEMORY	2
 #		define EVENT_STS_SPLIT	3
 #		define EVENT_LINK_RESET	4
+#		define EVENT_DEV_OPEN	5
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCHv4] usbnet: Resubmit interrupt URB once if halted
From: Paul Stewart @ 2011-04-19 17:44 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	stern-nwvwT67g6+6dFdvTe/nMLpVzexx5G7lz,
	greg-U8xfFu+wG4EAvxtiuMwx3w
In-Reply-To: <BANLkTi=N3T-V8VNOcbKu6COKvbEHqMoAog-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Set a flag if the interrupt URB completes with ENOENT as this
occurs legitimately during system suspend.  When the
usbnet_resume is called, test this flag and try once to resubmit
the interrupt URB.

This version of the patch moves the urb submit directly into
usbnet_resume.  Is it okay to submit a GFP_KERNEL urb from
usbnet_resume()?

Signed-off-by: Paul Stewart <pstew-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
---
 drivers/net/usb/usbnet.c   |   13 ++++++++++++-
 include/linux/usb/usbnet.h |    1 +
 2 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 02d25c7..3651a48 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -482,6 +482,7 @@ static void intr_complete (struct urb *urb)
 	case -ESHUTDOWN:	/* hardware gone */
 		if (netif_msg_ifdown (dev))
 			devdbg (dev, "intr shutdown, code %d", status);
+		set_bit(EVENT_INTR_HALT, &dev->flags);
 		return;
 
 	/* NOTE:  not throttling like RX/TX, since this endpoint
@@ -1294,9 +1295,19 @@ int usbnet_resume (struct usb_interface *intf)
 {
 	struct usbnet		*dev = usb_get_intfdata(intf);
 
-	if (!--dev->suspend_count)
+	if (!--dev->suspend_count) {
 		tasklet_schedule (&dev->bh);
 
+		/* resubmit interrupt URB if it was halted by suspend */
+		if (dev->interrupt && netif_running(dev->net) &&
+		    netif_device_present(dev->net) &&
+		    test_bit(EVENT_INTR_HALT, &dev->flags)) {
+			clear_bit(EVENT_INTR_HALT, &dev->flags);
+			usb_submit_urb(dev->interrupt, GFP_KERNEL);
+		}
+	}
+}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usbnet_resume);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index ba09fe8..6c4b5f8 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -64,6 +64,7 @@ struct usbnet {
 #		define EVENT_RX_MEMORY	2
 #		define EVENT_STS_SPLIT	3
 #		define EVENT_LINK_RESET	4
+#		define EVENT_INTR_HALT	5
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCHv3] usbnet: Resubmit interrupt URB once if halted
From: Paul Stewart @ 2011-04-19 17:44 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: linux-usb-u79uwXL29TY76Z2rM5mHXA, davem-fT/PcQaiUtIeIZ0/mPfg9Q,
	stern-nwvwT67g6+6dFdvTe/nMLpVzexx5G7lz,
	greg-U8xfFu+wG4EAvxtiuMwx3w
In-Reply-To: <20110420.012431.104074243.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

Set a flag if the interrupt URB completes with ENOENT as this
occurs legitimately during system suspend.  When the usbnet_bh
is called after resume, test this flag and try once to resubmit
the interrupt URB.

Signed-off-by: Paul Stewart <pstew-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
---
 drivers/net/usb/usbnet.c   |    8 ++++++++
 include/linux/usb/usbnet.h |    1 +
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 02d25c7..9ac4bae 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -482,6 +482,7 @@ static void intr_complete (struct urb *urb)
 	case -ESHUTDOWN:	/* hardware gone */
 		if (netif_msg_ifdown (dev))
 			devdbg (dev, "intr shutdown, code %d", status);
+		set_bit(EVENT_INTR_HALT, &dev->flags);
 		return;
 
 	/* NOTE:  not throttling like RX/TX, since this endpoint
@@ -1065,6 +1066,13 @@ static void usbnet_bh (unsigned long param)
 		if (dev->txq.qlen < TX_QLEN (dev))
 			netif_wake_queue (dev->net);
 	}
+
+	// try once to resume interrupt URBs if they were halted before
+	if (dev->interrupt && netif_running(dev->net) &&
+	    test_bit(EVENT_INTR_HALT, &dev->flags)) {
+		clear_bit(EVENT_INTR_HALT, &dev->flags);
+		usb_submit_urb(dev->interrupt, GFP_KERNEL);
+	}
 }
 
 
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index ba09fe8..6c4b5f8 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -64,6 +64,7 @@ struct usbnet {
 #		define EVENT_RX_MEMORY	2
 #		define EVENT_STS_SPLIT	3
 #		define EVENT_LINK_RESET	4
+#		define EVENT_INTR_HALT	5
 };
 
 static inline struct usb_driver *driver_of(struct usb_interface *intf)
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: DSCP values in TCP handshake
From: Mikael Abrahamsson @ 2011-04-19 17:38 UTC (permalink / raw)
  To: Matt Mathis; +Cc: Stephen Hemminger, Joe Buehler, Eric Dumazet, netdev
In-Reply-To: <BANLkTi=fDdoF=kSfY=9ER4iR3M27L=CLXw@mail.gmail.com>

On Tue, 19 Apr 2011, Matt Mathis wrote:

> Please do.  This missing spec is one of the things that makes Less than 
> Best Effort (aka scavenger service) unusable.  Only the client knows if 
> they are fetching data in the background.  The server doesn't care.

I emailed with Fred Baker and the below text seems to be where it ended 
regarding reflexive marking of packets. If someone wants this changed, 
they need to bring it up with the IETF.

<http://www.ietf.org/proceedings/54/220.htm>

"Fred Baker presented draft-ietf-ieprep-packet-marking-policy-00.txt

Van Jacobsen presented a concern for Kathy Nichols, Diffserv co-chair. The 
concern was that the DSCP is intended to identify traffic supported by a 
service, as opposed to traffic of a certain application type. He also 
worried that a specific BCP indicating the facilities used to support such 
services might be misused by regulators to mandate ISP services. 
Specifically, he wanted a statement between the requirements document and 
a document suggesting a specific configuration identifying the fact that 
inter-service-provider signaling as specified in this draft is certainly 
specified but is not commonly implemented.

Christian Huitema and another speaker also commented that while a cookbook 
such as this was an interesting useful document, it didn't seem to 
directly stem from emergency as a context.

Rei Atarashi presented draft-ietf-ieprep-reflexive-dscp-00.txt

Van basically felt that the notion of a default response from the host was 
inappropriate; this is something every network should provide a policy 
for. "

-- 
Mikael Abrahamsson    email: swmike@swm.pp.se

^ permalink raw reply

* Re: [PATCH] usbnet: Resubmit interrupt URB more often
From: Ben Hutchings @ 2011-04-19 17:11 UTC (permalink / raw)
  To: Paul Stewart; +Cc: netdev, davem
In-Reply-To: <20110419164703.5A6A82052B@glenhelen.mtv.corp.google.com>

On Tue, 2011-04-19 at 09:35 -0700, Paul Stewart wrote:
[...]
> index 4342bd9..e4dbb29 100644
> --- a/drivers/usb/core/urb.c
> +++ b/drivers/usb/core/urb.c
> @@ -295,7 +295,9 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
>  	struct usb_host_endpoint	*ep;
>  	int				is_out;
>  
> -	if (!urb || urb->hcpriv || !urb->complete)
> +	if (urb->hcpriv)
> +		return -EALREADY;
> +	if (!urb || !urb->complete)
>  		return -EINVAL;
[...]

The test for !urb must come before the test on urb->hcpriv.

Also, the kernel coding style does not allow '//' comments.  Please use
scripts/checkpatch.pl to check for style and other common errors.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [Bugme-new] [Bug 33502] New: Caught 64-bit read from uninitialized memory in __alloc_skb
From: Christoph Lameter @ 2011-04-19 17:10 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andrew Morton, netdev, bugzilla-daemon, bugme-daemon,
	casteyde.christian, Vegard Nossum, Pekka Enberg
In-Reply-To: <1303183217.4152.49.camel@edumazet-laptop>

On Tue, 19 Apr 2011, Eric Dumazet wrote:

>  	}
>  }
>
> -#ifdef CONFIG_CMPXCHG_LOCAL
> +#if defined(CONFIG_CMPXCHG_LOCAL) && \
> +	!defined(CONFIG_KMEMCHECK) && !defined(DEBUG_PAGEALLOC)
> +#define SLUB_USE_CMPXCHG_DOUBLE
> +#endif
> +
> +#ifdef SLUB_USE_CMPXCHG_DOUBLE
>  #ifdef CONFIG_PREEMPT
>  /*

Ugh... Isn't there some way to indicate to kmemcheck that a speculative
access is occurring?

^ permalink raw reply

* Re: [Bugme-new] [Bug 33502] New: Caught 64-bit read from uninitialized memory in __alloc_skb
From: Christoph Lameter @ 2011-04-19 17:09 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Andrew Morton, netdev, bugzilla-daemon, bugme-daemon,
	casteyde.christian, Vegard Nossum, Pekka Enberg
In-Reply-To: <1303182557.4152.48.camel@edumazet-laptop>

On Tue, 19 Apr 2011, Eric Dumazet wrote:

> get_freepointer(s, object) can access freed memory and kmemcheck
> triggers the fault, while this_cpu_cmpxchg_double() would presumably
> detect a change of tid and would not perform the freelist/tid change.

Sounds right. The new lockless patchset for slub that uses a locked
cmpxchg16b will make this behavior even more common since it will do more
speculative accesses.


^ permalink raw reply

* Re: [PATCH] bonding: 802.3ad - fix agg_device_up
From: Jay Vosburgh @ 2011-04-19 16:55 UTC (permalink / raw)
  To: Jiri Bohac; +Cc: netdev, Andy Gospodarek, Stephen Hemminger
In-Reply-To: <20110419120955.GA14302@midget.suse.cz>

Jiri Bohac <jbohac@suse.cz> wrote:

>The slave member of struct aggregator does not necessarily point
>to a slave which is part of the aggregator. It points to the
>slave structure containing the aggregator structure, while
>completely different slaves (or no slaves at all) may be part of
>the aggregator.
>
>The agg_device_up() function wrongly uses agg->slave to find the state of the
>aggregator.  Use agg->lag_ports->slave instead. The bug has been
>introduced by commit 4cd6fe1c6483cde93e2ec91f58b7af9c9eea51ad.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>

	One additional note: port->slave can be NULL when the slave is
transitioning in or out of the bond, but not when the port is part of an
aggregator, so this usage should be safe.

	-J

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>


>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 494bf96..31912f1 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -1482,8 +1482,11 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
>
> static int agg_device_up(const struct aggregator *agg)
> {
>-	return (netif_running(agg->slave->dev) &&
>-		netif_carrier_ok(agg->slave->dev));
>+	struct port *port = agg->lag_ports;
>+	if (!port)
>+		return 0;
>+	return (netif_running(port->slave->dev) &&
>+		netif_carrier_ok(port->slave->dev));
> }
>
> /**
>-- 
>Jiri Bohac <jbohac@suse.cz>
>SUSE Labs, SUSE CZ
>

^ permalink raw reply

* Re: A patch you wrote some time ago (aka: "[patch 41/54] ICMP: Fix icmp_errors_use_inbound_ifaddr sysctl")
From: Chris Wright @ 2011-04-19 16:54 UTC (permalink / raw)
  To: Alexander Hoogerhuis; +Cc: Chris Wright, linux-kernel, netdev, kaber
In-Reply-To: <4DADBBBC.6020803@boxed.no>

* Alexander Hoogerhuis (alexh@boxed.no) wrote:
> I hope you (or anyone else) can spare half a minute to have a quick
> look at a patch you wrote a few years ago:
> 
> >http://lkml.org/lkml/2007/6/8/124

I actually did not write that patch, rather added it to the -stable tree.
Patrick (CCd) wrote it.

> I've been tracking down a case of ICMP Redirects originating from
> the wrong IPs, and as far as I can tell, your patch is the last to touch
> this code (net/ipv4/icmp.c:507):
> 
> > if (rt->fl.iif && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
> >        dev = dev_get_by_index_rcu(net, rt->fl.iif);
> >
> >if (dev)
> >        saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
> >else
> >        saddr = 0;
> 
> In a plain world this would work, but I have come across a case that
> seems to be not handled by this.
> 
> I have two machines set up with VRRP to act as routers out of a
> subnet, and they have IPs x.x.x.13/28 and x.x.x.14/28, with VRRP
> holding on to x.x.x.1/28.
> 
> If a node in x.x.x.0/28 needs to get a ICMP redirect from x.x.x.1/28
> (to reach another subnet behind a  different gateway in x.x.x.0/28),
> then the source IP on the ICMP redirect is chosen as the primary IP
> on the interface that the packet arrived at.
> 
> This is as far as I can tell from RFCs and colleagues fine for most
> things after you're routed one hop or more, but in the case of ICMP
> redirect it means that the redirect is not adhered to by the client,
> as it will get the redirect from x.x.x.13/28, not x.x.x.1/28.
> 
> inet_select_addr seems to be explicitly looking for the primary IP
> in all cases (./net/ipv4/devinet.c:875), and in the case of sending
> ICMP redirect when in a VRRP setup, that would not work well. It
> should try to match the actual inbound IP.
> 
> Judging by the comments from your patch I am not sure if the source
> IP that triggers the ICMP redirect is available at this point any
> more.
> 
> The way I understand it, it should pick the address this way:
> 
> >  if (rt->fl.iif && net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
> >         dev = dev_get_by_index_rcu(net, rt->fl.iif);
> >
> > if (dev == fl.iif)
> >         saddr = iph->daddr;
> >
> > if (dev != fl.iif)
> >         saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
> > else
> >         saddr = 0;
> 
> I.e. if we are replying to something that is from a local network
> segment, then iph->daddr would be a more correct source. My C skill
> is prehistoric so what I've written likely is far from correct, but
> the general gist is that there is a special case for replying to
> something local.
> 
> As it stands today (I'm on 2.6.35.11), ICMP redirects when using
> VRRP are broken, and I'm hoping I may have found out why. :)
> 
> mvh,
> A
> -- 
> Alexander Hoogerhuis | http://no.linkedin.com/in/alexh
> Boxed Solutions AS   | +47 908 21 485 - alexh@boxed.no
> "Given enough eyeballs, all bugs are shallow." -Eric S. Raymond

^ permalink raw reply

* [PATCH] usbnet: Resubmit interrupt URB more often
From: Paul Stewart @ 2011-04-19 16:35 UTC (permalink / raw)
  To: netdev; +Cc: davem

I previously sent a patch to resubmit the interrupt URB when
coming out of suspend.  I haven't seen much activity on the
list about it, and thought I'd send a slight variant of this
change.  This one unconditionally resubmits the interrupt urb
in usbnet_bh.  The consequences for resubmitting the URB often
are not large.  In most HCI cases this just means usb_submit_urb
returns immediately and leaves the previous request outstanding.

Doing things this way allows us to avoid keeping track of the
URB transmit status, which may change silently over suspend-
resume transitions and is not tracked in any way currently by
usbnet.

I've designed this change on two types of systems: the first
class of system leaves USB devices powered during suspend.
This might silently cause the interrupt URB to disappear (or at
least not be resubmitted in intr_complete).  Resubmission after
system resume will prevent this from causing problems.

The second class of system is the kind that shuts down the device
during suspend.  During a suspend-resume cycle, the device is
re-enumerated at system resume, and for whatever reason
usbnet_resume may be called on the device during the call-tree
from usbnet_open-> usb_autopm_get_interface, which may cause a
race where the first change above may cause the bh to submit the
interrupt urb before usbnet_open() does.  As a result, I've added
an EALREADY check and a fix to urb.c to send one.

Signed-off-by: Paul Stewart <pstew@chromium.org>
Cc: David S. Miller <davem@davemloft.net>

---
 drivers/net/usb/usbnet.c |   12 +++++++++++-
 drivers/usb/core/urb.c   |    5 +++--
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 02d25c7..3b3c169 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -636,7 +636,10 @@ static int usbnet_open (struct net_device *net)
 	/* start any status interrupt transfer */
 	if (dev->interrupt) {
 		retval = usb_submit_urb (dev->interrupt, GFP_KERNEL);
-		if (retval < 0) {
+		if (retval == -EALREADY) {
+			// It is not an error if interrupt urb is already active
+			retval = 0;
+		} else if (retval < 0) {
 			if (netif_msg_ifup (dev))
 				deverr (dev, "intr submit %d", retval);
 			goto done;
@@ -1065,6 +1068,10 @@ static void usbnet_bh (unsigned long param)
 		if (dev->txq.qlen < TX_QLEN (dev))
 			netif_wake_queue (dev->net);
 	}
+
+	// Re-submit interrupt urb (doesn't hurt to retry)
+	if (netif_running (dev->net))
+		usb_submit_urb (dev->interrupt, GFP_KERNEL);
 }
 
 
@@ -1285,6 +1292,9 @@ int usbnet_suspend (struct usb_interface *intf, pm_message_t message)
 		 * wake the device
 		 */
 		netif_device_attach (dev->net);
+		// Stop interrupt urbs while in suspend
+		if (dev->interrupt)
+			usb_kill_urb(dev->interrupt);
 	}
 	return 0;
 }
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 4342bd9..e4dbb29 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -295,7 +295,9 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	struct usb_host_endpoint	*ep;
 	int				is_out;
 
-	if (!urb || urb->hcpriv || !urb->complete)
+	if (urb->hcpriv)
+		return -EALREADY;
+	if (!urb || !urb->complete)
 		return -EINVAL;
 	dev = urb->dev;
 	if ((!dev) || (dev->state < USB_STATE_DEFAULT))
@@ -807,4 +809,3 @@ int usb_anchor_empty(struct usb_anchor *anchor)
 }
 
 EXPORT_SYMBOL_GPL(usb_anchor_empty);
-
-- 
1.7.3.1


^ permalink raw reply related

* [PATCH] net: tun: convert to hw_features
From: Michał Mirosław @ 2011-04-19 16:13 UTC (permalink / raw)
  To: netdev; +Cc: Rusty Russell

This changes offload setting behaviour to what I think is correct:
 - offloads set via ethtool mean what admin wants to use (by default
   he wants 'em all)
 - offloads set via ioctl() mean what userspace is expecting to get
   (this limits which admin wishes are granted)
 - TUN_NOCHECKSUM is ignored, as it might cause broken packets when
   forwarded (ip_summed == CHECKSUM_UNNECESSARY means that checksum
   was verified, not that it can be ignored)

If TUN_NOCHECKSUM is implemented, it should set skb->csum_* and
skb->ip_summed (= CHECKSUM_PARTIAL) for known protocols and let others
be verified by kernel when necessary.

TUN_NOCHECKSUM handling was introduced by commit
f43798c27684ab925adde7d8acc34c78c6e50df8:

    tun: Allow GSO using virtio_net_hdr
    
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/tun.c |   63 +++++++++++++++++++++--------------------------------
 1 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f5e9ac0..ade3cf9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -123,6 +123,9 @@ struct tun_struct {
 	gid_t			group;
 
 	struct net_device	*dev;
+	u32			set_features;
+#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
+			  NETIF_F_TSO6|NETIF_F_UFO)
 	struct fasync_struct	*fasync;
 
 	struct tap_filter       txflt;
@@ -451,12 +454,20 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+static u32 tun_net_fix_features(struct net_device *dev, u32 features)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+
+	return (features & tun->set_features) | (features & ~TUN_USER_FEATURES);
+}
+
 static const struct net_device_ops tun_netdev_ops = {
 	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
 	.ndo_stop		= tun_net_close,
 	.ndo_start_xmit		= tun_net_xmit,
 	.ndo_change_mtu		= tun_net_change_mtu,
+	.ndo_fix_features	= tun_net_fix_features,
 };
 
 static const struct net_device_ops tap_netdev_ops = {
@@ -465,6 +476,7 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_stop		= tun_net_close,
 	.ndo_start_xmit		= tun_net_xmit,
 	.ndo_change_mtu		= tun_net_change_mtu,
+	.ndo_fix_features	= tun_net_fix_features,
 	.ndo_set_multicast_list	= tun_net_mclist,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -628,8 +640,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
 			kfree_skb(skb);
 			return -EINVAL;
 		}
-	} else if (tun->flags & TUN_NOCHECKSUM)
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
 
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
@@ -1094,6 +1105,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 				goto err_free_sk;
 		}
 
+		dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
+			TUN_USER_FEATURES;
+		dev->features = dev->hw_features;
+
 		err = register_netdevice(tun->dev);
 		if (err < 0)
 			goto err_free_sk;
@@ -1158,18 +1173,12 @@ static int tun_get_iff(struct net *net, struct tun_struct *tun,
 
 /* This is like a cut-down ethtool ops, except done via tun fd so no
  * privs required. */
-static int set_offload(struct net_device *dev, unsigned long arg)
+static int set_offload(struct tun_struct *tun, unsigned long arg)
 {
-	u32 old_features, features;
-
-	old_features = dev->features;
-	/* Unset features, set them as we chew on the arg. */
-	features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST
-				    |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6
-				    |NETIF_F_UFO));
+	u32 features = 0;
 
 	if (arg & TUN_F_CSUM) {
-		features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
+		features |= NETIF_F_HW_CSUM;
 		arg &= ~TUN_F_CSUM;
 
 		if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
@@ -1195,9 +1204,8 @@ static int set_offload(struct net_device *dev, unsigned long arg)
 	if (arg)
 		return -EINVAL;
 
-	dev->features = features;
-	if (old_features != dev->features)
-		netdev_features_change(dev);
+	tun->set_features = features;
+	netdev_update_features(tun->dev);
 
 	return 0;
 }
@@ -1262,12 +1270,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 
 	case TUNSETNOCSUM:
 		/* Disable/Enable checksum */
-		if (arg)
-			tun->flags |= TUN_NOCHECKSUM;
-		else
-			tun->flags &= ~TUN_NOCHECKSUM;
 
-		tun_debug(KERN_INFO, tun, "checksum %s\n",
+		/* [unimplemented] */
+		tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n",
 			  arg ? "disabled" : "enabled");
 		break;
 
@@ -1316,7 +1321,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		break;
 #endif
 	case TUNSETOFFLOAD:
-		ret = set_offload(tun->dev, arg);
+		ret = set_offload(tun, arg);
 		break;
 
 	case TUNSETTXFILTER:
@@ -1595,30 +1600,12 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
 #endif
 }
 
-static u32 tun_get_rx_csum(struct net_device *dev)
-{
-	struct tun_struct *tun = netdev_priv(dev);
-	return (tun->flags & TUN_NOCHECKSUM) == 0;
-}
-
-static int tun_set_rx_csum(struct net_device *dev, u32 data)
-{
-	struct tun_struct *tun = netdev_priv(dev);
-	if (data)
-		tun->flags &= ~TUN_NOCHECKSUM;
-	else
-		tun->flags |= TUN_NOCHECKSUM;
-	return 0;
-}
-
 static const struct ethtool_ops tun_ethtool_ops = {
 	.get_settings	= tun_get_settings,
 	.get_drvinfo	= tun_get_drvinfo,
 	.get_msglevel	= tun_get_msglevel,
 	.set_msglevel	= tun_set_msglevel,
 	.get_link	= ethtool_op_get_link,
-	.get_rx_csum	= tun_get_rx_csum,
-	.set_rx_csum	= tun_set_rx_csum
 };
 
 
-- 
1.7.2.5


^ permalink raw reply related

* [BUG] bnx2x: bnx2x_set_pbd_csum() random accesses
From: Eric Dumazet @ 2011-04-19 16:11 UTC (permalink / raw)
  To: Dmitry Kravkov, Eilon Greenstein; +Cc: netdev

Hi guys

bnx2x_set_pbd_csum() / bnx2x_set_pbd_csum_e2() seem to read
tcp_hdrlen(skb) even for non-TCP frames?

Also, (skb_network_header(skb) - skb->data) is signed, so 
	(skb_network_header(skb) - skb->data) / 2 is a bit expensive...

Thanks



^ permalink raw reply

* Re: Hight speed data sending from custom IP out of kernel
From: juice @ 2011-04-19 16:02 UTC (permalink / raw)
  To: monstr, netdev
In-Reply-To: <4DAD76F1.40309@monstr.eu>


Hi!

I can see you are probably going to run into CPU performance problems, but
it depends a lot on the type of traffic you are going to send.

My system requires quite a fast processor, but it is even more important to
have a network interface card that really supports the full speed of a
gigabit ethernet line. The reason is that my test traffic
includes streams of very small packets that cause a lot of overhead in
processing.

Most of my test traffic is UDP, but it does not really matter what the
higher layers of the traffic are, this scheme operates on the ethernet
layer and does not care about payload structure.

I tried several NICs before I settled on using Intel 82576 cards with the
igb driver. If you have a less capable interface card, your small packet
performance is going to be a lot poorer.

Using that card I can get to full speed GE line rate even with 64-byte
packets, but if you want to send larger packets, say close to 1500 bytes,
then almost any NIC will work OK for you.

You can download the module code and the userland seeding application from
my svn server at https://toosa.swagman.org/svn/streamgen
The streamseed userland application requires libpcap-dev to build
correctly, but the streamgen module is self-sufficient.

There is not a lot of documentation, and the module is still "work in
progress" as I am going to fix it to work with more than one interface at
the same time when I get to do it. Currently it can only use one interface
on the sending host machine.

  - Juice -


> Hi Juice,
>
> juice wrote:
>> Hi Michal.
>>
>> How fast do you need to send the data?
>
> It sounds weird but as fast as possible. There is no specific limit
> because I
> want to create demo and test it on various hw configuration which I can
> easily
> create on FPGA. For now the bottleneck is Microblaze cpu. It can run from
> 50MHz
> till 170-180MHz. We also support both endians and have two hw IP
> cores(10/100/1000) which I can use.
>
>> I have an application where I send test stream out to GE line and can
>> fill
>> the total capacity of the ethernet regardless of the packet size.
>
> What cpu do you use?
>
>>
>> The test stream I am sending is stored in kernel memory, and therefore
>> is
>> limited by the amount of free memory. 200M is no problem.
>
> Is it UDP or TCP?
>
>>
>> The solution I am using is loosely based on the pktgen module, except
>> that
>> my module can load a wireshark capture from userland program and then
>> send
>> it from ethernet interface in wire speed.
>
> Sound good. Would it be possible to see it and test it?
>
> Thanks,
> Michal
>
>
>>
>>   - Juice -
>>
>>
>>> Hi,
>>> I would like to create demo for high speed data sending from custom IP
>> through
>>> the ethernet. I think the best description is that there are dmaable
>>> memory
>>> mapped registers or just memory which store data I want to send (for
>> example 200MB).
>>> Linux should handle all communication between target(probably server)
>> and
>>> host
>>> (client) but data in the packets should go from that custom IP and
>>> can't go
>>> through the kernel because of performance issue.
>>> Ethernet core have own DMA which I could use but the question is if
>> there
>>> is any
>>> option how to convince the kernel that data will go directly from
>>> memory
>> mapped
>>> registers and the kernel/driver/... just setup dma BD for headers and
>> second for
>>> data.
>>> Do you have any experience with any solution with passing data
>> completely
>>> out of
>>> kernel?
>>> Thanks,
>>> Michal
>>> --
>>> Michal Simek, Ing. (M.Eng)
>>> w: www.monstr.eu p: +42-0-721842854
>>> Maintainer of Linux kernel 2.6 Microblaze Linux -
>>> http://www.monstr.eu/fdt/
>>> Microblaze U-BOOT custodian
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>>> the
>> body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>>
>>
>>
>
>
> --
> Michal Simek, Ing. (M.Eng)
> w: www.monstr.eu p: +42-0-721842854
> Maintainer of Linux kernel 2.6 Microblaze Linux -
> http://www.monstr.eu/fdt/
> Microblaze U-BOOT custodian
>



^ permalink raw reply

* Re: [PATCH] net: xen-netback: convert to hw_features
From: Michał Mirosław @ 2011-04-19 15:25 UTC (permalink / raw)
  To: Ian Campbell; +Cc: netdev@vger.kernel.org, xen-devel@lists.xensource.com
In-Reply-To: <1303226148.5997.233.camel@zakaz.uk.xensource.com>

On Tue, Apr 19, 2011 at 04:15:48PM +0100, Ian Campbell wrote:
> On Tue, 2011-04-19 at 14:43 +0100, Michał Mirosław wrote:
> > On Tue, Apr 19, 2011 at 02:39:00PM +0100, Ian Campbell wrote:
> > > On Tue, 2011-04-19 at 14:30 +0100, Michał Mirosław wrote:
> > > > On Tue, Apr 19, 2011 at 02:17:53PM +0100, Ian Campbell wrote:
> > > > > I fixed it with the following, I also moved the !can_sg MTU clamping
> > > > > into a set_features hook (like we do with netfront). Am I right that
> > > > > this pattern copes with changes to SG via ethtool etc better? I think
> > > > > it's more future proof in any case.
> > > > This looks wrong. Even if SG is turned on, you might get big skbs which
> > > > are linearized. There is a difference in SG capability and SG offload
> > > > status and as I see it the capability is what you need to test for MTU.
> > > So the existing stuff in drivers/net/xen-netfront.c is wrong too?
> > Looks like it. But I don't really know what are the real constraints for MTU.
> > What I know is that SG even if turned on needs not be used (and currently
> > it's not e.g. if checksum offload is disabled).
> The interesting case is the opposite one, isn't it? IOW if NETIF_F_SG is
> disabled but the frontend/backend agree that they have the capability to
> handle >PAGE_SIZE skbs

Then the driver might get bigger skbs but they won't ever be fragmented.

> In my experience, the normal reason for disabling the NETIF_F_SG offload
> status is that the underlying capability is somehow buggy, otherwise is
> there any reason to turn it off?

Some features depend on others to function or on some hardware/software state.
Though in most cases the reason is the one you wrote (capability also includes
what driver has implemented).

Best Regards,
Michał Mirosław

^ permalink raw reply

* [PATCH 3/3] IPVS: init and cleanup restructuring.
From: Hans Schillstrom @ 2011-04-19 15:25 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303226705-29178-1-git-send-email-hans@schillstrom.com>

This patch tries to restore the initial init and cleanup
sequences that existed before the name space patch.

The number of calls to register_pernet_device has been
reduced to one for ip_vs.ko.
Schedulers still have their own calls.

This patch adds a function __ip_vs_service_cleanup()
and a throttle (actually an on/off switch) for
the netfilter hooks.

The nf hooks will be enabled when the first service is loaded
and disabled when the last service is removed or when a
name space exit starts.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 include/net/ip_vs.h              |   17 +++++++
 net/netfilter/ipvs/ip_vs_app.c   |   15 +-----
 net/netfilter/ipvs/ip_vs_conn.c  |   20 ++++----
 net/netfilter/ipvs/ip_vs_core.c  |   86 ++++++++++++++++++++++++++++++++------
 net/netfilter/ipvs/ip_vs_ctl.c   |   66 ++++++++++++++++++++++-------
 net/netfilter/ipvs/ip_vs_est.c   |   14 +-----
 net/netfilter/ipvs/ip_vs_proto.c |   11 +----
 net/netfilter/ipvs/ip_vs_sync.c  |   13 +----
 8 files changed, 161 insertions(+), 81 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index d516f00..558e490 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -791,6 +791,7 @@ struct ip_vs_app {
 /* IPVS in network namespace */
 struct netns_ipvs {
 	int			gen;		/* Generation */
+	int			throttle;	/* Instead of nf unreg */
 	/*
 	 *	Hash table: for real service lookups
 	 */
@@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }

+/*
+ * IPVS netns init & cleanup functions
+ */
+extern int __ip_vs_estimator_init(struct net *net);
+extern int __ip_vs_control_init(struct net *net);
+extern int __ip_vs_protocol_init(struct net *net);
+extern int __ip_vs_app_init(struct net *net);
+extern int __ip_vs_conn_init(struct net *net);
+extern int __ip_vs_sync_init(struct net *net);
+extern void __ip_vs_conn_cleanup(struct net *net);
+extern void __ip_vs_app_cleanup(struct net *net);
+extern void __ip_vs_protocol_cleanup(struct net *net);
+extern void __ip_vs_control_cleanup(struct net *net);
+extern void __ip_vs_estimator_cleanup(struct net *net);
+extern void __ip_vs_sync_cleanup(struct net *net);
+extern void __ip_vs_service_cleanup(struct net *net);

 /*
  *      IPVS application functions
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 7e8e769..51f3af7 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif

-static int __net_init __ip_vs_app_init(struct net *net)
+int __net_init __ip_vs_app_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_app_cleanup(struct net *net)
+void __net_exit __ip_vs_app_cleanup(struct net *net)
 {
 	proc_net_remove(net, "ip_vs_app");
 }

-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_app_init,
-	.exit = __ip_vs_app_cleanup,
-};
-
 int __init ip_vs_app_init(void)
 {
-	int rv;
-
-	rv = register_pernet_device(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }


 void ip_vs_app_cleanup(void)
 {
-	unregister_pernet_device(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 36cd5ea..f8d6702 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1251,30 +1251,30 @@ int __net_init __ip_vs_conn_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	atomic_set(&ipvs->conn_count, 0);

 	proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
 	proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+	LeaveFunction(2);
 	return 0;
 }

-static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+void __net_exit __ip_vs_conn_cleanup(struct net *net)
 {
+	EnterFunction(2);
 	/* flush all the connection entries first */
 	ip_vs_conn_flush(net);
 	proc_net_remove(net, "ip_vs_conn");
 	proc_net_remove(net, "ip_vs_conn_sync");
+	LeaveFunction(2);
 }
-static struct pernet_operations ipvs_conn_ops = {
-	.init = __ip_vs_conn_init,
-	.exit = __ip_vs_conn_cleanup,
-};

 int __init ip_vs_conn_init(void)
 {
 	int idx;
-	int retc;

+	EnterFunction(2);
 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
 	ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
@@ -1309,18 +1309,18 @@ int __init ip_vs_conn_init(void)
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}

-	retc = register_pernet_device(&ipvs_conn_ops);
-
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+	LeaveFunction(2);

-	return retc;
+	return 0;
 }

 void ip_vs_conn_cleanup(void)
 {
-	unregister_pernet_device(&ipvs_conn_ops);
+	EnterFunction(2);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	vfree(ip_vs_conn_tab);
+	LeaveFunction(2);
 }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a7bb81d..dc27fdf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1343,6 +1343,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */

 	net = skb_net(skb);
+	/* Name space in use ? */
+	if (net->ipvs->throttle)
+		return NF_ACCEPT;
+
 	pd = ip_vs_proto_data_get(net, cih->protocol);
 	if (!pd)
 		return NF_ACCEPT;
@@ -1563,6 +1567,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 		}

 	net = skb_net(skb);
+	if (net->ipvs->throttle)
+		return NF_ACCEPT;
 	/* Protocol supported? */
 	pd = ip_vs_proto_data_get(net, iph.protocol);
 	if (unlikely(!pd))
@@ -1588,7 +1594,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}

 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-	net = skb_net(skb);
 	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1879,24 +1884,73 @@ static int __net_init __ip_vs_init(struct net *net)
 {
 	struct netns_ipvs *ipvs;

+	EnterFunction(2);
 	ipvs = net_generic(net, ip_vs_net_id);
 	if (ipvs == NULL) {
 		pr_err("%s(): no memory.\n", __func__);
 		return -ENOMEM;
 	}
+	/* Hold the beast until a service is registerd */
+	ipvs->throttle = -1;
 	ipvs->net = net;
 	/* Counters used for creating unique names */
 	ipvs->gen = atomic_read(&ipvs_netns_cnt);
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;
+
+	if ( __ip_vs_estimator_init(net) < 0)
+		goto estimator_fail;
+
+	if (__ip_vs_control_init(net) < 0)
+		goto control_fail;
+
+	if (__ip_vs_protocol_init(net) < 0)
+		goto protocol_fail;
+
+	if (__ip_vs_app_init(net) < 0)
+		goto app_fail;
+
+	if (__ip_vs_conn_init(net) < 0)
+		goto conn_fail;
+
+	if (__ip_vs_sync_init(net) < 0)
+		goto sync_fail;
+
+	LeaveFunction(2);
 	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
 			 sizeof(struct netns_ipvs), ipvs->gen);
 	return 0;
+/*
+ * Error handling
+ */
+
+sync_fail:
+	__ip_vs_conn_cleanup(net);
+conn_fail:
+	__ip_vs_app_cleanup(net);
+app_fail:
+	__ip_vs_protocol_cleanup(net);
+protocol_fail:
+	__ip_vs_control_cleanup(net);
+control_fail:
+	__ip_vs_estimator_cleanup(net);
+estimator_fail:
+	return -ENOMEM;
 }

 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	net->ipvs->throttle = -1;
+	EnterFunction(2);
+	__ip_vs_sync_cleanup(net);
+	__ip_vs_service_cleanup(net);	/* ip_vs_flush() with locks */
+	__ip_vs_conn_cleanup(net);
+	__ip_vs_app_cleanup(net);
+	__ip_vs_protocol_cleanup(net);
+	__ip_vs_control_cleanup(net);
+	__ip_vs_estimator_cleanup(net);
+	LeaveFunction(2);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }

 static struct pernet_operations ipvs_core_ops = {
@@ -1913,10 +1967,7 @@ static int __init ip_vs_init(void)
 {
 	int ret;

-	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
-	if (ret < 0)
-		return ret;
-
+	EnterFunction(2);
 	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
@@ -1944,41 +1995,50 @@ static int __init ip_vs_init(void)
 		goto cleanup_conn;
 	}

+	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if (ret < 0)
+		goto cleanup_sync;
+
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_sync;
+		goto cleanup_net;
 	}

 	pr_info("ipvs loaded.\n");
+	LeaveFunction(2);
+
 	return ret;

+cleanup_net:
+	unregister_pernet_device(&ipvs_core_ops);       /* free ip_vs struct */
 cleanup_sync:
 	ip_vs_sync_cleanup();
-  cleanup_conn:
+cleanup_conn:
 	ip_vs_conn_cleanup();
-  cleanup_app:
+cleanup_app:
 	ip_vs_app_cleanup();
-  cleanup_protocol:
+cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-  cleanup_estimator:
+cleanup_estimator:
 	ip_vs_estimator_cleanup();
-	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }

 static void __exit ip_vs_cleanup(void)
 {
+	EnterFunction(2);
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
-	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
+	LeaveFunction(2);
 }

 module_init(ip_vs_init);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 08715d8..6534ca3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void)
 }
 #endif

+
+/*  Protos */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 static int __ip_vs_addr_is_local_v6(struct net *net,
@@ -345,6 +350,9 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)

 	svc->flags &= ~IP_VS_SVC_F_HASHED;
 	atomic_dec(&svc->refcnt);
+	/* No more services then no need for input */
+	if (atomic_read(&svc->refcnt) == 0)
+		svc->net->ipvs->throttle = -1;
 	return 1;
 }

@@ -480,7 +488,6 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
 	}
 }

-
 /*
  *	Returns hash value for real service
  */
@@ -1214,6 +1221,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	write_unlock_bh(&__ip_vs_svc_lock);

 	*svc_p = svc;
+	/* Now whe have a service - full throttle */
+	ipvs->throttle = 0;
 	return 0;


@@ -1472,6 +1481,41 @@ static int ip_vs_flush(struct net *net)
 	return 0;
 }

+/*
+ *	Delete service by {netns} in the service table.
+ *	Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+	unsigned hash;
+	struct ip_vs_service *svc, *tmp;
+
+	EnterFunction(2);
+	/* Check for "full" addressed entries */
+	for (hash = 0; hash<IP_VS_SVC_TAB_SIZE; hash++) {
+		write_lock_bh(&__ip_vs_svc_lock);
+		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_table[hash],
+					 s_list) {
+			if (net_eq(svc->net, net)) {
+				ip_vs_svc_unhash(svc);
+				/*  Wait until all the svc users go away. */
+				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+				__ip_vs_del_service(svc);
+			}
+		}
+		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_fwm_table[hash],
+					 f_list) {
+			if (net_eq(svc->net, net)) {
+				ip_vs_svc_unhash(svc);
+				/*  Wait until all the svc users go away. */
+				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+				__ip_vs_del_service(svc);
+			}
+		}
+		write_unlock_bh(&__ip_vs_svc_lock);
+	}
+	LeaveFunction(2);
+}

 /*
  *	Zero counters in a service or all services
@@ -3593,6 +3637,7 @@ int __net_init __ip_vs_control_init(struct net *net)
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);

 	/* Initialize rs_table */
@@ -3619,6 +3664,7 @@ int __net_init __ip_vs_control_init(struct net *net)
 	if (__ip_vs_control_init_sysctl(net))
 		goto err;

+	LeaveFunction(2);
 	return 0;

 err:
@@ -3626,10 +3672,11 @@ err:
 	return -ENOMEM;
 }

-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit __ip_vs_control_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	ip_vs_trash_cleanup(net);
 	ip_vs_stop_estimator(net, &ipvs->tot_stats);
 	__ip_vs_control_cleanup_sysctl(net);
@@ -3637,13 +3684,9 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
 	proc_net_remove(net, "ip_vs_stats");
 	proc_net_remove(net, "ip_vs");
 	free_percpu(ipvs->tot_stats.cpustats);
+	LeaveFunction(2);
 }

-static struct pernet_operations ipvs_control_ops = {
-	.init = __ip_vs_control_init,
-	.exit = __ip_vs_control_cleanup,
-};
-
 int __init ip_vs_control_init(void)
 {
 	int idx;
@@ -3657,12 +3700,6 @@ int __init ip_vs_control_init(void)
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}

-	ret = register_pernet_device(&ipvs_control_ops);
-	if (ret) {
-		pr_err("cannot register namespace.\n");
-		goto err;
-	}
-
 	smp_wmb();	/* Do we really need it now ? */

 	ret = nf_register_sockopt(&ip_vs_sockopts);
@@ -3682,8 +3719,6 @@ int __init ip_vs_control_init(void)
 	return 0;

 err_net:
-	unregister_pernet_device(&ipvs_control_ops);
-err:
 	return ret;
 }

@@ -3691,7 +3726,6 @@ err:
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	unregister_pernet_device(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 759163e..508cce9 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
 	dst->outbps = (e->outbps + 0xF) >> 5;
 }

-static int __net_init __ip_vs_estimator_init(struct net *net)
+int __net_init __ip_vs_estimator_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_estimator_exit(struct net *net)
+void __net_exit __ip_vs_estimator_cleanup(struct net *net)
 {
 	del_timer_sync(&net_ipvs(net)->est_timer);
 }
-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_estimator_init,
-	.exit = __ip_vs_estimator_exit,
-};

 int __init ip_vs_estimator_init(void)
 {
-	int rv;
-
-	rv = register_pernet_device(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }

 void ip_vs_estimator_cleanup(void)
 {
-	unregister_pernet_device(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index f7021fc..eb86028 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 /*
  * per network name-space init
  */
-static int __net_init __ip_vs_protocol_init(struct net *net)
+int __net_init __ip_vs_protocol_init(struct net *net)
 {
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
@@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data *pd;
@@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 	}
 }

-static struct pernet_operations ipvs_proto_ops = {
-	.init = __ip_vs_protocol_init,
-	.exit = __ip_vs_protocol_cleanup,
-};
-
 int __init ip_vs_protocol_init(void)
 {
 	char protocols[64];
@@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
-	return register_pernet_device(&ipvs_proto_ops);

 	return 0;
 }
@@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;

-	unregister_pernet_device(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 1aeca1d..e911f03 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1664,7 +1664,7 @@ int stop_sync_thread(struct net *net, int state)
 /*
  * Initialize data struct for each netns
  */
-static int __net_init __ip_vs_sync_init(struct net *net)
+int __net_init __ip_vs_sync_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -1678,24 +1678,17 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 	return 0;
 }

-static void __ip_vs_sync_cleanup(struct net *net)
+void __ip_vs_sync_cleanup(struct net *net)
 {
 	stop_sync_thread(net, IP_VS_STATE_MASTER);
 	stop_sync_thread(net, IP_VS_STATE_BACKUP);
 }

-static struct pernet_operations ipvs_sync_ops = {
-	.init = __ip_vs_sync_init,
-	.exit = __ip_vs_sync_cleanup,
-};
-
-
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_device(&ipvs_sync_ops);
+	return 0;
 }

 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_device(&ipvs_sync_ops);
 }
--
1.7.2.3


^ permalink raw reply related

* [PATCH 2/3] IPVS: Change of register_pernet_subsys to register_pernet_device
From: Hans Schillstrom @ 2011-04-19 15:25 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303226705-29178-1-git-send-email-hans@schillstrom.com>

This is part 1 of a makeover of the init and cleanup
functions in ip_vs using name space.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 net/netfilter/ipvs/ip_vs_app.c   |    4 ++--
 net/netfilter/ipvs/ip_vs_conn.c  |    4 ++--
 net/netfilter/ipvs/ip_vs_core.c  |    6 +++---
 net/netfilter/ipvs/ip_vs_ctl.c   |    6 +++---
 net/netfilter/ipvs/ip_vs_est.c   |    4 ++--
 net/netfilter/ipvs/ip_vs_ftp.c   |    4 ++--
 net/netfilter/ipvs/ip_vs_lblc.c  |    6 +++---
 net/netfilter/ipvs/ip_vs_lblcr.c |    6 +++---
 net/netfilter/ipvs/ip_vs_proto.c |    4 ++--
 net/netfilter/ipvs/ip_vs_sync.c  |    4 ++--
 10 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 2dc6de1..7e8e769 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -599,12 +599,12 @@ int __init ip_vs_app_init(void)
 {
 	int rv;
 
-	rv = register_pernet_subsys(&ip_vs_app_ops);
+	rv = register_pernet_device(&ip_vs_app_ops);
 	return rv;
 }
 
 
 void ip_vs_app_cleanup(void)
 {
-	unregister_pernet_subsys(&ip_vs_app_ops);
+	unregister_pernet_device(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index c97bd45..36cd5ea 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1309,7 +1309,7 @@ int __init ip_vs_conn_init(void)
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}
 
-	retc = register_pernet_subsys(&ipvs_conn_ops);
+	retc = register_pernet_device(&ipvs_conn_ops);
 
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
@@ -1319,7 +1319,7 @@ int __init ip_vs_conn_init(void)
 
 void ip_vs_conn_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_conn_ops);
+	unregister_pernet_device(&ipvs_conn_ops);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 07accf6..a7bb81d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1913,7 +1913,7 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
-	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	ret = register_pernet_device(&ipvs_core_ops);	/* Alloc ip_vs struct */
 	if (ret < 0)
 		return ret;
 
@@ -1964,7 +1964,7 @@ cleanup_sync:
 	ip_vs_control_cleanup();
   cleanup_estimator:
 	ip_vs_estimator_cleanup();
-	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
+	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }
 
@@ -1977,7 +1977,7 @@ static void __exit ip_vs_cleanup(void)
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
-	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
+	unregister_pernet_device(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ae47090..08715d8 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3657,7 +3657,7 @@ int __init ip_vs_control_init(void)
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}
 
-	ret = register_pernet_subsys(&ipvs_control_ops);
+	ret = register_pernet_device(&ipvs_control_ops);
 	if (ret) {
 		pr_err("cannot register namespace.\n");
 		goto err;
@@ -3682,7 +3682,7 @@ int __init ip_vs_control_init(void)
 	return 0;
 
 err_net:
-	unregister_pernet_subsys(&ipvs_control_ops);
+	unregister_pernet_device(&ipvs_control_ops);
 err:
 	return ret;
 }
@@ -3691,7 +3691,7 @@ err:
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	unregister_pernet_subsys(&ipvs_control_ops);
+	unregister_pernet_device(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 8c8766c..759163e 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -216,11 +216,11 @@ int __init ip_vs_estimator_init(void)
 {
 	int rv;
 
-	rv = register_pernet_subsys(&ip_vs_app_ops);
+	rv = register_pernet_device(&ip_vs_app_ops);
 	return rv;
 }
 
 void ip_vs_estimator_cleanup(void)
 {
-	unregister_pernet_subsys(&ip_vs_app_ops);
+	unregister_pernet_device(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 6b5dd6d..dfa04d3 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -451,7 +451,7 @@ int __init ip_vs_ftp_init(void)
 {
 	int rv;
 
-	rv = register_pernet_subsys(&ip_vs_ftp_ops);
+	rv = register_pernet_device(&ip_vs_ftp_ops);
 	return rv;
 }
 
@@ -460,7 +460,7 @@ int __init ip_vs_ftp_init(void)
  */
 static void __exit ip_vs_ftp_exit(void)
 {
-	unregister_pernet_subsys(&ip_vs_ftp_ops);
+	unregister_pernet_device(&ip_vs_ftp_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 87e40ea..96765d0 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -603,20 +603,20 @@ static int __init ip_vs_lblc_init(void)
 {
 	int ret;
 
-	ret = register_pernet_subsys(&ip_vs_lblc_ops);
+	ret = register_pernet_device(&ip_vs_lblc_ops);
 	if (ret)
 		return ret;
 
 	ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
 	if (ret)
-		unregister_pernet_subsys(&ip_vs_lblc_ops);
+		unregister_pernet_device(&ip_vs_lblc_ops);
 	return ret;
 }
 
 static void __exit ip_vs_lblc_cleanup(void)
 {
 	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
-	unregister_pernet_subsys(&ip_vs_lblc_ops);
+	unregister_pernet_device(&ip_vs_lblc_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 90f618a..5de425f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -799,20 +799,20 @@ static int __init ip_vs_lblcr_init(void)
 {
 	int ret;
 
-	ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+	ret = register_pernet_device(&ip_vs_lblcr_ops);
 	if (ret)
 		return ret;
 
 	ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	if (ret)
-		unregister_pernet_subsys(&ip_vs_lblcr_ops);
+		unregister_pernet_device(&ip_vs_lblcr_ops);
 	return ret;
 }
 
 static void __exit ip_vs_lblcr_cleanup(void)
 {
 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
-	unregister_pernet_subsys(&ip_vs_lblcr_ops);
+	unregister_pernet_device(&ip_vs_lblcr_ops);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 17484a4..f7021fc 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -382,7 +382,7 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
-	return register_pernet_subsys(&ipvs_proto_ops);
+	return register_pernet_device(&ipvs_proto_ops);
 
 	return 0;
 }
@@ -393,7 +393,7 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;
 
-	unregister_pernet_subsys(&ipvs_proto_ops);
+	unregister_pernet_device(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3f87555..1aeca1d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1692,10 +1692,10 @@ static struct pernet_operations ipvs_sync_ops = {
 
 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_subsys(&ipvs_sync_ops);
+	return register_pernet_device(&ipvs_sync_ops);
 }
 
 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_sync_ops);
+	unregister_pernet_device(&ipvs_sync_ops);
 }
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 1/3] IPVS: Change of socket usage to enable name space exit.
From: Hans Schillstrom @ 2011-04-19 15:25 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom

This is the first patch in a series of three.
The cleanup doesn't work when IPVS is not shut down cleanly using ipvsadm.
Killing a namespace leaves ipvs hanging; this series cures that.

If the sync daemons are running in a namespace when it crashes
or gets killed, there is no way to stop them except for a reboot.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release() can't be used; sk_release_kernel() should be used instead.

Thanks to Eric W Biederman.

This patch is based on net-next-2.6  ver 2.6.39-rc2

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 net/netfilter/ipvs/ip_vs_sync.c |   28 +++++++++++++++++++---------
 1 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3e7961e..3f87555 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1309,7 +1309,12 @@ static struct socket *make_send_sock(struct net *net)
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)

 	return sock;

-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }

@@ -1355,7 +1360,12 @@ static struct socket *make_receive_sock(struct net *net)
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;

@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)

 	return sock;

-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }

@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
 		ip_vs_sync_buff_release(sb);

 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo);

 	return 0;
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
 	}

 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo->buf);
 	kfree(tinfo);

@@ -1601,7 +1611,7 @@ outtinfo:
 outbuf:
 	kfree(buf);
 outsocket:
-	sock_release(sock);
+	sk_release_kernel(sock->sk);
 out:
 	return result;
 }
--
1.7.2.3


^ permalink raw reply related

* Re: [PATCH] net: xen-netback: convert to hw_features
From: Ian Campbell @ 2011-04-19 15:15 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev@vger.kernel.org, xen-devel@lists.xensource.com
In-Reply-To: <20110419134352.GB4716@rere.qmqm.pl>

On Tue, 2011-04-19 at 14:43 +0100, Michał Mirosław wrote:
> On Tue, Apr 19, 2011 at 02:39:00PM +0100, Ian Campbell wrote:
> > On Tue, 2011-04-19 at 14:30 +0100, Michał Mirosław wrote:
> > > On Tue, Apr 19, 2011 at 02:17:53PM +0100, Ian Campbell wrote:
> > > > I fixed it with the following, I also moved the !can_sg MTU clamping
> > > > into a set_features hook (like we do with netfront). Am I right that
> > > > this pattern copes with changes to SG via ethtool etc better? I think
> > > > it's more future proof in any case.
> > > This looks wrong. Even if SG is turned on, you might get big skbs which
> > > are linearized. There is a difference in SG capability and SG offload
> > > status and as I see it the capability is what you need to test for MTU.
> > So the existing stuff in drivers/net/xen-netfront.c is wrong too?
> 
> Looks like it. But I don't really know what are the real constraints for MTU.
> What I know is that SG even if turned on needs not be used (and currently
> it's not e.g. if checksum offload is disabled).

The interesting case is the opposite one, isn't it? IOW, if NETIF_F_SG is
disabled but the frontend/backend agree that they have the capability to
handle >PAGE_SIZE skbs.

In my experience, the normal reason for disabling the NETIF_F_SG offload
status is that the underlying capability is somehow buggy, otherwise is
there any reason to turn it off?

> So MTU setting should not depend on SG offload state but on some capability.

Ian.



^ permalink raw reply

* Re: DSCP values in TCP handshake
From: Matt Mathis @ 2011-04-19 15:09 UTC (permalink / raw)
  To: Mikael Abrahamsson; +Cc: Stephen Hemminger, Joe Buehler, Eric Dumazet, netdev
In-Reply-To: <alpine.DEB.2.00.1104190620070.14027@uplift.swm.pp.se>

> I don't know why this didn't make it into RFC, I can inquiry if there is
> interest.

Please do.    This missing spec is one of the things that makes Less
than Best Effort (aka scavenger service) unusable.   Only the client
knows if they are fetching data in the background.   The server
doesn't care.

The other botch is the spec that DSCP can be cleared under certain
conditions, which has the effect of promoting LBE to BE.   I have lost
track of the details.

Making LBE work would go a long way to solving the buffer bloat
problem and more....

Thanks,
--MM--
The best way to predict the future is to create it.  - Alan Kay




On Tue, Apr 19, 2011 at 12:28 AM, Mikael Abrahamsson <swmike@swm.pp.se> wrote:
> On Mon, 18 Apr 2011, Stephen Hemminger wrote:
>
>> Linux does not look at DSCP of incoming packets (there is no queue).
>
> Then I see no reason for the policy of not reflecting DSCP.
>
> If we receive the DSCP marked packet then it means the network is either not
> QoS enabled (it doesn't care) or it's actually allowed through the border
> router with DSCP unchanged. Either means it's safe to reflect the DSCP
> value, either it will have no effect or it's actually meant to be
> prioritized.
>
> With precedence, it originally was mandated that if the precedence value
> changed, the TCP session should be reset. Fortunately, this was changed but
> I would still say that it's thought that DSCP values should be reflected by
> the server.
>
> For instance:
>
> <http://tools.ietf.org/html/draft-ietf-ieprep-reflexive-dscp-02>
>
> "The requester could initiate this. Thus, if the DSCP
>   received on one TCP segment differs from the TCP used on a prior TCP
>   segment in a session, the new DSCP SHOULD be reflected unless local
>   policy prevents this."
>
> I don't know why this didn't make it into RFC, I can inquiry if there is
> interest.
>
> --
> Mikael Abrahamsson    email: swmike@swm.pp.se
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* Re: [PATCH net-next-2.6 3/3] bonding,ipv4,ipv6,vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
From: Ben Hutchings @ 2011-04-19 14:56 UTC (permalink / raw)
  To: Brian Haley
  Cc: Jay Vosburgh, David Miller, Andy Gospodarek, Patrick McHardy,
	netdev
In-Reply-To: <4DACE638.9060909@hp.com>

On Mon, 2011-04-18 at 21:32 -0400, Brian Haley wrote:
> On 04/18/2011 03:09 PM, Ben Hutchings wrote:
> > How about restoring the parameters like this:
> > 
> > ---
> > From: Ben Hutchings <bhutchings@solarflare.com>
> > Date: Mon, 18 Apr 2011 19:36:48 +0100
> > Subject: [PATCH net-next-2.6] ipv4,ipv6,bonding: Restore control over number of peer notifications
> > 
> > For backward compatibility, we should retain the module parameters and
> > sysfs attributes to control the number of peer notifications
> > (gratuitous ARPs and unsolicited NAs) sent after bonding failover.
> > Also, it is possible for failover to take place even though the new
> > active slave does not have link up, and in that case the peer
> > notification should be deferred until it does.
> > 
> > Change ipv4 and ipv6 so they do not automatically send peer
> > notifications on bonding failover.  Change the bonding driver to send
> > separate NETDEV_NOTIFY_PEERS notifications when the link is up, as
> > many times as requested.  Since it does not directly control which
> > protocols send notifications, make num_grat_arp and num_unsol_na
> > aliases for a single parameter.
> 
> Hi Ben,
> 
> I think this looks good, I'll try and get this tested here when I have
> a chance, but for now I can:
> 
> Acked-by: Brian Haley <brian.haley@hp.com>
> 
> Should we just go ahead and make a new parameter for peer notification?
> Compiled but untested patch below.
[...]

If everyone is in agreement to deprecate the old parameters in favour of
a single parameter (or none) I think there should be a target date for
removal, documented in Documentation/feature-removal-schedule.txt.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* [PATCH 0/3] netfilter: netfilter fixes for 2.6.39-rc4
From: kaber @ 2011-04-19 14:51 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev

Hi Dave,

following are three netfilter fixes for 2.6.39-rc4, containing:

- a fix for the bitmap:ip,mac set type to require the src/dst parameter
  to be set to src, from Jozsef

- a fix to make --del-set of the SET target work, from Jozsef

- a patch to fix the order in which sets are dumped, from Jozsef

Please pull from:

git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.git master

Thanks!


^ permalink raw reply

* [PATCH 1/3] netfilter: ipset: bitmap:ip,mac type requires "src" for MAC
From: kaber @ 2011-04-19 14:51 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1303224705-17400-1-git-send-email-kaber@trash.net>

From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

Enforce that the second "src/dst" parameter of the set match and SET target
must be "src", because we have access to the source MAC only in the packet.
The previous behaviour, in which the type required the second parameter
but actually ignored its value, was counter-intuitive and confusing.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_bitmap_ipmac.c |    4 ++++
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 00a3324..a274300 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -343,6 +343,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct ipmac data;
 
+	/* MAC can be src only */
+	if (!(flags & IPSET_DIM_TWO_SRC))
+		return 0;
+
 	data.id = ntohl(ip4addr(skb, flags & IPSET_DIM_ONE_SRC));
 	if (data.id < map->first_ip || data.id > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 3/3] netfilter: ipset: Fix the order of listing of sets
From: kaber @ 2011-04-19 14:51 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1303224705-17400-1-git-send-email-kaber@trash.net>

From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

A restorable save of sets requires that sets of the list:set type
come last, and the code which should have taken the ordering into
account was broken. The patch fixes the listing order.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_core.c |   18 ++++++++++--------
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e88ac3c..d87e03b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1022,8 +1022,9 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	if (cb->args[1] >= ip_set_max)
 		goto out;
 
-	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
 	max = cb->args[0] == DUMP_ONE ? cb->args[1] + 1 : ip_set_max;
+dump_last:
+	pr_debug("args[0]: %ld args[1]: %ld\n", cb->args[0], cb->args[1]);
 	for (; cb->args[1] < max; cb->args[1]++) {
 		index = (ip_set_id_t) cb->args[1];
 		set = ip_set_list[index];
@@ -1038,8 +1039,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		 * so that lists (unions of sets) are dumped last.
 		 */
 		if (cb->args[0] != DUMP_ONE &&
-		    !((cb->args[0] == DUMP_ALL) ^
-		      (set->type->features & IPSET_DUMP_LAST)))
+		    ((cb->args[0] == DUMP_ALL) ==
+		     !!(set->type->features & IPSET_DUMP_LAST)))
 			continue;
 		pr_debug("List set: %s\n", set->name);
 		if (!cb->args[2]) {
@@ -1083,6 +1084,12 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 			goto release_refcount;
 		}
 	}
+	/* If we dump all sets, continue with dumping last ones */
+	if (cb->args[0] == DUMP_ALL) {
+		cb->args[0] = DUMP_LAST;
+		cb->args[1] = 0;
+		goto dump_last;
+	}
 	goto out;
 
 nla_put_failure:
@@ -1093,11 +1100,6 @@ release_refcount:
 		pr_debug("release set %s\n", ip_set_list[index]->name);
 		ip_set_put_byindex(index);
 	}
-
-	/* If we dump all sets, continue with dumping last ones */
-	if (cb->args[0] == DUMP_ALL && cb->args[1] >= max && !cb->args[2])
-		cb->args[0] = DUMP_LAST;
-
 out:
 	if (nlh) {
 		nlmsg_end(skb, nlh);
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 2/3] netfilter: ipset: set match and SET target fixes
From: kaber @ 2011-04-19 14:51 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1303224705-17400-1-git-send-email-kaber@trash.net>

From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

The SET target with --del-set did not work because it wrongly used
the internal dimension of --add-set instead of --del-set.
Also, the checkentry functions did not release the set references when
they returned an error. Bugs reported by Lennert Buytenhek.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_set.c |   18 ++++++++++++++++--
 1 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 061d48c..b3babae 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -81,6 +81,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
+		ip_set_nfnl_put(info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -135,6 +136,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
+			if (info->add_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(info->add_set.index);
 			return -ENOENT;
 		}
 	}
@@ -142,6 +145,10 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 	    info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
+		if (info->add_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->add_set.index);
+		if (info->del_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->del_set.index);
 		return -ERANGE;
 	}
 
@@ -192,6 +199,7 @@ set_match_checkentry(const struct xt_mtchk_param *par)
 	if (info->match_set.dim > IPSET_DIM_MAX) {
 		pr_warning("Protocol error: set match dimension "
 			   "is over the limit!\n");
+		ip_set_nfnl_put(info->match_set.index);
 		return -ERANGE;
 	}
 
@@ -219,7 +227,7 @@ set_target(struct sk_buff *skb, const struct xt_action_param *par)
 	if (info->del_set.index != IPSET_INVALID_ID)
 		ip_set_del(info->del_set.index,
 			   skb, par->family,
-			   info->add_set.dim,
+			   info->del_set.dim,
 			   info->del_set.flags);
 
 	return XT_CONTINUE;
@@ -245,13 +253,19 @@ set_target_checkentry(const struct xt_tgchk_param *par)
 		if (index == IPSET_INVALID_ID) {
 			pr_warning("Cannot find del_set index %u as target\n",
 				   info->del_set.index);
+			if (info->add_set.index != IPSET_INVALID_ID)
+				ip_set_nfnl_put(info->add_set.index);
 			return -ENOENT;
 		}
 	}
 	if (info->add_set.dim > IPSET_DIM_MAX ||
-	    info->del_set.flags > IPSET_DIM_MAX) {
+	    info->del_set.dim > IPSET_DIM_MAX) {
 		pr_warning("Protocol error: SET target dimension "
 			   "is over the limit!\n");
+		if (info->add_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->add_set.index);
+		if (info->del_set.index != IPSET_INVALID_ID)
+			ip_set_nfnl_put(info->del_set.index);
 		return -ERANGE;
 	}
 
-- 
1.7.2.3


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox