netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Stephen Hemminger <shemminger@vyatta.com>
To: karaluh@karaluh.pl, Ed Cashin <ecashin@coraid.com>
Cc: roel.kluin@gmail.com, harvey.harrison@gmail.com,
	bzolnier@gmail.com, netdev@vger.kernel.org
Subject: Re: [PATCH 04/10] AOE: use rcu to find network device
Date: Tue, 10 Nov 2009 15:53:16 -0800	[thread overview]
Message-ID: <20091110155316.2c3d7b6e@nehalam> (raw)
In-Reply-To: <20091110150617.0e6920f0@nehalam>

On Tue, 10 Nov 2009 15:06:17 -0800
Stephen Hemminger <shemminger@vyatta.com> wrote:

> On Tue, 10 Nov 2009 15:01:49 -0500
> Ed Cashin <ecashin@coraid.com> wrote:
> 
> > On Tue Nov 10 13:07:37 EST 2009, shemminger@vyatta.com wrote:
> > > This gets rid of another use of read_lock(&dev_base_lock) by using
> > > RCU. Also, it only increments the reference count of the device actually
> > > used rather than holding and releasing every device
> > > 
> > > Compile tested only.
> > 
> > This function runs once a minute when the aoe driver is loaded,
> > if you'd like to test it a bit more.
> > 
> > It looks like there's no dev_put corresponding to the dev_hold
> > after the changes.
> > 
> 
> Hmm, looks like AOE actually is not ref counting the network device.
> So my patch is incorrect. 
> 
> As it stands (before my patch), it is UNSAFE. It can decide to queue
> packets to a device that is removed out from underneath it causing
> reference to freed memory.
> 
> Moving the rcu_read_lock up to aoecmd_cfg() would solve that but the
> whole driver appears to be unsafe about device refcounting and handling
> device removal properly.  
> 
> It needs to:
> 
> 1. Get a device ref count when it remembers a device: (ie addif)
> 2. Install a notifier that looks for device removal events
> 3. In notifier, remove interface, including flushing all pending
>    skb's for that device.
> 
> This obviously is beyond the scope of the RCU stuff.

Here is a patch to get you going. It compiles, but it probably
won't work because the code doesn't handle the case of the last
device going away from a target. This is yet another pre-existing
bug: if a timeout happens and ejectif() is called to remove a device,
resend() will BUG in ifrotate() if all devices are gone.


---
 drivers/block/aoe/aoe.h     |    2 ++
 drivers/block/aoe/aoecmd.c  |   19 +++++++++++++++++++
 drivers/block/aoe/aoedev.c  |   14 ++++++++++++++
 drivers/block/aoe/aoemain.c |   24 ++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

--- a/drivers/block/aoe/aoecmd.c	2009-11-10 15:13:25.673859220 -0800
+++ b/drivers/block/aoe/aoecmd.c	2009-11-10 15:49:20.009047132 -0800
@@ -413,6 +413,8 @@ addif(struct aoetgt *t, struct net_devic
 	p = getif(t, NULL);
 	if (!p)
 		return NULL;
+
+	dev_hold(nd);
 	p->nd = nd;
 	p->maxbcnt = DEFAULTBCNT;
 	p->lost = 0;
@@ -424,12 +426,29 @@ static void
 ejectif(struct aoetgt *t, struct aoeif *ifp)
 {
 	struct aoeif *e;
+	struct net_device *nd;	/* device whose reference addif() took */
 	ulong n;
 
 	e = t->ifs + NAOEIFS - 1;
+	nd = ifp->nd;		/* save before memmove overwrites *ifp */
 	n = (e - ifp) * sizeof *ifp;
 	memmove(ifp, ifp+1, n);
 	e->nd = NULL;
+	dev_put(nd);		/* drop the reference for the ejected slot */
+}
+
+void aoecmd_flushnet(struct aoedev *d, struct net_device *nd)
+{
+	/* Eject every interface slot bound to nd, on each target of d. */
+	struct aoetgt **cur = d->targets;
+	struct aoetgt **end = cur + NTARGETS;
+	struct aoeif *slot;
+
+	/* Stop at the first empty target slot or at the end of the array. */
+	for (; cur < end && *cur; cur++) {
+		while ((slot = getif(*cur, nd)) != NULL)
+			ejectif(*cur, slot);
+	}
 }
 
 static int
--- a/drivers/block/aoe/aoemain.c	2009-11-10 15:13:25.696859195 -0800
+++ b/drivers/block/aoe/aoemain.c	2009-11-10 15:48:43.352047188 -0800
@@ -8,6 +8,8 @@
 #include <linux/blkdev.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
 #include "aoe.h"
 
 MODULE_LICENSE("GPL");
@@ -54,11 +56,28 @@ discover_timer(ulong vp)
 	}
 }
 
+/* Netdevice notifier: eject an AoE interface when it is unregistered. */
+static int aoe_device_event(struct notifier_block *unused,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	int wanted = is_aoe_netif(dev) && event == NETDEV_UNREGISTER;
+
+	if (wanted)
+		aoedev_ejectnet(dev);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block aoe_notifier = {
+	.notifier_call = aoe_device_event,	/* netdevice state changes */
+};
+
 static void
 aoe_exit(void)
 {
 	discover_timer(TKILL);
 
+	unregister_netdevice_notifier(&aoe_notifier);
 	aoenet_exit();
 	unregister_blkdev(AOE_MAJOR, DEVICE_NAME);
 	aoechr_exit();
@@ -83,6 +102,9 @@ aoe_init(void)
 	ret = aoenet_init();
 	if (ret)
 		goto net_fail;
+	ret = register_netdevice_notifier(&aoe_notifier);
+	if (ret)
+		goto notifier_fail;
 	ret = register_blkdev(AOE_MAJOR, DEVICE_NAME);
 	if (ret < 0) {
 		printk(KERN_ERR "aoe: can't register major\n");
@@ -94,6 +116,8 @@ aoe_init(void)
 	return 0;
 
  blkreg_fail:
+	unregister_netdevice_notifier(&aoe_notifier);
+ notifier_fail:
 	aoenet_exit();
  net_fail:
 	aoeblk_exit();
--- a/drivers/block/aoe/aoe.h	2009-11-10 15:36:07.775921768 -0800
+++ b/drivers/block/aoe/aoe.h	2009-11-10 15:43:14.972984754 -0800
@@ -186,6 +186,7 @@ void aoecmd_ata_rsp(struct sk_buff *);
 void aoecmd_cfg_rsp(struct sk_buff *);
 void aoecmd_sleepwork(struct work_struct *);
 void aoecmd_cleanslate(struct aoedev *);
+void aoecmd_flushnet(struct aoedev *, struct net_device *);
 struct sk_buff *aoecmd_ata_id(struct aoedev *);
 
 int aoedev_init(void);
@@ -194,6 +195,7 @@ struct aoedev *aoedev_by_aoeaddr(int maj
 struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
 void aoedev_downdev(struct aoedev *d);
 int aoedev_flush(const char __user *str, size_t size);
+void aoedev_ejectnet(struct net_device *);
 
 int aoenet_init(void);
 void aoenet_exit(void);
--- a/drivers/block/aoe/aoedev.c	2009-11-10 15:13:25.685859893 -0800
+++ b/drivers/block/aoe/aoedev.c	2009-11-10 15:46:19.430861404 -0800
@@ -162,6 +162,20 @@ aoedev_flush(const char __user *str, siz
 	return 0;
 }
 
+void aoedev_ejectnet(struct net_device *nd)
+{
+	struct aoedev *d;
+	unsigned long flags;
+
+	spin_lock_irqsave(&devlist_lock, flags);	/* guards devlist */
+	for (d = devlist; d; d = d->next) {
+		spin_lock(&d->lock);
+		aoecmd_flushnet(d, nd);		/* eject nd from d's targets */
+		spin_unlock(&d->lock);
+	}
+	spin_unlock_irqrestore(&devlist_lock, flags);	/* d is NULL here */
+}
+
 /* I'm not really sure that this is a realistic problem, but if the
 network driver goes gonzo let's just leak memory after complaining. */
 static void

  reply	other threads:[~2009-11-10 23:53 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-10 17:54 [PATCH 00/10] netdev: get rid of read_lock(&dev_base_lock) usages Stephen Hemminger
2009-11-10 17:54 ` [PATCH 01/10] netdev: add netdev_continue_rcu Stephen Hemminger
2009-11-10 18:19   ` Eric Dumazet
2009-11-11  6:47     ` David Miller
2009-11-10 19:39   ` Paul E. McKenney
2009-11-10 17:54 ` [PATCH 02/10] vlan: eliminate use of dev_base_lock Stephen Hemminger
2009-11-10 18:20   ` Eric Dumazet
2009-11-11  6:47     ` David Miller
2009-11-10 17:54 ` [PATCH 03/10] net: use rcu for network scheduler API Stephen Hemminger
2009-11-10 18:20   ` Eric Dumazet
2009-11-11  6:47     ` David Miller
2009-11-10 17:54 ` [PATCH 04/10] AOE: use rcu to find network device Stephen Hemminger
2009-11-10 18:23   ` Eric Dumazet
2009-11-10 20:01   ` Ed Cashin
2009-11-10 23:06     ` Stephen Hemminger
2009-11-10 23:53       ` Stephen Hemminger [this message]
2009-11-11  6:48         ` David Miller
2009-11-12 14:33         ` Ed Cashin
2009-11-12 17:10           ` Stephen Hemminger
2009-11-12 18:07             ` Ed Cashin
2009-11-12 19:09               ` Stephen Hemminger
2009-11-18 16:49         ` Ed Cashin
2009-11-11 14:22       ` Ed Cashin
2009-11-13 21:39         ` Ed Cashin
2009-11-13 22:24           ` Stephen Hemminger
2009-11-10 17:54 ` [PATCH 05/10] parisc: use RCU " Stephen Hemminger
2009-11-10 18:26   ` Eric Dumazet
2009-11-11  6:48   ` David Miller
2009-11-10 17:54 ` [PATCH 06/10] s390: use RCU to walk list of network devices Stephen Hemminger
2009-11-10 18:27   ` Eric Dumazet
2009-11-10 18:29     ` Stephen Hemminger
2009-11-10 18:40   ` Eric Dumazet
2009-11-11  6:49     ` David Miller
2009-11-10 17:54 ` [PATCH 07/10] decnet: use RCU to find " Stephen Hemminger
2009-11-10 18:43   ` Eric Dumazet
2009-11-10 18:50     ` Stephen Hemminger
2009-11-10 18:24       ` steve
2009-11-11 17:39         ` [PATCH 1/2] decnet: add RTNL lock when reading address list Stephen Hemminger
2009-11-11 17:40           ` [PATCH 2/2] decnet: convert dndev_lock to spinlock Stephen Hemminger
2009-11-12  3:56             ` David Miller
2009-11-12  3:56           ` [PATCH 1/2] decnet: add RTNL lock when reading address list David Miller
2009-11-10 19:25       ` [PATCH 07/10] decnet: use RCU to find network devices Eric Dumazet
2009-11-11  6:49   ` David Miller
2009-11-10 17:54 ` [PATCH 08/10] ipv6: use RCU to walk list of " Stephen Hemminger
2009-11-11  6:50   ` David Miller
2009-11-12  3:34   ` [PATCH net-next-2.6] " Eric Dumazet
2009-11-14  4:39     ` David Miller
2009-11-10 17:54 ` [PATCH 09/10] IPV4: use rcu to walk list of devices in IGMP Stephen Hemminger
2009-11-10 18:47   ` Eric Dumazet
2009-11-11  6:50   ` David Miller
2009-11-10 17:54 ` [PATCH 10/10] CAN: use dev_get_by_index_rcu Stephen Hemminger
2009-11-10 18:34   ` Eric Dumazet
2009-11-11  5:54     ` Oliver Hartkopp
2009-11-11  6:50       ` David Miller
2009-11-10 18:18 ` [PATCH 00/10] netdev: get rid of read_lock(&dev_base_lock) usages Eric Dumazet
2009-11-10 18:22   ` Stephen Hemminger
2009-11-10 18:24   ` Stephen Hemminger
2009-11-10 18:39     ` Eric Dumazet
2009-11-10 18:53       ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091110155316.2c3d7b6e@nehalam \
    --to=shemminger@vyatta.com \
    --cc=bzolnier@gmail.com \
    --cc=ecashin@coraid.com \
    --cc=harvey.harrison@gmail.com \
    --cc=karaluh@karaluh.pl \
    --cc=netdev@vger.kernel.org \
    --cc=roel.kluin@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).