From: Stephen Hemminger <shemminger@vyatta.com>
To: karaluh@karaluh.pl, Ed Cashin <ecashin@coraid.com>
Cc: roel.kluin@gmail.com, harvey.harrison@gmail.com,
bzolnier@gmail.com, netdev@vger.kernel.org
Subject: Re: [PATCH 04/10] AOE: use rcu to find network device
Date: Tue, 10 Nov 2009 15:53:16 -0800 [thread overview]
Message-ID: <20091110155316.2c3d7b6e@nehalam> (raw)
In-Reply-To: <20091110150617.0e6920f0@nehalam>
On Tue, 10 Nov 2009 15:06:17 -0800
Stephen Hemminger <shemminger@vyatta.com> wrote:
> On Tue, 10 Nov 2009 15:01:49 -0500
> Ed Cashin <ecashin@coraid.com> wrote:
>
> > On Tue Nov 10 13:07:37 EST 2009, shemminger@vyatta.com wrote:
> > > This gets rid of another use of read_lock(&dev_base_lock) by using
> > > RCU. Also, it only increments the reference count of the device actually
> > > used rather than holding and releasing every device
> > >
> > > Compile tested only.
> >
> > This function runs once a minute when the aoe driver is loaded,
> > if you'd like to test it a bit more.
> >
> > It looks like there's no dev_put corresponding to the dev_hold
> > after the changes.
> >
>
> Hmm, looks like AOE actually is not ref counting the network device.
> So my patch is incorrect.
>
> As it stands (before my patch), it is UNSAFE. It can decide to queue
> packets to a device that is removed out from underneath it causing
> reference to freed memory.
>
> Moving the rcu_read_lock up to aoecmd_cfg() would solve that but the
> whole driver appears to be unsafe about device refcounting and handling
> device removal properly.
>
> It needs to:
>
> 1. Get a device ref count when it remembers a device: (ie addif)
> 2. Install a notifier that looks for device removal events
> 3. In notifier, remove interface, including flushing all pending
> skb's for that device.
>
> This obviously is beyond the scope of the RCU stuff.
Here is a patch to get you going. It does compile, but it probably
won't work, because the code doesn't handle the case of the last
device going away from a target. This is yet another pre-existing
bug: if a timeout happens and ejectif() is called to remove a device,
resend() will BUG in ifrotate() if all devices are gone.
---
drivers/block/aoe/aoe.h | 2 ++
drivers/block/aoe/aoecmd.c | 19 +++++++++++++++++++
drivers/block/aoe/aoedev.c | 14 ++++++++++++++
drivers/block/aoe/aoemain.c | 24 ++++++++++++++++++++++++
4 files changed, 59 insertions(+)
--- a/drivers/block/aoe/aoecmd.c 2009-11-10 15:13:25.673859220 -0800
+++ b/drivers/block/aoe/aoecmd.c 2009-11-10 15:49:20.009047132 -0800
@@ -413,6 +413,8 @@ addif(struct aoetgt *t, struct net_devic
p = getif(t, NULL);
if (!p)
return NULL;
+
+ dev_hold(nd);
p->nd = nd;
p->maxbcnt = DEFAULTBCNT;
p->lost = 0;
@@ -424,12 +426,29 @@ static void
ejectif(struct aoetgt *t, struct aoeif *ifp)
{
struct aoeif *e;
+ struct net_device *nd;
ulong n;
e = t->ifs + NAOEIFS - 1;
+ nd = ifp->nd;	/* device of the slot being ejected (not the last slot); saved before memmove() overwrites it */
n = (e - ifp) * sizeof *ifp;
memmove(ifp, ifp+1, n);
e->nd = NULL;
+ dev_put(nd);	/* drop the reference taken by dev_hold() in addif() */
+}
+
+void aoecmd_flushnet(struct aoedev *d, struct net_device *nd)	/* eject nd from every target of d; aoedev_ejectnet() calls this with d->lock held */
+{
+ struct aoetgt **tt, **te;
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt < te && *tt; tt++) {	/* walk the target array, stopping at the first empty slot */
+ struct aoetgt *t = *tt;
+ struct aoeif *ifp;
+
+ while ( (ifp = getif(t, nd)) )	/* repeat until getif() no longer finds nd on this target */
+ ejectif(t, ifp);
+ }
}
static int
--- a/drivers/block/aoe/aoemain.c 2009-11-10 15:13:25.696859195 -0800
+++ b/drivers/block/aoe/aoemain.c 2009-11-10 15:48:43.352047188 -0800
@@ -8,6 +8,8 @@
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
#include "aoe.h"
MODULE_LICENSE("GPL");
@@ -54,11 +56,28 @@ discover_timer(ulong vp)
}
}
+/* Netdevice notifier callback: on NETDEV_UNREGISTER of a device that is_aoe_netif() accepts, eject it via aoedev_ejectnet(). */
+static int aoe_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *nd = ptr;	/* ptr is treated as the affected net_device */
+
+ if (is_aoe_netif(nd) && event == NETDEV_UNREGISTER)
+ aoedev_ejectnet(nd);
+
+ return NOTIFY_DONE;	/* returned unconditionally, whether or not the event was acted on */
+}
+
+static struct notifier_block aoe_notifier = {	/* registered in aoe_init(), unregistered in aoe_exit() */
+ .notifier_call = aoe_device_event,
+};
+
static void
aoe_exit(void)
{
discover_timer(TKILL);
+ unregister_netdevice_notifier(&aoe_notifier);
aoenet_exit();
unregister_blkdev(AOE_MAJOR, DEVICE_NAME);
aoechr_exit();
@@ -83,6 +102,9 @@ aoe_init(void)
ret = aoenet_init();
if (ret)
goto net_fail;
+ ret = register_netdevice_notifier(&aoe_notifier);
+ if (ret)
+ goto notifier_fail;
ret = register_blkdev(AOE_MAJOR, DEVICE_NAME);
if (ret < 0) {
printk(KERN_ERR "aoe: can't register major\n");
@@ -94,6 +116,8 @@ aoe_init(void)
return 0;
blkreg_fail:
+ unregister_netdevice_notifier(&aoe_notifier);
+ notifier_fail:
aoenet_exit();
net_fail:
aoeblk_exit();
--- a/drivers/block/aoe/aoe.h 2009-11-10 15:36:07.775921768 -0800
+++ b/drivers/block/aoe/aoe.h 2009-11-10 15:43:14.972984754 -0800
@@ -186,6 +186,7 @@ void aoecmd_ata_rsp(struct sk_buff *);
void aoecmd_cfg_rsp(struct sk_buff *);
void aoecmd_sleepwork(struct work_struct *);
void aoecmd_cleanslate(struct aoedev *);
+void aoecmd_flushnet(struct aoedev *, struct net_device *);
struct sk_buff *aoecmd_ata_id(struct aoedev *);
int aoedev_init(void);
@@ -194,6 +195,7 @@ struct aoedev *aoedev_by_aoeaddr(int maj
struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
void aoedev_downdev(struct aoedev *d);
int aoedev_flush(const char __user *str, size_t size);
+void aoedev_ejectnet(struct net_device *);
int aoenet_init(void);
void aoenet_exit(void);
--- a/drivers/block/aoe/aoedev.c 2009-11-10 15:13:25.685859893 -0800
+++ b/drivers/block/aoe/aoedev.c 2009-11-10 15:46:19.430861404 -0800
@@ -162,6 +162,20 @@ aoedev_flush(const char __user *str, siz
return 0;
}
+void aoedev_ejectnet(struct net_device *nd)	/* eject nd from every target of every aoe device on devlist */
+{
+ struct aoedev *d;
+ unsigned long flags;
+
+ spin_lock_irqsave(&devlist_lock, flags);	/* held, with irqs off, across the whole devlist walk */
+ for (d = devlist; d; d = d->next) {
+ spin_lock(&d->lock);	/* per-device lock, nested inside devlist_lock */
+ aoecmd_flushnet(d, nd);
+ spin_unlock(&d->lock);
+ }
+ spin_unlock_irqrestore(&devlist_lock, flags);	/* release the list lock taken above; d is NULL once the loop ends */
+}
+
/* I'm not really sure that this is a realistic problem, but if the
network driver goes gonzo let's just leak memory after complaining. */
static void
next prev parent reply other threads:[~2009-11-10 23:53 UTC|newest]
Thread overview: 61+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-11-10 17:54 [PATCH 00/10] netdev: get rid of read_lock(&dev_base_lock) usages Stephen Hemminger
2009-11-10 17:54 ` [PATCH 01/10] netdev: add netdev_continue_rcu Stephen Hemminger
2009-11-10 18:19 ` Eric Dumazet
2009-11-11 6:47 ` David Miller
2009-11-10 19:39 ` Paul E. McKenney
2009-11-10 17:54 ` [PATCH 02/10] vlan: eliminate use of dev_base_lock Stephen Hemminger
2009-11-10 18:20 ` Eric Dumazet
2009-11-11 6:47 ` David Miller
2009-11-10 17:54 ` [PATCH 03/10] net: use rcu for network scheduler API Stephen Hemminger
2009-11-10 18:20 ` Eric Dumazet
2009-11-11 6:47 ` David Miller
2009-11-10 17:54 ` [PATCH 04/10] AOE: use rcu to find network device Stephen Hemminger
2009-11-10 18:23 ` Eric Dumazet
2009-11-10 20:01 ` Ed Cashin
2009-11-10 23:06 ` Stephen Hemminger
2009-11-10 23:53 ` Stephen Hemminger [this message]
2009-11-11 6:48 ` David Miller
2009-11-12 14:33 ` Ed Cashin
2009-11-12 17:10 ` Stephen Hemminger
2009-11-12 18:07 ` Ed Cashin
2009-11-12 19:09 ` Stephen Hemminger
2009-11-18 16:49 ` Ed Cashin
2009-11-11 14:22 ` Ed Cashin
2009-11-13 21:39 ` Ed Cashin
2009-11-13 22:24 ` Stephen Hemminger
2009-11-10 17:54 ` [PATCH 05/10] parisc: use RCU " Stephen Hemminger
2009-11-10 17:54 ` Stephen Hemminger
2009-11-10 18:26 ` Eric Dumazet
2009-11-10 18:26 ` Eric Dumazet
2009-11-11 6:48 ` David Miller
2009-11-10 17:54 ` [PATCH 06/10] s390: use RCU to walk list of network devices Stephen Hemminger
2009-11-10 18:27 ` Eric Dumazet
2009-11-10 18:29 ` Stephen Hemminger
2009-11-10 18:40 ` Eric Dumazet
2009-11-11 6:49 ` David Miller
2009-11-10 17:54 ` [PATCH 07/10] decnet: use RCU to find " Stephen Hemminger
2009-11-10 18:43 ` Eric Dumazet
2009-11-10 18:50 ` Stephen Hemminger
2009-11-10 18:24 ` steve
2009-11-11 17:39 ` [PATCH 1/2] decnet: add RTNL lock when reading address list Stephen Hemminger
2009-11-11 17:40 ` [PATCH 2/2] decnet: convert dndev_lock to spinlock Stephen Hemminger
2009-11-12 3:56 ` David Miller
2009-11-12 3:56 ` [PATCH 1/2] decnet: add RTNL lock when reading address list David Miller
2009-11-10 19:25 ` [PATCH 07/10] decnet: use RCU to find network devices Eric Dumazet
2009-11-11 6:49 ` David Miller
2009-11-10 17:54 ` [PATCH 08/10] ipv6: use RCU to walk list of " Stephen Hemminger
2009-11-11 6:50 ` David Miller
2009-11-12 3:34 ` [PATCH net-next-2.6] " Eric Dumazet
2009-11-14 4:39 ` David Miller
2009-11-10 17:54 ` [PATCH 09/10] IPV4: use rcu to walk list of devices in IGMP Stephen Hemminger
2009-11-10 18:47 ` Eric Dumazet
2009-11-11 6:50 ` David Miller
2009-11-10 17:54 ` [PATCH 10/10] CAN: use dev_get_by_index_rcu Stephen Hemminger
2009-11-10 18:34 ` Eric Dumazet
2009-11-11 5:54 ` Oliver Hartkopp
2009-11-11 6:50 ` David Miller
2009-11-10 18:18 ` [PATCH 00/10] netdev: get rid of read_lock(&dev_base_lock) usages Eric Dumazet
2009-11-10 18:22 ` Stephen Hemminger
2009-11-10 18:24 ` Stephen Hemminger
2009-11-10 18:39 ` Eric Dumazet
2009-11-10 18:53 ` Stephen Hemminger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20091110155316.2c3d7b6e@nehalam \
--to=shemminger@vyatta.com \
--cc=bzolnier@gmail.com \
--cc=ecashin@coraid.com \
--cc=harvey.harrison@gmail.com \
--cc=karaluh@karaluh.pl \
--cc=netdev@vger.kernel.org \
--cc=roel.kluin@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.