* [RFC] add rtnl semaphore to linux-atm
@ 2003-10-01 11:34 chas williams
2003-10-01 12:42 ` David S. Miller
0 siblings, 1 reply; 16+ messages in thread
From: chas williams @ 2003-10-01 11:34 UTC (permalink / raw)
To: netdev
i am thinking about doing the following to fix the race
during ATM_ITF_ANY operation. rtnl is held across
registration/unregistration. this means that you can get
read-only access to the device list by holding rtnl
or a read_lock on atm_dev_lock similar to the scheme
used by netdevice (or so i think).
(the register_atmdevice/unregister just make it
easier to see where one might call netdevice instead
in the future)
===== drivers/atm/atmtcp.c 1.20 vs edited =====
--- 1.20/drivers/atm/atmtcp.c Tue Sep 23 19:22:15 2003
+++ edited/drivers/atm/atmtcp.c Mon Sep 29 21:34:36 2003
@@ -378,7 +378,7 @@
struct atm_dev *dev;
dev = NULL;
- if (itf != -1) dev = atm_dev_lookup(itf);
+ if (itf != -1) dev = atm_dev_get_by_index(itf);
if (dev) {
if (dev->ops != &atmtcp_v_dev_ops) {
atm_dev_put(dev);
@@ -415,7 +415,7 @@
struct atm_dev *dev;
struct atmtcp_dev_data *dev_data;
- dev = atm_dev_lookup(itf);
+ dev = atm_dev_get_by_index(itf);
if (!dev) return -ENODEV;
if (dev->ops != &atmtcp_v_dev_ops) {
atm_dev_put(dev);
===== include/linux/atmdev.h 1.32 vs edited =====
--- 1.32/include/linux/atmdev.h Tue Sep 23 18:19:10 2003
+++ edited/include/linux/atmdev.h Mon Sep 29 21:59:18 2003
@@ -388,7 +388,7 @@
struct atm_dev *atm_dev_register(const char *type,const struct atmdev_ops *ops,
int number,unsigned long *flags); /* number == -1: pick first available */
-struct atm_dev *atm_dev_lookup(int number);
+struct atm_dev *atm_dev_get_by_index(int ifindex);
void atm_dev_deregister(struct atm_dev *dev);
void shutdown_atm_dev(struct atm_dev *dev);
void vcc_insert_socket(struct sock *sk);
@@ -435,11 +435,12 @@
{
atomic_dec(&dev->refcnt);
- if ((atomic_read(&dev->refcnt) == 1) &&
+ if ((atomic_read(&dev->refcnt) == 0) &&
test_bit(ATM_DF_CLOSE,&dev->flags))
shutdown_atm_dev(dev);
}
+#define __atm_dev_put(dev) atomic_dec(&(dev)->refcnt)
int atm_charge(struct atm_vcc *vcc,int truesize);
struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc,int pdu_size,
===== net/atm/common.c 1.54 vs edited =====
--- 1.54/net/atm/common.c Tue Sep 23 13:38:28 2003
+++ edited/net/atm/common.c Mon Sep 29 22:24:27 2003
@@ -426,7 +426,7 @@
vcc->qos.rxtp.traffic_class == ATM_ANYCLASS)
return -EINVAL;
if (itf != ATM_ITF_ANY) {
- dev = atm_dev_lookup(itf);
+ dev = atm_dev_get_by_index(itf);
if (!dev)
return -ENODEV;
error = __vcc_connect(vcc, dev, vpi, vci);
@@ -435,21 +435,19 @@
return error;
}
} else {
- struct list_head *p, *next;
+ struct list_head *p;
dev = NULL;
- spin_lock(&atm_dev_lock);
- list_for_each_safe(p, next, &atm_devs) {
+ rtnl_lock();
+ list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
atm_dev_hold(dev);
- spin_unlock(&atm_dev_lock);
if (!__vcc_connect(vcc, dev, vpi, vci))
break;
- atm_dev_put(dev);
+ __atm_dev_put(dev);
dev = NULL;
- spin_lock(&atm_dev_lock);
}
- spin_unlock(&atm_dev_lock);
+ rtnl_unlock();
if (!dev)
return -ENODEV;
}
===== net/atm/resources.c 1.21 vs edited =====
--- 1.21/net/atm/resources.c Thu Sep 11 06:41:52 2003
+++ edited/net/atm/resources.c Tue Sep 30 07:10:43 2003
@@ -24,7 +24,7 @@
LIST_HEAD(atm_devs);
-spinlock_t atm_dev_lock = SPIN_LOCK_UNLOCKED;
+static rwlock_t atm_dev_lock = RW_LOCK_UNLOCKED;
static struct atm_dev *__alloc_atm_dev(const char *type)
{
@@ -47,7 +47,7 @@
kfree(dev);
}
-static struct atm_dev *__atm_dev_lookup(int number)
+static struct atm_dev *__atm_dev_get_by_index(int number)
{
struct atm_dev *dev;
struct list_head *p;
@@ -55,27 +55,65 @@
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
if ((dev->ops) && (dev->number == number)) {
- atm_dev_hold(dev);
return dev;
}
}
return NULL;
}
-struct atm_dev *atm_dev_lookup(int number)
+struct atm_dev *atm_dev_get_by_index(int number)
{
struct atm_dev *dev;
- spin_lock(&atm_dev_lock);
- dev = __atm_dev_lookup(number);
- spin_unlock(&atm_dev_lock);
+ read_lock(&atm_dev_lock);
+ dev = __atm_dev_get_by_index(number);
+ if (dev)
+ atm_dev_hold(dev);
+ read_unlock(&atm_dev_lock);
return dev;
}
+static int register_atmdevice(struct atm_dev *dev)
+{
+ write_lock_irq(&atm_dev_lock);
+ list_add_tail(&dev->dev_list, &atm_devs);
+ atm_dev_hold(dev);
+ write_unlock_irq(&atm_dev_lock);
+
+ if (atm_proc_dev_register(dev) < 0) {
+ printk(KERN_ERR "atm_dev_register: "
+ "atm_proc_dev_register failed for dev %s\n",
+ dev->type);
+ write_lock_irq(&atm_dev_lock);
+ list_del(&dev->dev_list);
+ write_unlock_irq(&atm_dev_lock);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int atm_dev_alloc_index(struct atm_dev *dev, int number)
+{
+ if (number != -1) {
+ if ((__atm_dev_get_by_index(number)))
+ return -EBUSY;
+ } else {
+ number = 0;
+ while ((__atm_dev_get_by_index(number))) {
+ number++;
+ }
+ }
+ dev->number = number;
+
+ return 0;
+}
+
struct atm_dev *atm_dev_register(const char *type, const struct atmdev_ops *ops,
int number, unsigned long *flags)
{
- struct atm_dev *dev, *inuse;
+ struct atm_dev *dev;
+ int err;
dev = __alloc_atm_dev(type);
if (!dev) {
@@ -83,60 +121,51 @@
type);
return NULL;
}
- spin_lock(&atm_dev_lock);
- if (number != -1) {
- if ((inuse = __atm_dev_lookup(number))) {
- atm_dev_put(inuse);
- spin_unlock(&atm_dev_lock);
- __free_atm_dev(dev);
- return NULL;
- }
- dev->number = number;
- } else {
- dev->number = 0;
- while ((inuse = __atm_dev_lookup(dev->number))) {
- atm_dev_put(inuse);
- dev->number++;
- }
- }
dev->ops = ops;
if (flags)
dev->flags = *flags;
- else
- memset(&dev->flags, 0, sizeof(dev->flags));
- memset(&dev->stats, 0, sizeof(dev->stats));
- atomic_set(&dev->refcnt, 1);
- list_add_tail(&dev->dev_list, &atm_devs);
- spin_unlock(&atm_dev_lock);
- if (atm_proc_dev_register(dev) < 0) {
- printk(KERN_ERR "atm_dev_register: "
- "atm_proc_dev_register failed for dev %s\n",
- type);
- spin_lock(&atm_dev_lock);
- list_del(&dev->dev_list);
- spin_unlock(&atm_dev_lock);
+ rtnl_lock();
+
+ err = atm_dev_alloc_index(dev, number);
+ if (err < 0)
+ goto out;
+
+ err = register_atmdevice(dev);
+
+out:
+ rtnl_unlock();
+
+ if (err < 0) {
__free_atm_dev(dev);
- return NULL;
+ dev = NULL;
}
-
+
return dev;
}
+static void unregister_atmdevice(struct atm_dev *dev)
+{
+ atm_proc_dev_deregister(dev);
+
+ write_lock_irq(&atm_dev_lock);
+ list_del(&dev->dev_list);
+ write_unlock_irq(&atm_dev_lock);
+
+ atm_dev_put(dev);
+}
void atm_dev_deregister(struct atm_dev *dev)
{
unsigned long warning_time;
- atm_proc_dev_deregister(dev);
-
- spin_lock(&atm_dev_lock);
- list_del(&dev->dev_list);
- spin_unlock(&atm_dev_lock);
+ rtnl_lock();
+ unregister_atmdevice(dev);
+ rtnl_unlock();
warning_time = jiffies;
- while (atomic_read(&dev->refcnt) != 1) {
+ while (atomic_read(&dev->refcnt) != 0) {
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ / 4);
current->state = TASK_RUNNING;
@@ -153,7 +182,7 @@
void shutdown_atm_dev(struct atm_dev *dev)
{
- if (atomic_read(&dev->refcnt) > 1) {
+ if (atomic_read(&dev->refcnt) > 0) {
set_bit(ATM_DF_CLOSE, &dev->flags);
return;
}
@@ -217,23 +246,23 @@
return -EFAULT;
if (get_user(len, &((struct atm_iobuf *) arg)->length))
return -EFAULT;
- spin_lock(&atm_dev_lock);
+ read_lock(&atm_dev_lock);
list_for_each(p, &atm_devs)
size += sizeof(int);
if (size > len) {
- spin_unlock(&atm_dev_lock);
+ read_unlock(&atm_dev_lock);
return -E2BIG;
}
tmp_buf = tmp_bufp = kmalloc(size, GFP_ATOMIC);
if (!tmp_buf) {
- spin_unlock(&atm_dev_lock);
+ read_unlock(&atm_dev_lock);
return -ENOMEM;
}
list_for_each(p, &atm_devs) {
dev = list_entry(p, struct atm_dev, dev_list);
*tmp_bufp++ = dev->number;
}
- spin_unlock(&atm_dev_lock);
+ read_unlock(&atm_dev_lock);
error = (copy_to_user(buf, tmp_buf, size) ||
put_user(size, &((struct atm_iobuf *) arg)->length))
? -EFAULT : 0;
@@ -248,7 +277,7 @@
if (get_user(number, &((struct atmif_sioc *) arg)->number))
return -EFAULT;
- if (!(dev = atm_dev_lookup(number)))
+ if (!(dev = atm_dev_get_by_index(number)))
return -ENODEV;
switch (cmd) {
@@ -411,13 +440,13 @@
void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos)
{
- spin_lock(&atm_dev_lock);
+ read_lock(&atm_dev_lock);
return *pos ? dev_get_idx(*pos) : (void *) 1;
}
void atm_dev_seq_stop(struct seq_file *seq, void *v)
{
- spin_unlock(&atm_dev_lock);
+ read_unlock(&atm_dev_lock);
}
void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -430,5 +459,5 @@
EXPORT_SYMBOL(atm_dev_register);
EXPORT_SYMBOL(atm_dev_deregister);
-EXPORT_SYMBOL(atm_dev_lookup);
+EXPORT_SYMBOL(atm_dev_get_by_index);
EXPORT_SYMBOL(shutdown_atm_dev);
===== net/atm/resources.h 1.13 vs edited =====
--- 1.13/net/atm/resources.h Mon Sep 8 13:27:12 2003
+++ edited/net/atm/resources.h Mon Sep 29 22:03:48 2003
@@ -11,7 +11,6 @@
extern struct list_head atm_devs;
-extern spinlock_t atm_dev_lock;
int atm_dev_ioctl(unsigned int cmd, unsigned long arg);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-01 11:34 [RFC] add rtnl semaphore to linux-atm chas williams
@ 2003-10-01 12:42 ` David S. Miller
2003-10-01 13:07 ` chas williams
2003-10-03 2:26 ` Mitchell Blank Jr
0 siblings, 2 replies; 16+ messages in thread
From: David S. Miller @ 2003-10-01 12:42 UTC (permalink / raw)
To: chas3; +Cc: chas, netdev
On Wed, 01 Oct 2003 07:34:25 -0400
chas williams <chas@cmf.nrl.navy.mil> wrote:
> i am thinking about doing the following to fix the race
> during ATM_ITF_ANY operation. rtnl is held across
> registration/unregistration. this means that you can get
> read-only access to the device list by holding rtnl
> or a read_lock on atm_dev_lock similar to the scheme
> used by netdevice (or so i think).
This looks like it would work.
Although, unless VCC connect can potentially sleep, it might
be better to keep exporting the rwlock and take it as a reader
instead of grabbing the rtnl semaphore.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-01 12:42 ` David S. Miller
@ 2003-10-01 13:07 ` chas williams
2003-10-01 13:14 ` David S. Miller
2003-10-03 2:26 ` Mitchell Blank Jr
1 sibling, 1 reply; 16+ messages in thread
From: chas williams @ 2003-10-01 13:07 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev
In message <20031001054226.126cea7b.davem@redhat.com>,"David S. Miller" writes:
>Although, unless VCC connect can potentially sleep, it might
>be better to keep exporting the rwlock and take it as a reader
>instead of grabbing the rtnl semaphore.
i had initially written it that way but remembered at one point i
was going to use the rtnl semaphore to handle this problem. any
opinions on what is 'better'?
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-01 13:07 ` chas williams
@ 2003-10-01 13:14 ` David S. Miller
0 siblings, 0 replies; 16+ messages in thread
From: David S. Miller @ 2003-10-01 13:14 UTC (permalink / raw)
To: chas williams; +Cc: netdev
On Wed, 01 Oct 2003 09:07:45 -0400
chas williams <chas@cmf.nrl.navy.mil> wrote:
> i had initially written it that way but remembered at one point i
> was going to use the rtnl semaphore to handle this problem. any
> opinions on what is 'better'?
Blocking all network configuration operations (even ones not
for your subsystem) is a little bit anti-social in SMP cases.
If you take the rwlock as a reader, you only interfere with a
very minute class of network configuration code paths (those that
need to take the rwlock in question as a writer).
For example, if you use the rwlock-as-reader approach, someone doing
IPV4 routing table updates (ie. routing daemon changing a couple
thousand routes after a BGP flap) won't be perturbed while the ATM
operation is in progress.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-01 12:42 ` David S. Miller
2003-10-01 13:07 ` chas williams
@ 2003-10-03 2:26 ` Mitchell Blank Jr
2003-10-03 13:58 ` David S. Miller
1 sibling, 1 reply; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-03 2:26 UTC (permalink / raw)
To: David S. Miller; +Cc: chas3, chas, netdev
David S. Miller wrote:
> Although, unless VCC connect can potentially sleep, it might
> be better to keep exporting the rwlock and take it as a reader
> instead of grabbing the rtnl semaphore.
VCC connect can definately sleep - a good example is
drirvers/usb/misc/speedtch.c but there are probably other ones.
My personal recommendations:
* There should be a per-atm-device semaphore held across calls into the
driver's ->open, ->close, ->change_qos and maybe a couple other things
to serialize those operations (for the sake of keeping the drivers
sane - there's no reason there should be multiple operations pending)
* The ATM layer shouldn't need to keep any other sorts of locks across
calls into atm_dev->open or such. To avoid race conditions the ATM
code needs to:
1. Take whatever internal spinlock it needs
2. Add the itf/vpi/vci tuple to whatever datastructure we're holding
the vcc list in - but mark it as "inactive" so we don't find it
while others are searching the list. Obviously the open will
fail if we find a matching tuple (whether or not it was active)
3. Drop the spinlock
4. Take the per-device semaphore
5. Call atmdev->open
6. Drop the per-device semaphore
7. Re-take the internal spinlock
8. If open succeeded now mark the vcc as "active". If the open failed
just delete it
9. Drop the internal spinlock
To do a close the method is similar:
1. Take the internal spinlock
2. Find the tuple, mark it as inactive
3. Drop the spinlock
4. Take the per-device semaphore
5. Call atmdev->close
6. Drop the per-device semaphore
7. Re-take the internal spinlock
8. Delete vcc from the list
9. Drop the internal spinlock
Then ->open and ->close can sleep like they need to but won't block
other accesses to the list in the mean time.
* If ATM_ITF_ANY is a pain to fix it should just be removed - it's
basically a misfeature and is probably never used by anybody. I've
already posted a patch to linux-atm-general that basically does this
(it changes "ATM_ITF_ANY" to mean "first interface we find")
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-03 2:26 ` Mitchell Blank Jr
@ 2003-10-03 13:58 ` David S. Miller
2003-10-03 21:45 ` Mitchell Blank Jr
2003-10-04 6:59 ` Mitchell Blank Jr
0 siblings, 2 replies; 16+ messages in thread
From: David S. Miller @ 2003-10-03 13:58 UTC (permalink / raw)
To: Mitchell Blank Jr; +Cc: chas3, chas, netdev
On Thu, 2 Oct 2003 19:26:15 -0700
Mitchell Blank Jr <mitch@sfgoth.com> wrote:
> My personal recommendations:
> * There should be a per-atm-device semaphore held across calls into the
> driver's ->open, ->close, ->change_qos and maybe a couple other things
> to serialize those operations (for the sake of keeping the drivers
> sane - there's no reason there should be multiple operations pending)
Ok, but what Chas is trying to do is move the ATM device stuff over
to a model that makes use of the existing network device infrastructure
for solving these kinds of problems.
Part of that is using the rtnl semaphore etc.
I would rather Chas use the rtnl semaphore for synchronization
than to ultra-optimize this code by using the rwlock as I had suggested
to him.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-03 13:58 ` David S. Miller
@ 2003-10-03 21:45 ` Mitchell Blank Jr
2003-10-04 5:16 ` David S. Miller
2003-10-04 6:59 ` Mitchell Blank Jr
1 sibling, 1 reply; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-03 21:45 UTC (permalink / raw)
To: David S. Miller; +Cc: chas3, chas, netdev
David S. Miller wrote:
> > My personal recommendations:
> > * There should be a per-atm-device semaphore held across calls into the
> > driver's ->open, ->close, ->change_qos and maybe a couple other things
> > to serialize those operations (for the sake of keeping the drivers
> > sane - there's no reason there should be multiple operations pending)
>
> Ok, but what Chas is trying to do is move the ATM device stuff over
> to a model that makes use of the existing network device infrastructure
> for solving these kinds of problems.
>
> Part of that is using the rtnl semaphore etc.
>
> I would rather Chas use the rtnl semaphore for synchronization
> than to ultra-optimize this code by using the rwlock as I had suggested
> to him.
I have no problem with using rtnl_sem to syncronize the atmdev->{open,
close,change_qos}() paths instead of a per-device semaphore.
We *can't* use it to protect the state of the vcc list though because we
need to do lookups in both interrupt and bh context. Using a sleeping
lock alone is a non-starter.
That's why we need to do the three-step spinlock->semaphore->spinlock
dance in the vcc open and close paths.
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-03 21:45 ` Mitchell Blank Jr
@ 2003-10-04 5:16 ` David S. Miller
2003-10-04 5:55 ` Mitchell Blank Jr
0 siblings, 1 reply; 16+ messages in thread
From: David S. Miller @ 2003-10-04 5:16 UTC (permalink / raw)
To: Mitchell Blank Jr; +Cc: chas3, chas, netdev
On Fri, 3 Oct 2003 14:45:41 -0700
Mitchell Blank Jr <mitch@sfgoth.com> wrote:
> We *can't* use it to protect the state of the vcc list though because we
> need to do lookups in both interrupt and bh context. Using a sleeping
> lock alone is a non-starter.
>
> That's why we need to do the three-step spinlock->semaphore->spinlock
> dance in the vcc open and close paths.
The semaphore protects "modifications", it basically serializes them.
You still need a rwlock to protect the actual lists and this rwlock
is what the interrupt/bh context code takes to traverse the lists
and tables.
This is how the netdev list works, for example.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-04 5:16 ` David S. Miller
@ 2003-10-04 5:55 ` Mitchell Blank Jr
2003-10-04 6:56 ` Mitchell Blank Jr
0 siblings, 1 reply; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-04 5:55 UTC (permalink / raw)
To: David S. Miller; +Cc: chas3, chas, netdev
David S. Miller wrote:
> > We *can't* use it to protect the state of the vcc list though because we
> > need to do lookups in both interrupt and bh context. Using a sleeping
> > lock alone is a non-starter.
> >
> > That's why we need to do the three-step spinlock->semaphore->spinlock
> > dance in the vcc open and close paths.
>
> The semaphore protects "modifications", it basically serializes them.
>
> You still need a rwlock to protect the actual lists and this rwlock
> is what the interrupt/bh context code takes to traverse the lists
> and tables.
I think we're saying approximately the same thing at this point.
I'm just trying to make the point that the spinlock (or rwlock, whatever)
can't be held when we call atm_dev->{open,close}() since they can sleep.
Which is why we need to do the three-step process I described in the
earlier email.
Once you do that there's no reason (that I know of) to take rtnl_sem
except to serialize the actual calls into atm_dev's ops. There's no
problem if multiple opens both get their vpi/vci's allocated in
"inactive" state as long as the actual connects happen in series.
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-04 5:55 ` Mitchell Blank Jr
@ 2003-10-04 6:56 ` Mitchell Blank Jr
0 siblings, 0 replies; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-04 6:56 UTC (permalink / raw)
To: David S. Miller; +Cc: chas3, chas, netdev
Mitchell Blank Jr wrote:
> I'm just trying to make the point that the spinlock (or rwlock, whatever)
> can't be held when we call atm_dev->{open,close}() since they can sleep.
> Which is why we need to do the three-step process I described in the
> earlier email.
Actually I guess there's two ways of doing it... so instead of
Method 1:
1. Take rwlock for writing
2. Search for {dev,vpi,vci} tuple. If found, fail - otherwise add it
as "inactive"
3. Drop rwlock
4. Take semaphore
5. Call atm_dev->open()
6. Drop semaphore
7. Take rwlock for writing
8. If open succeeded clear "inactive" flag. If failed remove entry
9. Drop rwlock
Method 2:
1. Take semaphore
2. Take rwlock for reading
3. Search for tuple, if found fail
4. Drop rwlock
5. Call atm_dev->open
6. Take rwlock for writing
7. Add tuple to list
8. Drop rwlock
9. Drop semaphore
The second method has the advantage that you don't need the "inactive"
flag. On the other hand, you have to wait for the semaphore before
you can return EADDRINUSE.
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-03 13:58 ` David S. Miller
2003-10-03 21:45 ` Mitchell Blank Jr
@ 2003-10-04 6:59 ` Mitchell Blank Jr
2003-10-04 12:50 ` chas williams
1 sibling, 1 reply; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-04 6:59 UTC (permalink / raw)
To: David S. Miller; +Cc: chas3, chas, netdev
David S. Miller wrote:
> Part of that is using the rtnl semaphore etc.
I've thought about this some more and I think we really DO want a
per-atm-device semaphore for this rather than overloading rtnl_sem.
A device like an ADSL card might need to do a complete physical-layer
negotiation when you open a VCC (assuming there were no VCCs open
before) This could take on the order of a minute if it fails. You
don't want to be hogging rtnl_sem for that long.
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-04 6:59 ` Mitchell Blank Jr
@ 2003-10-04 12:50 ` chas williams
2003-10-04 19:42 ` Mitchell Blank Jr
0 siblings, 1 reply; 16+ messages in thread
From: chas williams @ 2003-10-04 12:50 UTC (permalink / raw)
To: Mitchell Blank Jr; +Cc: David S. Miller, netdev
just to summarize the last couple messages and my thoughts about
all of this:
atm_dev will become a netdevice in the future. there is no good reason
it shouldnt. the atm layer needs to start doing things the 'netdevice'
way.
ATM_ITF_ANY is rarely used. we just need it to work properly with
a minimum of effort. the rtnl semaphore blocks modifications to the
device list so routines that might sleep can safely iterate it. there are
(currently) no atm devices with slow open routines so please dont bother
with hypothetical devices. people who use ATM_ITF_ANY are going to pay
a penalty. that's just the way it is. its a bit of a bad idea really.
what if you want something like round-robin instead of in-order?
should vcc->open() and vcc->close() sleep? this is a problem for some
people. i can see why you might want to sleep in close() -- like waiting
for all the skb's to transmit/return, but this is only because vcc's dont
hold a per skb reference count and dont know when its safe to go away (and
this becuase the atm transmit side doesnt skb_clone() like it should).
serialization for open/change_qos/close should be handled in the
driver (assuming we are discussing serialization across different
vcc's and not a single vcc -- a single vcc case is already
handled via lock_sock). if your driver h/w doesnt need this, then
it shouldnt be thrust upon you. further, i believe this is wrong
thinking. what data are you protecting with this lock? (the he
driver has a 'global' lock, but its used to protect the register
space, which has a serialization effect).
vcc_sklist holds a list of sockets active or otherwise. the atm layer
already has flags (ATM_VF_READY) that indicate if a vcc is ready to recv.
it is unfortunate that the bh's of most drivers dont bother to check this
bit (assuming some other mechanism isnt used to prevent the bottom half
from running while opening/preparing a vcc). in the future (and this
isnt a hypothetical driver) vcc's will probably live slightly longer than
the close routine (those outstanding skbs) and cant be removed from the
vcc_sklist until every last reference is gone. yes, this means that close
could finish but you wouldnt be able to open the vpi/vci again right away.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-04 12:50 ` chas williams
@ 2003-10-04 19:42 ` Mitchell Blank Jr
2003-10-05 12:52 ` chas williams
0 siblings, 1 reply; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-04 19:42 UTC (permalink / raw)
To: chas3; +Cc: David S. Miller, netdev
chas williams wrote:
> atm_dev will become a netdevice in the future. there is no good reason
> it shouldnt.
Agreed. I'm not sure if it's a giant win (since atm devices are pretty
unique) but you seem pretty gung-ho about converting it so I'll trust
you on it.
> there are
> (currently) no atm devices with slow open routines so please dont bother
> with hypothetical devices.
Well that might be true in the tree. I think as more DSL adpaters are
supported this will change. The atm_dev->open() is really the only sane
place currently to kick-off DSL negotiation (so the link is brought up
when pppd or whatever is started)
> people who use ATM_ITF_ANY are going to pay
> a penalty. that's just the way it is. its a bit of a bad idea really.
> what if you want something like round-robin instead of in-order?
Exactly - its a userspace problem.
> i can see why you might want to sleep in close() -- like waiting
> for all the skb's to transmit/return, but this is only because vcc's dont
> hold a per skb reference count and dont know when its safe to go away
I can't speak for all the cards but at least lanai.c has to sleep in
->close() due to hardware issues. Each VCC has its own cell buffers
allocated which the card DMAs into. When you close a vcc you need to
wait for an internal RX queue to drain before you can safely free the
buffer or you might end up scribbling on random memory.
The only alternative I can think of would be to have the buffer free
kicked off later from a tasklet or someting but that could get pretty
gross.
Plus as I mentioned DSL adapters have good reason to sleep in open/close.
Since we require VCCs to be opened and closed from process context this
isn't a problem (as long as we do the locking correctly)
> serialization for open/change_qos/close should be handled in the
> driver (assuming we are discussing serialization across different
> vcc's and not a single vcc -- a single vcc case is already
> handled via lock_sock). if your driver h/w doesnt need this, then
> it shouldnt be thrust upon you.
Its a matter of opinion. My feelings are:
1. Serializing these operations through a semaphore shouldn't affect
performance one bit - even in conditions where open/close happen
relatively frequently (say, LANE) there would still be negligable
contention on the semaphore
2. Given the rather spotty history of ATM drivers and SMP I'd rather
keep the driver API as safe as possible. Basically the driver
should only have to worry about locking when dealing with the
rx/tx paths (and there are still lots of problems there I think)
Everything else can be safely serialized by the ATM layer
> vcc_sklist holds a list of sockets active or otherwise. the atm layer
> already has flags (ATM_VF_READY) that indicate if a vcc is ready to recv.
Unfortunately the flag is currently managed by the driver instead of the
ATM layer. If you're willing to strip out all of the ATM_VF_* use inside
of the drivers then yes we can use that flag.
> it is unfortunate that the bh's of most drivers dont bother to check this
> bit
This is just a matter of adding an efficient API for searching for vpi/vci's
(as we recently discussed on l-a-g) If that was done then we could get rid
of the duplicated data strucutres that exist in most of the drivers (and
get rid of a whole class of races in the rx paths)
Then the ATM layer can easily handle checking the ATM_VF_READY flag.
> in the future (and this
> isnt a hypothetical driver) vcc's will probably live slightly longer than
> the close routine (those outstanding skbs) and cant be removed from the
> vcc_sklist until every last reference is gone. yes, this means that close
> could finish but you wouldnt be able to open the vpi/vci again right away.
I don't think that's the case at all - you could safely remove the vcc from
the list at ->close time (but not free its memory until the last reference
disappears) Then an ->open would see the vpi/vci as free immediately (which
should be safe)
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-04 19:42 ` Mitchell Blank Jr
@ 2003-10-05 12:52 ` chas williams
2003-10-06 9:03 ` Mitchell Blank Jr
0 siblings, 1 reply; 16+ messages in thread
From: chas williams @ 2003-10-05 12:52 UTC (permalink / raw)
To: Mitchell Blank Jr; +Cc: David S. Miller, netdev
In message <20031004194242.GC94203@gaz.sfgoth.com>,Mitchell Blank Jr writes:
>Agreed. I'm not sure if it's a giant win (since atm devices are pretty
>unique) but you seem pretty gung-ho about converting it so I'll trust
>you on it.
well if you ignore the sysfs and notifier handling provided by netdevice
and rtnetlink then no you dont really gain anything. but i can see
a use for both of these things. like to watching the carrier status
of an atm card.
>Well that might be true in the tree. I think as more DSL adpaters are
>supported this will change. The atm_dev->open() is really the only sane
>place currently to kick-off DSL negotiation (so the link is brought up
>when pppd or whatever is started)
i am not sure the negotiation should be handled by the kernel.
we will cross this bridge when we get there. i dont see a
rush of people trying to add dsl drivers. in fact, the
community seems to be pretty stingy with documentation about
the dsl hardware.
>> people who use ATM_ITF_ANY are going to pay a penalty
>Exactly - its a userspace problem.
so the idea is to fix it minimally. people who use it should be
encouraged to change. i would either get rid of completely or
make it work as before. not change the way it works.
>I can't speak for all the cards but at least lanai.c has to sleep in
>->close() due to hardware issues. Each VCC has its own cell buffers
>allocated which the card DMAs into. When you close a vcc you need to
>wait for an internal RX queue to drain before you can safely free the
>buffer or you might end up scribbling on random memory.
right. the he has the same problem. however, you should be sleeping
in close. the vcc/sk should have a count of the outstanding skb's (i.e.
loaned to the card). the he sends an interrupt when its done with
a skb so you drop the refcount on the vcc/sk. when this reaches
0 the vcc/sk they go away and you dont have a problem. no sleeping
in the vcc->close() since the removal of all outstanding skb's would
be handled asychronously. currently this is impossible since the
skb's coming in to the atm card's are typically owned by other layer.
(which sort of explains vcc->pop()).
> 2. Given the rather spotty history of ATM drivers and SMP I'd rather
> keep the driver API as safe as possible. Basically the driver
> should only have to worry about locking when dealing with the
> rx/tx paths (and there are still lots of problems there I think)
> Everything else can be safely serialized by the ATM layer
i dont believe any of the smp problems have come from open/close/change_qos.
almost all smp problems can be traced to a lack of locking on the
appropriate data structures and the improper handling of skb's in the
tx/rx paths. (and YOU CANT SLEEP IN SEND!) smp actually seems to be
pretty stable now (however we do have a local 2.4 tree that is more
2.6 as far as atm). 2-way, 4-way, 128-way (this one has 4 adapters
with a preliminary version of load-balancing). these hosts are atm only.
>Unfortunately the flag is currently managed by the driver instead of the
>ATM layer. If you're willing to strip out all of the ATM_VF_* use inside
>of the drivers then yes we can use that flag.
i believe that was the plan.
>This is just a matter of adding an efficient API for searching for vpi/vci's
>(as we recently discussed on l-a-g) If that was done then we could get rid
>of the duplicated data strucutres that exist in most of the drivers (and
>get rid of a whole class of races in the rx paths)
well the linear seach is already fairly efficient for most people since
most people dont have more than a handful of vcc's open at a time.
my machine is pure atm and typically i only have about 12 vcc's open
at a time.
>I don't think that's the case at all - you could safely remove the vcc from
>the list at ->close time (but not free its memory until the last reference
>disappears) Then an ->open would see the vpi/vci as free immediately (which
>should be safe)
not sure about this. the fore200e seems to have a problem with
reopening on a vcc while there still might be outstanding skb's on a
vcc from a previos open/close.
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-05 12:52 ` chas williams
@ 2003-10-06 9:03 ` Mitchell Blank Jr
2003-10-06 14:46 ` chas williams
0 siblings, 1 reply; 16+ messages in thread
From: Mitchell Blank Jr @ 2003-10-06 9:03 UTC (permalink / raw)
To: chas williams; +Cc: David S. Miller, netdev
chas williams wrote:
> i am not sure the negotiation should be handled by the kernel.
> we will cross this bridge when we get there. i dont see a
> rush of people trying to add dsl drivers. in fact, the
> community seems to be pretty stingy with documentation about
> the dsl hardware.
Yeah, I've been trying to get that resolved but so far noone is
anwering my emails. There are two DSL drivers in some state of
completion that could be merged if the companies involved to
just release them.
> >> people who use ATM_ITF_ANY are going to pay a penalty
> >Exactly - its a userspace problem.
>
> so the idea is to fix it minimally. people who use it should be
> encouraged to change. i would either get rid of completely or
> make it work as before. not change the way it works.
I don't think there are any current users that would be broken by
a "just pick the first interface" policy. I just don't want to
entirely remove it in case someone is using '*.0.50' in their clip
configs or something.
There are no users inside the atm tools that need ATM_ITF_ANY. Anyone
using it directly in a PVC address is already broken and non-determanistic
if they have more than one interface, so I'm not worried if the semantics
in that case are only 90% identical instead of 100%
> i dont believe any of the smp problems have come from open/close/change_qos.
I doubt many crashes happen there (in practice mostly those operations
will be serialized by userland - it's rare to have multiple threads
opening/closing vccs at once) but that doesn't mean that races aren't
hiding. Again, there's zero performance implication to serializing
them so we might as well make the driver API a little more foolproof.
> well the linear seach is already fairly efficient for most people since
> most people dont have more than a handful of vcc's open at a time.
Yes, but the worst-case is unacceptable. There are at least some people
who have used the code to terminate VCCs coming from DSL customers (a
project like that is actually what got me started with hacking on
linux-atm 5 years ago :-). They could have hundreds of VCCs active at
once.
> >I don't think that's the case at all - you could safely remove the vcc from
> >the list at ->close time (but not free its memory until the last reference
> >disappears) Then an ->open would see the vpi/vci as free immediately (which
> >should be safe)
>
> not sure about this. the fore200e seems to have a problem with
> reopening on a vcc while there still might be outstanding skb's on a
> vcc from a previos open/close.
Hmmmm... ok. I really believe that we need to sleep in close until the
VCC is truly free. If userland can't do a close followed immediately by
an open on the same vci then something is busted.
-Mitch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [RFC] add rtnl semaphore to linux-atm
2003-10-06 9:03 ` Mitchell Blank Jr
@ 2003-10-06 14:46 ` chas williams
0 siblings, 0 replies; 16+ messages in thread
From: chas williams @ 2003-10-06 14:46 UTC (permalink / raw)
To: Mitchell Blank Jr; +Cc: David S. Miller, netdev
In message <20031006090304.GD4651@gaz.sfgoth.com>,Mitchell Blank Jr writes:
>There are no users inside the atm tools that need ATM_ITF_ANY. Anyone
>using it directly in a PVC address is already broken and non-determanistic
>if they have more than one interface, so I'm not worried if the semantics
>in that case are only 90% identical instead of 100%
i think the vswitch stuff used it. but it can be made to work w/o
too much trouble.
>I doubt many crashes happen there (in practice mostly those operations
>will be serialized by userland - it's rare to have multiple threads
>opening/closing vccs at once) but that doesn't mean that races aren't
>hiding. Again, there's zero performance implication to serializing
>them so we might as well make the driver API a little more foolproof.
so if it isnt a problem why fix it? further, if, as you say, open and
close could sleep and possibly for long periods of time then holding
this semaphore will block other open/close during this time. that's
going to be very annoying. (i have already seen this problem with the
old-style SOCKOPS lock_kernel() nonsense. on vcc's that are slow to
close the machine basically comes to a complete stop until that vcc
is closed). so either you make open/close non sleeping and add a
semaphore and just dont add a semaphore.
>Yes, but the worst-case is unacceptable. There are at least some people
>who have used the code to terminate VCCs coming from DSL customers (a
>project like that is actually what got me started with hacking on
>linux-atm 5 years ago :-). They could have hundreds of VCCs active at
>once.
i agree. our atm router typically has something like 100 open at a
time. so a linear search here is bad, but the he driver does a little
work to make this somewhat more efficient (like caching the mapping
since this cant change while under the lock).
>Hmmmm... ok. I really believe that we need to sleep in close until the
>VCC is truly free. If userland can't do a close followed immediately by
>an open on the same vci then something is busted.
see above why sleeping is bad. tcp sockets can also get 'stuck' in a
close where you cant reopen immediately, TIME_WAIT i believe.
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2003-10-06 14:46 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-10-01 11:34 [RFC] add rtnl semaphore to linux-atm chas williams
2003-10-01 12:42 ` David S. Miller
2003-10-01 13:07 ` chas williams
2003-10-01 13:14 ` David S. Miller
2003-10-03 2:26 ` Mitchell Blank Jr
2003-10-03 13:58 ` David S. Miller
2003-10-03 21:45 ` Mitchell Blank Jr
2003-10-04 5:16 ` David S. Miller
2003-10-04 5:55 ` Mitchell Blank Jr
2003-10-04 6:56 ` Mitchell Blank Jr
2003-10-04 6:59 ` Mitchell Blank Jr
2003-10-04 12:50 ` chas williams
2003-10-04 19:42 ` Mitchell Blank Jr
2003-10-05 12:52 ` chas williams
2003-10-06 9:03 ` Mitchell Blank Jr
2003-10-06 14:46 ` chas williams
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).