* [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue
@ 2024-09-05 13:46 Oliver Neukum
2024-09-10 9:58 ` Paolo Abeni
2024-09-10 22:44 ` Jakub Kicinski
0 siblings, 2 replies; 6+ messages in thread
From: Oliver Neukum @ 2024-09-05 13:46 UTC (permalink / raw)
To: davem, edumazet, kuba, pabeni, netdev, linux-usb, linux-kernel
Cc: Oliver Neukum, stable
The work can submit URBs and the URBs can schedule the work.
This cycle needs to be broken, when a device is to be stopped.
Use a flag to do so.
This is a design issue as old as the driver.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
---
v2: fix PM reference issue
drivers/net/usb/usbnet.c | 37 ++++++++++++++++++++++++++++---------
include/linux/usb/usbnet.h | 17 +++++++++++++++++
2 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 18eb5ba436df..2506aa8c603e 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -464,10 +464,15 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
void usbnet_defer_kevent (struct usbnet *dev, int work)
{
set_bit (work, &dev->flags);
- if (!schedule_work (&dev->kevent))
- netdev_dbg(dev->net, "kevent %s may have been dropped\n", usbnet_event_names[work]);
- else
- netdev_dbg(dev->net, "kevent %s scheduled\n", usbnet_event_names[work]);
+ if (!usbnet_going_away(dev)) {
+ if (!schedule_work(&dev->kevent))
+ netdev_dbg(dev->net,
+ "kevent %s may have been dropped\n",
+ usbnet_event_names[work]);
+ else
+ netdev_dbg(dev->net,
+ "kevent %s scheduled\n", usbnet_event_names[work]);
+ }
}
EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
@@ -535,7 +540,8 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
tasklet_schedule (&dev->bh);
break;
case 0:
- __usbnet_queue_skb(&dev->rxq, skb, rx_start);
+ if (!usbnet_going_away(dev))
+ __usbnet_queue_skb(&dev->rxq, skb, rx_start);
}
} else {
netif_dbg(dev, ifdown, dev->net, "rx: stopped\n");
@@ -843,9 +849,18 @@ int usbnet_stop (struct net_device *net)
/* deferred work (timer, softirq, task) must also stop */
dev->flags = 0;
- del_timer_sync (&dev->delay);
- tasklet_kill (&dev->bh);
+ del_timer_sync(&dev->delay);
+ tasklet_kill(&dev->bh);
cancel_work_sync(&dev->kevent);
+
+ /* We have cyclic dependencies. Those calls are needed
+ * to break a cycle. We cannot fall into the gaps because
+ * we have a flag
+ */
+ tasklet_kill(&dev->bh);
+ del_timer_sync(&dev->delay);
+ cancel_work_sync(&dev->kevent);
+
if (!pm)
usb_autopm_put_interface(dev->intf);
@@ -1171,7 +1186,8 @@ usbnet_deferred_kevent (struct work_struct *work)
status);
} else {
clear_bit (EVENT_RX_HALT, &dev->flags);
- tasklet_schedule (&dev->bh);
+ if (!usbnet_going_away(dev))
+ tasklet_schedule(&dev->bh);
}
}
@@ -1196,7 +1212,8 @@ usbnet_deferred_kevent (struct work_struct *work)
usb_autopm_put_interface(dev->intf);
fail_lowmem:
if (resched)
- tasklet_schedule (&dev->bh);
+ if (!usbnet_going_away(dev))
+ tasklet_schedule(&dev->bh);
}
}
@@ -1559,6 +1576,7 @@ static void usbnet_bh (struct timer_list *t)
} else if (netif_running (dev->net) &&
netif_device_present (dev->net) &&
netif_carrier_ok(dev->net) &&
+ !usbnet_going_away(dev) &&
!timer_pending(&dev->delay) &&
!test_bit(EVENT_RX_PAUSED, &dev->flags) &&
!test_bit(EVENT_RX_HALT, &dev->flags)) {
@@ -1606,6 +1624,7 @@ void usbnet_disconnect (struct usb_interface *intf)
usb_set_intfdata(intf, NULL);
if (!dev)
return;
+ usbnet_mark_going_away(dev);
xdev = interface_to_usbdev (intf);
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 9f08a584d707..d02d6f16da46 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -76,8 +76,25 @@ struct usbnet {
# define EVENT_LINK_CHANGE 11
# define EVENT_SET_RX_MODE 12
# define EVENT_NO_IP_ALIGN 13
+/* This one is special, as it indicates that the device is going away
+ * there are cyclic dependencies between tasklet, timer and bh
+ * that must be broken
+ */
+# define EVENT_UNPLUG 31
};
+static inline bool usbnet_going_away(struct usbnet *ubn)
+{
+ smp_mb__before_atomic(); /* against usbnet_mark_going_away() */
+ return test_bit(EVENT_UNPLUG, &ubn->flags);
+}
+
+static inline void usbnet_mark_going_away(struct usbnet *ubn)
+{
+ set_bit(EVENT_UNPLUG, &ubn->flags);
+ smp_mb__after_atomic(); /* against usbnet_going_away() */
+}
+
static inline struct usb_driver *driver_of(struct usb_interface *intf)
{
return to_usb_driver(intf->dev.driver);
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue
2024-09-05 13:46 [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue Oliver Neukum
@ 2024-09-10 9:58 ` Paolo Abeni
2024-09-12 9:30 ` Oliver Neukum
2024-09-10 22:44 ` Jakub Kicinski
1 sibling, 1 reply; 6+ messages in thread
From: Paolo Abeni @ 2024-09-10 9:58 UTC (permalink / raw)
To: Oliver Neukum, davem, edumazet, kuba, netdev, linux-usb,
linux-kernel
Cc: stable
On 9/5/24 15:46, Oliver Neukum wrote:
> The work can submit URBs and the URBs can schedule the work.
> This cycle needs to be broken, when a device is to be stopped.
> Use a flag to do so.
> This is a design issue as old as the driver.
>
> Signed-off-by: Oliver Neukum <oneukum@suse.com>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> CC: stable@vger.kernel.org
> ---
>
> v2: fix PM reference issue
>
> drivers/net/usb/usbnet.c | 37 ++++++++++++++++++++++++++++---------
> include/linux/usb/usbnet.h | 17 +++++++++++++++++
> 2 files changed, 45 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
> index 18eb5ba436df..2506aa8c603e 100644
> --- a/drivers/net/usb/usbnet.c
> +++ b/drivers/net/usb/usbnet.c
> @@ -464,10 +464,15 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
> void usbnet_defer_kevent (struct usbnet *dev, int work)
> {
> set_bit (work, &dev->flags);
> - if (!schedule_work (&dev->kevent))
> - netdev_dbg(dev->net, "kevent %s may have been dropped\n", usbnet_event_names[work]);
> - else
> - netdev_dbg(dev->net, "kevent %s scheduled\n", usbnet_event_names[work]);
> + if (!usbnet_going_away(dev)) {
> + if (!schedule_work(&dev->kevent))
> + netdev_dbg(dev->net,
> + "kevent %s may have been dropped\n",
> + usbnet_event_names[work]);
> + else
> + netdev_dbg(dev->net,
> + "kevent %s scheduled\n", usbnet_event_names[work]);
> + }
> }
> EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
>
> @@ -535,7 +540,8 @@ static int rx_submit (struct usbnet *dev, struct urb *urb, gfp_t flags)
> tasklet_schedule (&dev->bh);
> break;
> case 0:
> - __usbnet_queue_skb(&dev->rxq, skb, rx_start);
> + if (!usbnet_going_away(dev))
> + __usbnet_queue_skb(&dev->rxq, skb, rx_start);
> }
> } else {
> netif_dbg(dev, ifdown, dev->net, "rx: stopped\n");
> @@ -843,9 +849,18 @@ int usbnet_stop (struct net_device *net)
>
> /* deferred work (timer, softirq, task) must also stop */
> dev->flags = 0;
> - del_timer_sync (&dev->delay);
> - tasklet_kill (&dev->bh);
> + del_timer_sync(&dev->delay);
> + tasklet_kill(&dev->bh);
> cancel_work_sync(&dev->kevent);
> +
> + /* We have cyclic dependencies. Those calls are needed
> + * to break a cycle. We cannot fall into the gaps because
> + * we have a flag
> + */
> + tasklet_kill(&dev->bh);
> + del_timer_sync(&dev->delay);
> + cancel_work_sync(&dev->kevent);
I guess you do the shutdown twice because a running tasklet or timer
could re-schedule the others? If so, what prevent the rescheduling to
happen in the 2nd iteration? why can't you add usbnet_going_away()
checks on tasklet and timer reschedule point?
Thanks,
Paolo
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue
2024-09-10 9:58 ` Paolo Abeni
@ 2024-09-12 9:30 ` Oliver Neukum
0 siblings, 0 replies; 6+ messages in thread
From: Oliver Neukum @ 2024-09-12 9:30 UTC (permalink / raw)
To: Paolo Abeni, Oliver Neukum, davem, edumazet, kuba, netdev,
linux-usb, linux-kernel
Cc: stable
On 10.09.24 11:58, Paolo Abeni wrote:
> I guess you do the shutdown twice because a running tasklet or timer could re-schedule the others? If so, what prevent the rescheduling to happen in the 2nd iteration? why can't you add usbnet_going_away() checks on tasklet and timer reschedule point?
Hi,
I am not sure I fully understand the question. Technically
the flag prevents it in cooperation with del_timer_sync(),
which will wait for the timer handler to run to completion.
Hence if the timer handler has passed the the check the first
time, it will see the flag the second time.
I am not sure that answers the question, because AFAICT I have
added the checks, but there is an inevitable window between
the check and acting upon it.
Regards
Oliver
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue
2024-09-05 13:46 [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue Oliver Neukum
2024-09-10 9:58 ` Paolo Abeni
@ 2024-09-10 22:44 ` Jakub Kicinski
2024-09-12 9:37 ` Oliver Neukum
1 sibling, 1 reply; 6+ messages in thread
From: Jakub Kicinski @ 2024-09-10 22:44 UTC (permalink / raw)
To: Oliver Neukum
Cc: davem, edumazet, pabeni, netdev, linux-usb, linux-kernel, stable
On Thu, 5 Sep 2024 15:46:50 +0200 Oliver Neukum wrote:
> +static inline bool usbnet_going_away(struct usbnet *ubn)
> +{
> + smp_mb__before_atomic(); /* against usbnet_mark_going_away() */
> + return test_bit(EVENT_UNPLUG, &ubn->flags);
> +}
> +
> +static inline void usbnet_mark_going_away(struct usbnet *ubn)
> +{
> + set_bit(EVENT_UNPLUG, &ubn->flags);
> + smp_mb__after_atomic(); /* against usbnet_going_away() */
> +}
I have sort of an inverse question to what Paolo asked :)
AFAIU we need the double-cancel because checking the flag and
scheduling are not atomic. But if we do that why the memory
barriers? They make it seem like we're doing something clever
with memory ordering, while really we're just depending on normal
properties of the tasklet/timer/work APIs.
FTR disable_work_sync() would work nicely here but it'd be
a PITA for backports.
Also - is this based on some report or syzbot? I'm a bit tempted
to put this in net-next given how unlikely the race is vs how
commonly used the driver is.
--
pw-bot: cr
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue
2024-09-10 22:44 ` Jakub Kicinski
@ 2024-09-12 9:37 ` Oliver Neukum
2024-09-12 23:59 ` Jakub Kicinski
0 siblings, 1 reply; 6+ messages in thread
From: Oliver Neukum @ 2024-09-12 9:37 UTC (permalink / raw)
To: Jakub Kicinski, Oliver Neukum
Cc: davem, edumazet, pabeni, netdev, linux-usb, linux-kernel, stable
On 11.09.24 00:44, Jakub Kicinski wrote:
Hi,
> I have sort of an inverse question to what Paolo asked :)
This is getting interesting.
> AFAIU we need the double-cancel because checking the flag and
> scheduling are not atomic. But if we do that why the memory
Right.
> barriers? They make it seem like we're doing something clever
> with memory ordering, while really we're just depending on normal
> properties of the tasklet/timer/work APIs.
Good question. I added this because they are used in usbnet_defer_kevent()
which can be used in hard irq context. Are you saying I should check
whether this is actually needed?
> FTR disable_work_sync() would work nicely here but it'd be
> a PITA for backports.
So should I use it?
> Also - is this based on some report or syzbot? I'm a bit tempted
> to put this in net-next given how unlikely the race is vs how
> commonly used the driver is.
Having found the thing with the random MAC I decided to look
closely at the driver for overlooked stuff.
Regards
Oliver
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue
2024-09-12 9:37 ` Oliver Neukum
@ 2024-09-12 23:59 ` Jakub Kicinski
0 siblings, 0 replies; 6+ messages in thread
From: Jakub Kicinski @ 2024-09-12 23:59 UTC (permalink / raw)
To: Oliver Neukum
Cc: davem, edumazet, pabeni, netdev, linux-usb, linux-kernel, stable
On Thu, 12 Sep 2024 11:37:14 +0200 Oliver Neukum wrote:
> > barriers? They make it seem like we're doing something clever
> > with memory ordering, while really we're just depending on normal
> > properties of the tasklet/timer/work APIs.
>
> Good question. I added this because they are used in usbnet_defer_kevent()
> which can be used in hard irq context. Are you saying I should check
> whether this is actually needed?
I am slightly bolder, I'm saying that my reading of the code is
that it is in fact not needed :)
We build our "proof of correctness" on tasklet/timer/work APIs
which already provide all necessary barriers.
> > FTR disable_work_sync() would work nicely here but it'd be
> > a PITA for backports.
>
> So should I use it?
Up to you. It'd avoid work rescheduling but the backport would
be a pain, and off top of my head timer doesn't have a disable
so we'd still need the flag.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-09-12 23:59 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-05 13:46 [PATCHv2 net] usbnet: fix cyclical race on disconnect with work queue Oliver Neukum
2024-09-10 9:58 ` Paolo Abeni
2024-09-12 9:30 ` Oliver Neukum
2024-09-10 22:44 ` Jakub Kicinski
2024-09-12 9:37 ` Oliver Neukum
2024-09-12 23:59 ` Jakub Kicinski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).