* [RFC] fixes for hangs and error reporting in CDC_WDM
@ 2020-09-22 10:13 Oliver Neukum
2020-09-22 10:13 ` [RFC 1/6] CDC-WDM: fix hangs in flush() in multithreaded cases Oliver Neukum
` (6 more replies)
0 siblings, 7 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb
Stress testing has shown that CDC-WDM has some issues with hangs
and error reporting
1. wakeups are not correctly handled in multhreaded environments
2. unresponsive hardware is not handled
3. errors are not correctly reported. This needs flush() to be
implemented.
For easier review all squashed together:
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -58,6 +58,9 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
#define WDM_MAX 16
+/* flush() is uninterruptible, but we cannot wait forever */
+#define WDM_FLUSH_TIMEOUT (30 * HZ)
+
/* CDC-WMC r1.1 requires wMaxCommand to be "at least 256 decimal (0x100)" */
#define WDM_DEFAULT_BUFSIZE 256
@@ -151,7 +154,7 @@ static void wdm_out_callback(struct urb *urb)
kfree(desc->outbuf);
desc->outbuf = NULL;
clear_bit(WDM_IN_USE, &desc->flags);
- wake_up(&desc->wait);
+ wake_up_all(&desc->wait);
}
static void wdm_in_callback(struct urb *urb)
@@ -393,6 +396,9 @@ static ssize_t wdm_write
if (test_bit(WDM_RESETTING, &desc->flags))
r = -EIO;
+ if (test_bit(WDM_DISCONNECTING, &desc->flags))
+ r = -ENODEV;
+
if (r < 0) {
rv = r;
goto out_free_mem_pm;
@@ -424,6 +430,7 @@ static ssize_t wdm_write
if (rv < 0) {
desc->outbuf = NULL;
clear_bit(WDM_IN_USE, &desc->flags);
+ wake_up_all(&desc->wait); /* for flush() */
dev_err(&desc->intf->dev, "Tx URB error: %d\n", rv);
rv = usb_translate_errors(rv);
goto out_free_mem_pm;
@@ -583,11 +590,39 @@ static ssize_t wdm_read
return rv;
}
+/*
+ * The difference to flush is that we wait forever. If you don't like
+ * that behavior, you need to send a signal.
+ */
+
+static int wdm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct wdm_device *desc = file->private_data;
+ int rv;
+
+ rv = wait_event_interruptible(desc->wait,
+ !test_bit(WDM_IN_USE, &desc->flags) ||
+ test_bit(WDM_DISCONNECTING, &desc->flags));
+
+ if (test_bit(WDM_DISCONNECTING, &desc->flags))
+ return -ENODEV;
+ if (rv < 0)
+ return -EINTR;
+
+ spin_lock_irq(&desc->iuspin);
+ rv = desc->werr;
+ desc->werr = 0;
+ spin_unlock_irq(&desc->iuspin);
+
+ return usb_translate_errors(rv);
+}
+
static int wdm_flush(struct file *file, fl_owner_t id)
{
struct wdm_device *desc = file->private_data;
+ int rv;
- wait_event(desc->wait,
+ rv = wait_event_timeout(desc->wait,
/*
* needs both flags. We cannot do with one
* because resetting it would cause a race
@@ -595,16 +630,25 @@ static int wdm_flush(struct file *file, fl_owner_t id)
* a disconnect
*/
!test_bit(WDM_IN_USE, &desc->flags) ||
- test_bit(WDM_DISCONNECTING, &desc->flags));
+ test_bit(WDM_DISCONNECTING, &desc->flags),
+ WDM_FLUSH_TIMEOUT);
- /* cannot dereference desc->intf if WDM_DISCONNECTING */
+ /*
+ * to report the correct error.
+ * This is best effort
+ * We are inevitably racing with the hardware.
+ */
if (test_bit(WDM_DISCONNECTING, &desc->flags))
return -ENODEV;
- if (desc->werr < 0)
- dev_err(&desc->intf->dev, "Error in flush path: %d\n",
- desc->werr);
+ if (!rv)
+ return -EIO;
+
+ spin_lock_irq(&desc->iuspin);
+ rv = desc->werr;
+ desc->werr = 0;
+ spin_unlock_irq(&desc->iuspin);
- return usb_translate_errors(desc->werr);
+ return usb_translate_errors(rv);
}
static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait)
@@ -729,6 +773,7 @@ static const struct file_operations wdm_fops = {
.owner = THIS_MODULE,
.read = wdm_read,
.write = wdm_write,
+ .fsync = wdm_fsync,
.open = wdm_open,
.flush = wdm_flush,
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC 1/6] CDC-WDM: fix hangs in flush() in multithreaded cases
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
@ 2020-09-22 10:13 ` Oliver Neukum
2020-09-22 10:13 ` [RFC 2/6] CDC-WDM: introduce a timeout in flush() Oliver Neukum
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb; +Cc: Oliver Neukum
In a multithreaded scenario an arbitrary number of threads
can be in wdm_write() and in wdm_flush(). Hence whenever
WDM_IN_USE is reset, all possible waiters need to be notified.
This is true whether this is due to IO completing or
to an error starting IO.
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-wdm.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index e3db6fbeadef..adb3fc307083 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -151,7 +151,7 @@ static void wdm_out_callback(struct urb *urb)
kfree(desc->outbuf);
desc->outbuf = NULL;
clear_bit(WDM_IN_USE, &desc->flags);
- wake_up(&desc->wait);
+ wake_up_all(&desc->wait);
}
static void wdm_in_callback(struct urb *urb)
@@ -424,6 +424,7 @@ static ssize_t wdm_write
if (rv < 0) {
desc->outbuf = NULL;
clear_bit(WDM_IN_USE, &desc->flags);
+ wake_up_all(&desc->wait); /* for flush() */
dev_err(&desc->intf->dev, "Tx URB error: %d\n", rv);
rv = usb_translate_errors(rv);
goto out_free_mem_pm;
@@ -586,6 +587,7 @@ static ssize_t wdm_read
static int wdm_flush(struct file *file, fl_owner_t id)
{
struct wdm_device *desc = file->private_data;
+ int rv;
wait_event(desc->wait,
/*
@@ -600,11 +602,12 @@ static int wdm_flush(struct file *file, fl_owner_t id)
/* cannot dereference desc->intf if WDM_DISCONNECTING */
if (test_bit(WDM_DISCONNECTING, &desc->flags))
return -ENODEV;
- if (desc->werr < 0)
+ rv = desc->werr;
+ if (rv < 0)
dev_err(&desc->intf->dev, "Error in flush path: %d\n",
- desc->werr);
+ rv);
- return usb_translate_errors(desc->werr);
+ return usb_translate_errors(rv);
}
static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait)
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 2/6] CDC-WDM: introduce a timeout in flush()
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
2020-09-22 10:13 ` [RFC 1/6] CDC-WDM: fix hangs in flush() in multithreaded cases Oliver Neukum
@ 2020-09-22 10:13 ` Oliver Neukum
2020-09-22 10:13 ` [RFC 3/6] CDC-WDM: remove use of intf->dev after potential disconnect Oliver Neukum
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb; +Cc: Oliver Neukum
Malicious or defective hardware may cease communication while
flush() is running. In last consequence we need a timeout,
as hardware that remains silent must be ignored.
Making the wait for IO interruptible would not solve the
issue. While it would avoid a hang, it would not allow any progress
and we would end up with an unclosable fd.
The 30 seconds are coming out of thin air.
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-wdm.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index adb3fc307083..b3a3f249a915 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -58,6 +58,9 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
#define WDM_MAX 16
+/* flush() is uninterruptible, but we cannot wait forever */
+#define WDM_FLUSH_TIMEOUT (30 * HZ)
+
/* CDC-WMC r1.1 requires wMaxCommand to be "at least 256 decimal (0x100)" */
#define WDM_DEFAULT_BUFSIZE 256
@@ -589,7 +592,7 @@ static int wdm_flush(struct file *file, fl_owner_t id)
struct wdm_device *desc = file->private_data;
int rv;
- wait_event(desc->wait,
+ rv = wait_event_timeout(desc->wait,
/*
* needs both flags. We cannot do with one
* because resetting it would cause a race
@@ -597,11 +600,15 @@ static int wdm_flush(struct file *file, fl_owner_t id)
* a disconnect
*/
!test_bit(WDM_IN_USE, &desc->flags) ||
- test_bit(WDM_DISCONNECTING, &desc->flags));
+ test_bit(WDM_DISCONNECTING, &desc->flags),
+ WDM_FLUSH_TIMEOUT);
/* cannot dereference desc->intf if WDM_DISCONNECTING */
if (test_bit(WDM_DISCONNECTING, &desc->flags))
return -ENODEV;
+ if (!rv)
+ return -EIO;
+
rv = desc->werr;
if (rv < 0)
dev_err(&desc->intf->dev, "Error in flush path: %d\n",
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 3/6] CDC-WDM: remove use of intf->dev after potential disconnect
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
2020-09-22 10:13 ` [RFC 1/6] CDC-WDM: fix hangs in flush() in multithreaded cases Oliver Neukum
2020-09-22 10:13 ` [RFC 2/6] CDC-WDM: introduce a timeout in flush() Oliver Neukum
@ 2020-09-22 10:13 ` Oliver Neukum
2020-09-22 10:13 ` [RFC 4/6] CDC-WDM: fix race reporting errors in flush Oliver Neukum
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb; +Cc: Oliver Neukum
After a disconnect intf->dev is not a valid pointer any longer.
As flush() uses it only for logging purposes logging is not
worth it. Remove the dev_err()
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-wdm.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index b3a3f249a915..1ca2c85a977d 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -603,16 +603,17 @@ static int wdm_flush(struct file *file, fl_owner_t id)
test_bit(WDM_DISCONNECTING, &desc->flags),
WDM_FLUSH_TIMEOUT);
- /* cannot dereference desc->intf if WDM_DISCONNECTING */
+ /*
+ * to report the correct error.
+ * This is best effort
+ * We are inevitably racing with the hardware.
+ */
if (test_bit(WDM_DISCONNECTING, &desc->flags))
return -ENODEV;
if (!rv)
return -EIO;
rv = desc->werr;
- if (rv < 0)
- dev_err(&desc->intf->dev, "Error in flush path: %d\n",
- rv);
return usb_translate_errors(rv);
}
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 4/6] CDC-WDM: fix race reporting errors in flush
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
` (2 preceding siblings ...)
2020-09-22 10:13 ` [RFC 3/6] CDC-WDM: remove use of intf->dev after potential disconnect Oliver Neukum
@ 2020-09-22 10:13 ` Oliver Neukum
2020-09-22 10:13 ` [RFC 5/6] CDC-WDM: correct error reporting in write() Oliver Neukum
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb; +Cc: Oliver Neukum
In case a race was lost and multiple fds used,
an error could be reported multiple times. To fix
this a spinlock must be taken.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-wdm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 1ca2c85a977d..5fb855404403 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -613,7 +613,10 @@ static int wdm_flush(struct file *file, fl_owner_t id)
if (!rv)
return -EIO;
+ spin_lock_irq(&desc->iuspin);
rv = desc->werr;
+ desc->werr = 0;
+ spin_unlock_irq(&desc->iuspin);
return usb_translate_errors(rv);
}
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 5/6] CDC-WDM: correct error reporting in write()
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
` (3 preceding siblings ...)
2020-09-22 10:13 ` [RFC 4/6] CDC-WDM: fix race reporting errors in flush Oliver Neukum
@ 2020-09-22 10:13 ` Oliver Neukum
2020-09-22 10:13 ` [RFC 6/6] CDC-WDM: implement fsync Oliver Neukum
2020-09-22 10:42 ` [RFC] fixes for hangs and error reporting in CDC_WDM Tetsuo Handa
6 siblings, 0 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb; +Cc: Oliver Neukum
In case our wait was interrupted by a disconnect, we should report
that.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-wdm.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 5fb855404403..242f83118208 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -396,6 +396,9 @@ static ssize_t wdm_write
if (test_bit(WDM_RESETTING, &desc->flags))
r = -EIO;
+ if (test_bit(WDM_DISCONNECTING, &desc->flags))
+ r = -ENODEV;
+
if (r < 0) {
rv = r;
goto out_free_mem_pm;
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [RFC 6/6] CDC-WDM: implement fsync
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
` (4 preceding siblings ...)
2020-09-22 10:13 ` [RFC 5/6] CDC-WDM: correct error reporting in write() Oliver Neukum
@ 2020-09-22 10:13 ` Oliver Neukum
2020-09-22 10:42 ` [RFC] fixes for hangs and error reporting in CDC_WDM Tetsuo Handa
6 siblings, 0 replies; 8+ messages in thread
From: Oliver Neukum @ 2020-09-22 10:13 UTC (permalink / raw)
To: penguin-kernel, bjorn, linux-usb; +Cc: Oliver Neukum
Some users want to be very sure that data has gone out to the
device. This needs fsync.
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-wdm.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 242f83118208..6ea03c12380c 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -590,6 +590,33 @@ static ssize_t wdm_read
return rv;
}
+/*
+ * The difference to flush is that we wait forever. If you don't like
+ * that behavior, you need to send a signal.
+ */
+
+static int wdm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ struct wdm_device *desc = file->private_data;
+ int rv;
+
+ rv = wait_event_interruptible(desc->wait,
+ !test_bit(WDM_IN_USE, &desc->flags) ||
+ test_bit(WDM_DISCONNECTING, &desc->flags));
+
+ if (test_bit(WDM_DISCONNECTING, &desc->flags))
+ return -ENODEV;
+ if (rv < 0)
+ return -EINTR;
+
+ spin_lock_irq(&desc->iuspin);
+ rv = desc->werr;
+ desc->werr = 0;
+ spin_unlock_irq(&desc->iuspin);
+
+ return usb_translate_errors(rv);
+}
+
static int wdm_flush(struct file *file, fl_owner_t id)
{
struct wdm_device *desc = file->private_data;
@@ -746,6 +773,7 @@ static const struct file_operations wdm_fops = {
.owner = THIS_MODULE,
.read = wdm_read,
.write = wdm_write,
+ .fsync = wdm_fsync,
.open = wdm_open,
.flush = wdm_flush,
.release = wdm_release,
--
2.16.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [RFC] fixes for hangs and error reporting in CDC_WDM
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
` (5 preceding siblings ...)
2020-09-22 10:13 ` [RFC 6/6] CDC-WDM: implement fsync Oliver Neukum
@ 2020-09-22 10:42 ` Tetsuo Handa
6 siblings, 0 replies; 8+ messages in thread
From: Tetsuo Handa @ 2020-09-22 10:42 UTC (permalink / raw)
To: Oliver Neukum, bjorn, linux-usb
On 2020/09/22 19:13, Oliver Neukum wrote:
> +/* flush() is uninterruptible, but we cannot wait forever */
> +#define WDM_FLUSH_TIMEOUT (30 * HZ)
> static int wdm_flush(struct file *file, fl_owner_t id)
> {
> struct wdm_device *desc = file->private_data;
> + int rv;
>
> - wait_event(desc->wait,
> + rv = wait_event_timeout(desc->wait,
> /*
> * needs both flags. We cannot do with one
> * because resetting it would cause a race
> @@ -595,16 +630,25 @@ static int wdm_flush(struct file *file, fl_owner_t id)
> * a disconnect
> */
> !test_bit(WDM_IN_USE, &desc->flags) ||
> - test_bit(WDM_DISCONNECTING, &desc->flags));
> + test_bit(WDM_DISCONNECTING, &desc->flags),
> + WDM_FLUSH_TIMEOUT);
>
Generally looks OK.
Any chance we can use wait_event_killable_timeout() or wait_event_killable() here?
syzkaller sends SIGKILL after 5 seconds from process creation. Blocking for 30
seconds in TASK_UNINTERRUPTIBLE state is not happy when killed by e.g. OOM killer.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2020-09-22 10:43 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-09-22 10:13 [RFC] fixes for hangs and error reporting in CDC_WDM Oliver Neukum
2020-09-22 10:13 ` [RFC 1/6] CDC-WDM: fix hangs in flush() in multithreaded cases Oliver Neukum
2020-09-22 10:13 ` [RFC 2/6] CDC-WDM: introduce a timeout in flush() Oliver Neukum
2020-09-22 10:13 ` [RFC 3/6] CDC-WDM: remove use of intf->dev after potential disconnect Oliver Neukum
2020-09-22 10:13 ` [RFC 4/6] CDC-WDM: fix race reporting errors in flush Oliver Neukum
2020-09-22 10:13 ` [RFC 5/6] CDC-WDM: correct error reporting in write() Oliver Neukum
2020-09-22 10:13 ` [RFC 6/6] CDC-WDM: implement fsync Oliver Neukum
2020-09-22 10:42 ` [RFC] fixes for hangs and error reporting in CDC_WDM Tetsuo Handa
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).