From: Oliver Neukum <oneukum@suse.com>
To: Jonas Karlsson <jonas.karlsson@actia.se>,
Peter Chen <peter.chen@nxp.com>
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>,
Greg KH <gregkh@linuxfoundation.org>,
"linux-usb@vger.kernel.org" <linux-usb@vger.kernel.org>,
Alan Stern <stern@rowland.harvard.edu>
Subject: Re: USB transaction errors causing RCU stalls and kernel panics
Date: Tue, 10 Mar 2020 12:04:30 +0100 [thread overview]
Message-ID: <1583838270.11582.11.camel@suse.com> (raw)
In-Reply-To: <d1f68ef3316e484b9cc1360f71886719@actia.se>
[-- Attachment #1: Type: text/plain, Size: 458 bytes --]
Am Dienstag, den 10.03.2020, 10:04 +0000 schrieb Jonas Karlsson:
> Yes, I have applied that commit. The logs I have attached so far have had that commit applied.
> It reduces the amount of Unknown event type 37 messages significantly.
I am a bit confused. If this still happens after you disabled
autosuspend, the initial diagnosis can't be right. It looks
like we are entering some kind of busy loop. Can you test
the attached patches?
Regards
Oliver
[-- Attachment #2: 0001-cdc-acm-close-race-betrween-suspend-and-acm_softint.patch --]
[-- Type: text/x-patch, Size: 1396 bytes --]
From e5f91fa32e5294b764ed93615c20834ee3ba690e Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 5 Mar 2020 11:16:02 +0100
Subject: [PATCH 1/2] cdc-acm: close race betrween suspend() and acm_softint
Suspend increments a counter, then kills the URBs,
then kills the scheduled work. The scheduled work, however,
may reschedule the URBs. Fix this by having the work
check the counter.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-acm.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 62f4fb9b362f..7d0167382c87 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -557,14 +557,14 @@ static void acm_softint(struct work_struct *work)
struct acm *acm = container_of(work, struct acm, work);
if (test_bit(EVENT_RX_STALL, &acm->flags)) {
- if (!(usb_autopm_get_interface(acm->data))) {
+ smp_mb(); /* against acm_suspend() */
+ if (!acm->susp_count) {
for (i = 0; i < acm->rx_buflimit; i++)
usb_kill_urb(acm->read_urbs[i]);
usb_clear_halt(acm->dev, acm->in);
acm_submit_read_urbs(acm, GFP_KERNEL);
- usb_autopm_put_interface(acm->data);
+ clear_bit(EVENT_RX_STALL, &acm->flags);
}
- clear_bit(EVENT_RX_STALL, &acm->flags);
}
if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
--
2.16.4
[-- Attachment #3: 0002-cdc-acm-introduce-a-cool-down.patch --]
[-- Type: text/x-patch, Size: 4603 bytes --]
From 46f8fe7dbc51d6476bd8589dcbf0fc8a76e94102 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 10 Mar 2020 11:55:21 +0100
Subject: [PATCH 2/2] cdc-acm: introduce a cool down
Immediate submission in case of a babbling device can lead
to a busy loop. Introducing a delayed work.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-acm.c | 29 +++++++++++++++++++++++++++--
drivers/usb/class/cdc-acm.h | 5 ++++-
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7d0167382c87..a1fbf6bf5cf4 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -412,9 +412,12 @@ static void acm_ctrl_irq(struct urb *urb)
exit:
retval = usb_submit_urb(urb, GFP_ATOMIC);
- if (retval && retval != -EPERM)
+ if (retval && retval != -EPERM && retval != -ENODEV)
dev_err(&acm->control->dev,
"%s - usb_submit_urb failed: %d\n", __func__, retval);
+ else
+ dev_vdbg(&acm->control->dev,
+ "control resubmission terminated %d\n", retval);
}
static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
@@ -430,6 +433,8 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
dev_err(&acm->data->dev,
"urb %d failed submission with %d\n",
index, res);
+ } else {
+ dev_vdbg(&acm->data->dev, "intended failure %d\n", res);
}
set_bit(index, &acm->read_urbs_free);
return res;
@@ -471,6 +476,7 @@ static void acm_read_bulk_callback(struct urb *urb)
int status = urb->status;
bool stopped = false;
bool stalled = false;
+ bool cooldown = false;
dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
rb->index, urb->actual_length, status);
@@ -497,6 +503,13 @@ static void acm_read_bulk_callback(struct urb *urb)
__func__, status);
stopped = true;
break;
+ case -EOVERFLOW:
+ dev_dbg(&acm->data->dev,
+ "%s - cooling babbling device\n", __func__);
+ usb_mark_last_busy(acm->dev);
+ set_bit(rb->index, &acm->urbs_in_error_delay);
+ cooldown = true;
+ break;
default:
dev_dbg(&acm->data->dev,
"%s - nonzero urb status received: %d\n",
@@ -518,9 +531,11 @@ static void acm_read_bulk_callback(struct urb *urb)
*/
smp_mb__after_atomic();
- if (stopped || stalled) {
+ if (stopped || stalled || cooldown) {
if (stalled)
schedule_work(&acm->work);
+ else if (cooldown)
+ schedule_delayed_work(&acm->dwork, HZ / 2);
return;
}
@@ -567,6 +582,12 @@ static void acm_softint(struct work_struct *work)
}
}
+ if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) {
+ for (i = 0; i < ACM_NR; i++)
+ if (test_and_clear_bit(i, &acm->urbs_in_error_delay))
+ acm_submit_read_urb(acm, i, GFP_NOIO);
+ }
+
if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
tty_port_tty_wakeup(&acm->port);
}
@@ -1325,6 +1346,7 @@ static int acm_probe(struct usb_interface *intf,
acm->readsize = readsize;
acm->rx_buflimit = num_rx_buf;
INIT_WORK(&acm->work, acm_softint);
+ INIT_DELAYED_WORK(&acm->dwork, acm_softint);
init_waitqueue_head(&acm->wioctl);
spin_lock_init(&acm->write_lock);
spin_lock_init(&acm->read_lock);
@@ -1534,6 +1556,7 @@ static void acm_disconnect(struct usb_interface *intf)
acm_kill_urbs(acm);
cancel_work_sync(&acm->work);
+ cancel_delayed_work_sync(&acm->dwork);
tty_unregister_device(acm_tty_driver, acm->minor);
@@ -1576,6 +1599,8 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
acm_kill_urbs(acm);
cancel_work_sync(&acm->work);
+ cancel_delayed_work_sync(&acm->dwork);
+ acm->urbs_in_error_delay = 0;
return 0;
}
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index ca1c026382c2..cd5e9d8ab237 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -109,8 +109,11 @@ struct acm {
# define EVENT_TTY_WAKEUP 0
# define EVENT_RX_STALL 1
# define ACM_THROTTLED 2
+# define ACM_ERROR_DELAY 3
+ unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */
struct usb_cdc_line_coding line; /* bits, stop, parity */
- struct work_struct work; /* work queue entry for line discipline waking up */
+ struct work_struct work; /* work queue entry for various purposes*/
+ struct delayed_work dwork; /* for cool downs needed in error recovery */
unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */
unsigned int ctrlout; /* output control lines (DTR, RTS) */
struct async_icount iocount; /* counters for control line changes */
--
2.16.4
next prev parent reply other threads:[~2020-03-10 11:22 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-03 15:05 USB transaction errors causing RCU stalls and kernel panics Jonas Karlsson
2020-03-03 16:39 ` Greg KH
2020-03-03 20:08 ` Jonas Karlsson
2020-03-04 6:37 ` Greg KH
2020-03-04 10:29 ` Oliver Neukum
2020-03-04 12:11 ` Mathias Nyman
2020-03-04 14:12 ` Oliver Neukum
2020-03-04 16:21 ` Mathias Nyman
2020-03-06 1:31 ` Peter Chen
2020-03-09 14:21 ` Jonas Karlsson
2020-03-10 8:14 ` Peter Chen
2020-03-10 10:04 ` Jonas Karlsson
2020-03-10 11:04 ` Oliver Neukum [this message]
2020-03-10 11:21 ` Oliver Neukum
2020-03-10 12:26 ` Jonas Karlsson
2020-03-10 16:04 ` Jonas Karlsson
2020-03-10 16:11 ` Fabio Estevam
2020-03-11 6:25 ` Jonas Karlsson
2020-03-11 10:28 ` Oliver Neukum
2020-03-11 14:59 ` Jonas Karlsson
2020-03-12 13:45 ` Oliver Neukum
2020-03-12 15:37 ` Jonas Karlsson
2020-03-13 9:27 ` Oliver Neukum
2020-03-16 7:07 ` Jonas Karlsson
2020-03-23 11:37 ` Jonas Karlsson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1583838270.11582.11.camel@suse.com \
--to=oneukum@suse.com \
--cc=gregkh@linuxfoundation.org \
--cc=jonas.karlsson@actia.se \
--cc=linux-usb@vger.kernel.org \
--cc=mathias.nyman@linux.intel.com \
--cc=peter.chen@nxp.com \
--cc=stern@rowland.harvard.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).