From: Oliver Neukum <oneukum@suse.de>
To: Jonas Karlsson <jonas.karlsson@actia.se>,
Peter Chen <peter.chen@nxp.com>
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>,
Greg KH <gregkh@linuxfoundation.org>,
"linux-usb@vger.kernel.org" <linux-usb@vger.kernel.org>,
Alan Stern <stern@rowland.harvard.edu>
Subject: Re: USB transaction errors causing RCU stalls and kernel panics
Date: Tue, 10 Mar 2020 12:21:46 +0100 [thread overview]
Message-ID: <1583839306.11582.12.camel@suse.de> (raw)
In-Reply-To: <1583838270.11582.11.camel@suse.com>
[-- Attachment #1: Type: text/plain, Size: 586 bytes --]
Am Dienstag, den 10.03.2020, 12:04 +0100 schrieb Oliver Neukum:
> Am Dienstag, den 10.03.2020, 10:04 +0000 schrieb Jonas Karlsson:
>
> > Yes, I have applied that commit. The logs I have attached so far have had that commit applied.
> > It reduces the amount of Unknown event type 37 messages significantly.
>
> I am a bit confused. If this still happens after you disabled
> autosuspend, the initial diagnosis can't be right. It looks
> like we are entering some kind of busy loop. Can you test
> the attached patches?
Correction: please test these three patches.
Regards
Oliver
[-- Attachment #2: 0001-cdc-acm-close-race-betrween-suspend-and-acm_softint.patch --]
[-- Type: text/x-patch, Size: 1396 bytes --]
From e5f91fa32e5294b764ed93615c20834ee3ba690e Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 5 Mar 2020 11:16:02 +0100
Subject: [PATCH 1/3] cdc-acm: close race between suspend() and acm_softint
Suspend increments a counter, then kills the URBs,
then kills the scheduled work. The scheduled work, however,
may reschedule the URBs. Fix this by having the work
check the counter.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-acm.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 62f4fb9b362f..7d0167382c87 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -557,14 +557,14 @@ static void acm_softint(struct work_struct *work)
struct acm *acm = container_of(work, struct acm, work);
if (test_bit(EVENT_RX_STALL, &acm->flags)) {
- if (!(usb_autopm_get_interface(acm->data))) {
+ smp_mb(); /* against acm_suspend() */
+ if (!acm->susp_count) {
for (i = 0; i < acm->rx_buflimit; i++)
usb_kill_urb(acm->read_urbs[i]);
usb_clear_halt(acm->dev, acm->in);
acm_submit_read_urbs(acm, GFP_KERNEL);
- usb_autopm_put_interface(acm->data);
+ clear_bit(EVENT_RX_STALL, &acm->flags);
}
- clear_bit(EVENT_RX_STALL, &acm->flags);
}
if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
--
2.16.4
[-- Attachment #3: 0002-cdc-acm-introduce-a-cool-down.patch --]
[-- Type: text/x-patch, Size: 4603 bytes --]
From 46f8fe7dbc51d6476bd8589dcbf0fc8a76e94102 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 10 Mar 2020 11:55:21 +0100
Subject: [PATCH 2/3] cdc-acm: introduce a cool down
Immediate submission in case of a babbling device can lead
to a busy loop. Introduce a delayed work to provide a cool down.
Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
drivers/usb/class/cdc-acm.c | 29 +++++++++++++++++++++++++++--
drivers/usb/class/cdc-acm.h | 5 ++++-
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7d0167382c87..a1fbf6bf5cf4 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -412,9 +412,12 @@ static void acm_ctrl_irq(struct urb *urb)
exit:
retval = usb_submit_urb(urb, GFP_ATOMIC);
- if (retval && retval != -EPERM)
+ if (retval && retval != -EPERM && retval != -ENODEV)
dev_err(&acm->control->dev,
"%s - usb_submit_urb failed: %d\n", __func__, retval);
+ else
+ dev_vdbg(&acm->control->dev,
+ "control resubmission terminated %d\n", retval);
}
static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
@@ -430,6 +433,8 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
dev_err(&acm->data->dev,
"urb %d failed submission with %d\n",
index, res);
+ } else {
+ dev_vdbg(&acm->data->dev, "intended failure %d\n", res);
}
set_bit(index, &acm->read_urbs_free);
return res;
@@ -471,6 +476,7 @@ static void acm_read_bulk_callback(struct urb *urb)
int status = urb->status;
bool stopped = false;
bool stalled = false;
+ bool cooldown = false;
dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
rb->index, urb->actual_length, status);
@@ -497,6 +503,13 @@ static void acm_read_bulk_callback(struct urb *urb)
__func__, status);
stopped = true;
break;
+ case -EOVERFLOW:
+ dev_dbg(&acm->data->dev,
+ "%s - cooling babbling device\n", __func__);
+ usb_mark_last_busy(acm->dev);
+ set_bit(rb->index, &acm->urbs_in_error_delay);
+ cooldown = true;
+ break;
default:
dev_dbg(&acm->data->dev,
"%s - nonzero urb status received: %d\n",
@@ -518,9 +531,11 @@ static void acm_read_bulk_callback(struct urb *urb)
*/
smp_mb__after_atomic();
- if (stopped || stalled) {
+ if (stopped || stalled || cooldown) {
if (stalled)
schedule_work(&acm->work);
+ else if (cooldown)
+ schedule_delayed_work(&acm->dwork, HZ / 2);
return;
}
@@ -567,6 +582,12 @@ static void acm_softint(struct work_struct *work)
}
}
+ if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) {
+ for (i = 0; i < ACM_NR; i++)
+ if (test_and_clear_bit(i, &acm->urbs_in_error_delay))
+ acm_submit_read_urb(acm, i, GFP_NOIO);
+ }
+
if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
tty_port_tty_wakeup(&acm->port);
}
@@ -1325,6 +1346,7 @@ static int acm_probe(struct usb_interface *intf,
acm->readsize = readsize;
acm->rx_buflimit = num_rx_buf;
INIT_WORK(&acm->work, acm_softint);
+ INIT_DELAYED_WORK(&acm->dwork, acm_softint);
init_waitqueue_head(&acm->wioctl);
spin_lock_init(&acm->write_lock);
spin_lock_init(&acm->read_lock);
@@ -1534,6 +1556,7 @@ static void acm_disconnect(struct usb_interface *intf)
acm_kill_urbs(acm);
cancel_work_sync(&acm->work);
+ cancel_delayed_work_sync(&acm->dwork);
tty_unregister_device(acm_tty_driver, acm->minor);
@@ -1576,6 +1599,8 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
acm_kill_urbs(acm);
cancel_work_sync(&acm->work);
+ cancel_delayed_work_sync(&acm->dwork);
+ acm->urbs_in_error_delay = 0;
return 0;
}
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index ca1c026382c2..cd5e9d8ab237 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -109,8 +109,11 @@ struct acm {
# define EVENT_TTY_WAKEUP 0
# define EVENT_RX_STALL 1
# define ACM_THROTTLED 2
+# define ACM_ERROR_DELAY 3
+ unsigned long urbs_in_error_delay; /* these need to be restarted after a delay */
struct usb_cdc_line_coding line; /* bits, stop, parity */
- struct work_struct work; /* work queue entry for line discipline waking up */
+ struct work_struct work; /* work queue entry for various purposes*/
+ struct delayed_work dwork; /* for cool downs needed in error recovery */
unsigned int ctrlin; /* input control lines (DCD, DSR, RI, break, overruns) */
unsigned int ctrlout; /* output control lines (DTR, RTS) */
struct async_icount iocount; /* counters for control line changes */
--
2.16.4
[-- Attachment #4: 0003-cdc-acm-also-cool-doen-for-EPROTO.patch --]
[-- Type: text/x-patch, Size: 714 bytes --]
From c5a8ca0a79adfea9dda5ee51f23cf09891687a24 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 10 Mar 2020 12:19:53 +0100
Subject: [PATCH 3/3] cdc-acm: also cool down for EPROTO
---
drivers/usb/class/cdc-acm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index a1fbf6bf5cf4..82484cc6c36f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -504,6 +504,7 @@ static void acm_read_bulk_callback(struct urb *urb)
stopped = true;
break;
case -EOVERFLOW:
+ case -EPROTO:
dev_dbg(&acm->data->dev,
"%s - cooling babbling device\n", __func__);
usb_mark_last_busy(acm->dev);
--
2.16.4
next prev parent reply other threads:[~2020-03-10 11:40 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-03-03 15:05 USB transaction errors causing RCU stalls and kernel panics Jonas Karlsson
2020-03-03 16:39 ` Greg KH
2020-03-03 20:08 ` Jonas Karlsson
2020-03-04 6:37 ` Greg KH
2020-03-04 10:29 ` Oliver Neukum
2020-03-04 12:11 ` Mathias Nyman
2020-03-04 14:12 ` Oliver Neukum
2020-03-04 16:21 ` Mathias Nyman
2020-03-06 1:31 ` Peter Chen
2020-03-09 14:21 ` Jonas Karlsson
2020-03-10 8:14 ` Peter Chen
2020-03-10 10:04 ` Jonas Karlsson
2020-03-10 11:04 ` Oliver Neukum
2020-03-10 11:21 ` Oliver Neukum [this message]
2020-03-10 12:26 ` Jonas Karlsson
2020-03-10 16:04 ` Jonas Karlsson
2020-03-10 16:11 ` Fabio Estevam
2020-03-11 6:25 ` Jonas Karlsson
2020-03-11 10:28 ` Oliver Neukum
2020-03-11 14:59 ` Jonas Karlsson
2020-03-12 13:45 ` Oliver Neukum
2020-03-12 15:37 ` Jonas Karlsson
2020-03-13 9:27 ` Oliver Neukum
2020-03-16 7:07 ` Jonas Karlsson
2020-03-23 11:37 ` Jonas Karlsson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1583839306.11582.12.camel@suse.de \
--to=oneukum@suse.de \
--cc=gregkh@linuxfoundation.org \
--cc=jonas.karlsson@actia.se \
--cc=linux-usb@vger.kernel.org \
--cc=mathias.nyman@linux.intel.com \
--cc=peter.chen@nxp.com \
--cc=stern@rowland.harvard.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).