linux-usb.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Oliver Neukum <oneukum@suse.de>
To: Jonas Karlsson <jonas.karlsson@actia.se>,
	Peter Chen <peter.chen@nxp.com>
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>,
	Greg KH <gregkh@linuxfoundation.org>,
	"linux-usb@vger.kernel.org" <linux-usb@vger.kernel.org>,
	Alan Stern <stern@rowland.harvard.edu>
Subject: Re: USB transaction errors causing RCU stalls and kernel panics
Date: Tue, 10 Mar 2020 12:21:46 +0100	[thread overview]
Message-ID: <1583839306.11582.12.camel@suse.de> (raw)
In-Reply-To: <1583838270.11582.11.camel@suse.com>

[-- Attachment #1: Type: text/plain, Size: 586 bytes --]

Am Dienstag, den 10.03.2020, 12:04 +0100 schrieb Oliver Neukum:
> Am Dienstag, den 10.03.2020, 10:04 +0000 schrieb Jonas Karlsson:
> 
> > Yes, I have applied that commit. The logs I have attached so far have had that commit applied.
> > It reduces the amount of Unknown event type 37 messages significantly.
> 
> I am a bit confused. If this still happens after you disabled
> autosuspend, the initial diagnosis can't be right. It looks
> like we are entering some kind of busy loop. Can you test
> the attached patches?

Correction: please test these three patches.

	Regards
		Oliver

[-- Attachment #2: 0001-cdc-acm-close-race-betrween-suspend-and-acm_softint.patch --]
[-- Type: text/x-patch, Size: 1396 bytes --]

From e5f91fa32e5294b764ed93615c20834ee3ba690e Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 5 Mar 2020 11:16:02 +0100
Subject: [PATCH 1/3] cdc-acm: close race betrween suspend() and acm_softint

Suspend increments a counter, then kills the URBs,
then kills the scheduled work. The scheduled work, however,
may reschedule the URBs. Fix this by having the work
check the counter.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
 drivers/usb/class/cdc-acm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 62f4fb9b362f..7d0167382c87 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -557,14 +557,14 @@ static void acm_softint(struct work_struct *work)
 	struct acm *acm = container_of(work, struct acm, work);
 
 	if (test_bit(EVENT_RX_STALL, &acm->flags)) {
-		if (!(usb_autopm_get_interface(acm->data))) {
+		smp_mb(); /* against acm_suspend() */
+		if (!acm->susp_count) {
 			for (i = 0; i < acm->rx_buflimit; i++)
 				usb_kill_urb(acm->read_urbs[i]);
 			usb_clear_halt(acm->dev, acm->in);
 			acm_submit_read_urbs(acm, GFP_KERNEL);
-			usb_autopm_put_interface(acm->data);
+			clear_bit(EVENT_RX_STALL, &acm->flags);
 		}
-		clear_bit(EVENT_RX_STALL, &acm->flags);
 	}
 
 	if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
-- 
2.16.4


[-- Attachment #3: 0002-cdc-acm-introduce-a-cool-down.patch --]
[-- Type: text/x-patch, Size: 4603 bytes --]

From 46f8fe7dbc51d6476bd8589dcbf0fc8a76e94102 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 10 Mar 2020 11:55:21 +0100
Subject: [PATCH 2/3] cdc-acm: introduce a cool down

Immediate submission in case of a babbling device can lead
to a busy loop. Introducing a delayed work.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
---
 drivers/usb/class/cdc-acm.c | 29 +++++++++++++++++++++++++++--
 drivers/usb/class/cdc-acm.h |  5 ++++-
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7d0167382c87..a1fbf6bf5cf4 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -412,9 +412,12 @@ static void acm_ctrl_irq(struct urb *urb)
 
 exit:
 	retval = usb_submit_urb(urb, GFP_ATOMIC);
-	if (retval && retval != -EPERM)
+	if (retval && retval != -EPERM && retval != -ENODEV)
 		dev_err(&acm->control->dev,
 			"%s - usb_submit_urb failed: %d\n", __func__, retval);
+	else
+		dev_vdbg(&acm->control->dev,
+			"control resubmission terminated %d\n", retval);
 }
 
 static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
@@ -430,6 +433,8 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
 			dev_err(&acm->data->dev,
 				"urb %d failed submission with %d\n",
 				index, res);
+		} else {
+			dev_vdbg(&acm->data->dev, "intended failure %d\n", res);
 		}
 		set_bit(index, &acm->read_urbs_free);
 		return res;
@@ -471,6 +476,7 @@ static void acm_read_bulk_callback(struct urb *urb)
 	int status = urb->status;
 	bool stopped = false;
 	bool stalled = false;
+	bool cooldown = false;
 
 	dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
 		rb->index, urb->actual_length, status);
@@ -497,6 +503,13 @@ static void acm_read_bulk_callback(struct urb *urb)
 			__func__, status);
 		stopped = true;
 		break;
+	case -EOVERFLOW:
+		dev_dbg(&acm->data->dev,
+			"%s - cooling babbling device\n", __func__);
+		usb_mark_last_busy(acm->dev);
+		set_bit(rb->index, &acm->urbs_in_error_delay);
+		cooldown = true;
+		break;
 	default:
 		dev_dbg(&acm->data->dev,
 			"%s - nonzero urb status received: %d\n",
@@ -518,9 +531,11 @@ static void acm_read_bulk_callback(struct urb *urb)
 	 */
 	smp_mb__after_atomic();
 
-	if (stopped || stalled) {
+	if (stopped || stalled || cooldown) {
 		if (stalled)
 			schedule_work(&acm->work);
+		else if (cooldown)
+			schedule_delayed_work(&acm->dwork, HZ / 2);
 		return;
 	}
 
@@ -567,6 +582,12 @@ static void acm_softint(struct work_struct *work)
 		}
 	}
 
+	if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) {
+		for (i = 0; i < ACM_NR; i++) 
+			if (test_and_clear_bit(i, &acm->urbs_in_error_delay))
+					acm_submit_read_urb(acm, i, GFP_NOIO);
+	}
+
 	if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
 		tty_port_tty_wakeup(&acm->port);
 }
@@ -1325,6 +1346,7 @@ static int acm_probe(struct usb_interface *intf,
 	acm->readsize = readsize;
 	acm->rx_buflimit = num_rx_buf;
 	INIT_WORK(&acm->work, acm_softint);
+	INIT_DELAYED_WORK(&acm->dwork, acm_softint);
 	init_waitqueue_head(&acm->wioctl);
 	spin_lock_init(&acm->write_lock);
 	spin_lock_init(&acm->read_lock);
@@ -1534,6 +1556,7 @@ static void acm_disconnect(struct usb_interface *intf)
 
 	acm_kill_urbs(acm);
 	cancel_work_sync(&acm->work);
+	cancel_delayed_work_sync(&acm->dwork);
 
 	tty_unregister_device(acm_tty_driver, acm->minor);
 
@@ -1576,6 +1599,8 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
 
 	acm_kill_urbs(acm);
 	cancel_work_sync(&acm->work);
+	cancel_delayed_work_sync(&acm->dwork);
+	acm->urbs_in_error_delay = 0;
 
 	return 0;
 }
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index ca1c026382c2..cd5e9d8ab237 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -109,8 +109,11 @@ struct acm {
 #		define EVENT_TTY_WAKEUP	0
 #		define EVENT_RX_STALL	1
 #		define ACM_THROTTLED	2
+#		define ACM_ERROR_DELAY	3
+	unsigned long urbs_in_error_delay;		/* these need to be restarted after a delay */
 	struct usb_cdc_line_coding line;		/* bits, stop, parity */
-	struct work_struct work;			/* work queue entry for line discipline waking up */
+	struct work_struct work;			/* work queue entry for various purposes*/
+	struct delayed_work dwork;			/* for cool downs needed in error recovery */
 	unsigned int ctrlin;				/* input control lines (DCD, DSR, RI, break, overruns) */
 	unsigned int ctrlout;				/* output control lines (DTR, RTS) */
 	struct async_icount iocount;			/* counters for control line changes */
-- 
2.16.4


[-- Attachment #4: 0003-cdc-acm-also-cool-doen-for-EPROTO.patch --]
[-- Type: text/x-patch, Size: 714 bytes --]

From c5a8ca0a79adfea9dda5ee51f23cf09891687a24 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 10 Mar 2020 12:19:53 +0100
Subject: [PATCH 3/3] cdc-acm: also cool doen for EPROTO

---
 drivers/usb/class/cdc-acm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index a1fbf6bf5cf4..82484cc6c36f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -504,6 +504,7 @@ static void acm_read_bulk_callback(struct urb *urb)
 		stopped = true;
 		break;
 	case -EOVERFLOW:
+	case -EPROTO:
 		dev_dbg(&acm->data->dev,
 			"%s - cooling babbling device\n", __func__);
 		usb_mark_last_busy(acm->dev);
-- 
2.16.4


  reply	other threads:[~2020-03-10 11:40 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-03 15:05 USB transaction errors causing RCU stalls and kernel panics Jonas Karlsson
2020-03-03 16:39 ` Greg KH
2020-03-03 20:08   ` Jonas Karlsson
2020-03-04  6:37     ` Greg KH
2020-03-04 10:29     ` Oliver Neukum
2020-03-04 12:11     ` Mathias Nyman
2020-03-04 14:12       ` Oliver Neukum
2020-03-04 16:21         ` Mathias Nyman
2020-03-06  1:31           ` Peter Chen
2020-03-09 14:21             ` Jonas Karlsson
2020-03-10  8:14               ` Peter Chen
2020-03-10 10:04                 ` Jonas Karlsson
2020-03-10 11:04                   ` Oliver Neukum
2020-03-10 11:21                     ` Oliver Neukum [this message]
2020-03-10 12:26                       ` Jonas Karlsson
2020-03-10 16:04                         ` Jonas Karlsson
2020-03-10 16:11                           ` Fabio Estevam
2020-03-11  6:25                             ` Jonas Karlsson
2020-03-11 10:28                               ` Oliver Neukum
2020-03-11 14:59                                 ` Jonas Karlsson
2020-03-12 13:45                                   ` Oliver Neukum
2020-03-12 15:37                                     ` Jonas Karlsson
2020-03-13  9:27                                       ` Oliver Neukum
2020-03-16  7:07                                     ` Jonas Karlsson
2020-03-23 11:37                                       ` Jonas Karlsson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1583839306.11582.12.camel@suse.de \
    --to=oneukum@suse.de \
    --cc=gregkh@linuxfoundation.org \
    --cc=jonas.karlsson@actia.se \
    --cc=linux-usb@vger.kernel.org \
    --cc=mathias.nyman@linux.intel.com \
    --cc=peter.chen@nxp.com \
    --cc=stern@rowland.harvard.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).