* [PATCH] xhci: Cope with VIA VL805 readahead
@ 2017-10-10 18:09 Robin Murphy
2017-10-11 9:23 ` David Laight
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Robin Murphy @ 2017-10-10 18:09 UTC (permalink / raw)
To: mathias.nyman, gregkh; +Cc: linux-usb, linux-kernel, David.Laight, angelsl
The VIA VL805 host controller is well-known for causing problems on
systems with IOMMUs enabled, ranging from triggering endless streams of
fault messages to locking itself up completely. It appears that the root
of the problem might be an over-aggressive prefetching of TRBs, wherein
consuming commands near the end of a queue segment causes it to read off
the end of the segment, even across a page boundary. This blows up when
DMA mapping ops are backed by an IOMMU, since there is no guarantee that
addresses outside the allocated segment are accessible at all.
Some trial-and-error investigation reveals that we can avoid such
cross-page reads by not using the last few TRBs in a segment; to that
end, factor out the implicit index of the end-of-segemnt link TRB, and
implement a quirk to move it slightly further forward when necessary.
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
drivers/usb/host/xhci-mem.c | 32 +++++++++++++++++++-------------
drivers/usb/host/xhci-pci.c | 5 +++++
drivers/usb/host/xhci-ring.c | 10 +++++++++-
drivers/usb/host/xhci.c | 10 +++++-----
drivers/usb/host/xhci.h | 2 ++
5 files changed, 40 insertions(+), 19 deletions(-)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 2a82c927ded2..bb62f100d028 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -23,6 +23,7 @@
#include <linux/usb.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/kernel.h>
#include <linux/dmapool.h>
#include <linux/dma-mapping.h>
@@ -108,17 +109,18 @@ static void xhci_free_segments_for_ring(struct xhci_hcd *xhci,
static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
struct xhci_segment *next, enum xhci_ring_type type)
{
+ unsigned int link_idx;
u32 val;
if (!prev || !next)
return;
prev->next = next;
if (type != TYPE_EVENT) {
- prev->trbs[TRBS_PER_SEGMENT-1].link.segment_ptr =
- cpu_to_le64(next->dma);
+ link_idx = xhci_segment_link_idx(xhci);
+ prev->trbs[link_idx].link.segment_ptr = cpu_to_le64(next->dma);
/* Set the last TRB in the segment to have a TRB type ID of Link TRB */
- val = le32_to_cpu(prev->trbs[TRBS_PER_SEGMENT-1].link.control);
+ val = le32_to_cpu(prev->trbs[link_idx].link.control);
val &= ~TRB_TYPE_BITMASK;
val |= TRB_TYPE(TRB_LINK);
/* Always set the chain bit with 0.95 hardware */
@@ -127,7 +129,7 @@ static void xhci_link_segments(struct xhci_hcd *xhci, struct xhci_segment *prev,
(type == TYPE_ISOC &&
(xhci->quirks & XHCI_AMD_0x96_HOST)))
val |= TRB_CHAIN;
- prev->trbs[TRBS_PER_SEGMENT-1].link.control = cpu_to_le32(val);
+ prev->trbs[link_idx].link.control = cpu_to_le32(val);
}
}
@@ -140,20 +142,22 @@ static void xhci_link_rings(struct xhci_hcd *xhci, struct xhci_ring *ring,
unsigned int num_segs)
{
struct xhci_segment *next;
+ unsigned int link_idx;
if (!ring || !first || !last)
return;
next = ring->enq_seg->next;
+ link_idx = xhci_segment_link_idx(xhci);
xhci_link_segments(xhci, ring->enq_seg, first, ring->type);
xhci_link_segments(xhci, last, next, ring->type);
ring->num_segs += num_segs;
- ring->num_trbs_free += (TRBS_PER_SEGMENT - 1) * num_segs;
+ ring->num_trbs_free += link_idx * num_segs;
if (ring->type != TYPE_EVENT && ring->enq_seg == ring->last_seg) {
- ring->last_seg->trbs[TRBS_PER_SEGMENT-1].link.control
+ ring->last_seg->trbs[link_idx].link.control
&= ~cpu_to_le32(LINK_TOGGLE);
- last->trbs[TRBS_PER_SEGMENT-1].link.control
+ last->trbs[link_idx].link.control
|= cpu_to_le32(LINK_TOGGLE);
ring->last_seg = last;
}
@@ -300,7 +304,8 @@ void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring)
}
static void xhci_initialize_ring_info(struct xhci_ring *ring,
- unsigned int cycle_state)
+ unsigned int cycle_state,
+ unsigned int link_idx)
{
/* The ring is empty, so the enqueue pointer == dequeue pointer */
ring->enqueue = ring->first_seg->trbs;
@@ -320,7 +325,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring,
* Each segment has a link TRB, and leave an extra TRB for SW
* accounting purpose
*/
- ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1;
+ ring->num_trbs_free = ring->num_segs * link_idx - 1;
}
/* Allocate segments and link them for a ring */
@@ -373,6 +378,7 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
enum xhci_ring_type type, unsigned int max_packet, gfp_t flags)
{
struct xhci_ring *ring;
+ unsigned int link_idx;
int ret;
ring = kzalloc(sizeof *(ring), flags);
@@ -393,12 +399,13 @@ static struct xhci_ring *xhci_ring_alloc(struct xhci_hcd *xhci,
goto fail;
/* Only event ring does not use link TRB */
+ link_idx = xhci_segment_link_idx(xhci);
if (type != TYPE_EVENT) {
/* See section 4.9.2.1 and 6.4.4.1 */
- ring->last_seg->trbs[TRBS_PER_SEGMENT - 1].link.control |=
+ ring->last_seg->trbs[link_idx].link.control |=
cpu_to_le32(LINK_TOGGLE);
}
- xhci_initialize_ring_info(ring, cycle_state);
+ xhci_initialize_ring_info(ring, cycle_state, link_idx);
trace_xhci_ring_alloc(ring);
return ring;
@@ -428,8 +435,7 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring,
unsigned int num_segs_needed;
int ret;
- num_segs_needed = (num_trbs + (TRBS_PER_SEGMENT - 1) - 1) /
- (TRBS_PER_SEGMENT - 1);
+ num_segs_needed = DIV_ROUND_UP(num_trbs, xhci_segment_link_idx(xhci));
/* Allocate number of segments we needed, or double the ring size */
num_segs = ring->num_segs > num_segs_needed ?
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 8071c8fdd15e..458404a22cf1 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -212,6 +212,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == 0x3432)
xhci->quirks |= XHCI_BROKEN_STREAMS;
+ /* VIA VL805 reads past the end of queue segments */
+ if (pdev->vendor == PCI_VENDOR_ID_VIA &&
+ pdev->device == 0x3483)
+ xhci->quirks |= XHCI_READAHEAD_QUIRK;
+
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
pdev->device == 0x1042)
xhci->quirks |= XHCI_BROKEN_STREAMS;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a9443651ce0f..42aee439ed8c 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -89,6 +89,14 @@ dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg,
return seg->dma + (segment_offset * sizeof(*trb));
}
+unsigned int xhci_segment_link_idx(struct xhci_hcd *xhci)
+{
+ if (xhci->quirks & XHCI_READAHEAD_QUIRK)
+ return TRBS_PER_SEGMENT - 4;
+
+ return TRBS_PER_SEGMENT - 1;
+}
+
static bool trb_is_noop(union xhci_trb *trb)
{
return TRB_TYPE_NOOP_LE32(trb->generic.field[3]);
@@ -1780,7 +1788,7 @@ struct xhci_segment *trb_in_td(struct xhci_hcd *xhci,
return NULL;
/* We may get an event for a Link TRB in the middle of a TD */
end_seg_dma = xhci_trb_virt_to_dma(cur_seg,
- &cur_seg->trbs[TRBS_PER_SEGMENT - 1]);
+ &cur_seg->trbs[xhci_segment_link_idx(xhci)]);
/* If the end TRB isn't in this segment, this is set to 0 */
end_trb_dma = xhci_trb_virt_to_dma(cur_seg, end_trb);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index b2ff1ff1a02f..74b4500641c2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -788,14 +788,14 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci)
{
struct xhci_ring *ring;
struct xhci_segment *seg;
+ unsigned int link_idx;
ring = xhci->cmd_ring;
seg = ring->deq_seg;
+ link_idx = xhci_segment_link_idx(xhci);
do {
- memset(seg->trbs, 0,
- sizeof(union xhci_trb) * (TRBS_PER_SEGMENT - 1));
- seg->trbs[TRBS_PER_SEGMENT - 1].link.control &=
- cpu_to_le32(~TRB_CYCLE);
+ memset(seg->trbs, 0, sizeof(*seg->trbs) * link_idx);
+ seg->trbs[link_idx].link.control &= cpu_to_le32(~TRB_CYCLE);
seg = seg->next;
} while (seg != ring->deq_seg);
@@ -805,7 +805,7 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci)
ring->enq_seg = ring->deq_seg;
ring->enqueue = ring->dequeue;
- ring->num_trbs_free = ring->num_segs * (TRBS_PER_SEGMENT - 1) - 1;
+ ring->num_trbs_free = ring->num_segs * link_idx - 1;
/*
* Ring is now zeroed, so the HW should look for change of ownership
* when the cycle bit is set to 1.
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 2abaa4d6d39d..7ef69ea0b480 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1828,6 +1828,7 @@ struct xhci_hcd {
#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
#define XHCI_U2_DISABLE_WAKE (1 << 27)
#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL (1 << 28)
+#define XHCI_READAHEAD_QUIRK (1 << 29)
unsigned int num_active_eps;
unsigned int limit_active_eps;
@@ -2031,6 +2032,7 @@ struct xhci_segment *trb_in_td(struct xhci_hcd *xhci,
union xhci_trb *end_trb, dma_addr_t suspect_dma, bool debug);
int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code);
void xhci_ring_cmd_db(struct xhci_hcd *xhci);
+unsigned int xhci_segment_link_idx(struct xhci_hcd *xhci);
int xhci_queue_slot_control(struct xhci_hcd *xhci, struct xhci_command *cmd,
u32 trb_type, u32 slot_id);
int xhci_queue_address_device(struct xhci_hcd *xhci, struct xhci_command *cmd,
--
2.13.4.dirty
^ permalink raw reply related [flat|nested] 6+ messages in thread* RE: [PATCH] xhci: Cope with VIA VL805 readahead
2017-10-10 18:09 [PATCH] xhci: Cope with VIA VL805 readahead Robin Murphy
@ 2017-10-11 9:23 ` David Laight
2017-10-11 14:40 ` Mathias Nyman
2017-10-12 10:35 ` Hao Wei Tee
2 siblings, 0 replies; 6+ messages in thread
From: David Laight @ 2017-10-11 9:23 UTC (permalink / raw)
To: 'Robin Murphy', mathias.nyman@intel.com,
gregkh@linuxfoundation.org
Cc: linux-usb@vger.kernel.org, linux-kernel@vger.kernel.org,
angelsl@in04.sg
From: Robin Murphy
> Sent: 10 October 2017 19:09
>
> The VIA VL805 host controller is well-known for causing problems on
> systems with IOMMUs enabled, ranging from triggering endless streams of
> fault messages to locking itself up completely. It appears that the root
> of the problem might be an over-aggressive prefetching of TRBs, wherein
> consuming commands near the end of a queue segment causes it to read off
> the end of the segment, even across a page boundary. This blows up when
> DMA mapping ops are backed by an IOMMU, since there is no guarantee that
> addresses outside the allocated segment are accessible at all.
>
> Some trial-and-error investigation reveals that we can avoid such
> cross-page reads by not using the last few TRBs in a segment; to that
> end, factor out the implicit index of the end-of-segemnt link TRB, and
> implement a quirk to move it slightly further forward when necessary.
Does this fix all of your problems?
Or is there a second issue when the iommu is disabled?
...
> +unsigned int xhci_segment_link_idx(struct xhci_hcd *xhci)
> +{
> + if (xhci->quirks & XHCI_READAHEAD_QUIRK)
> + return TRBS_PER_SEGMENT - 4;
> +
> + return TRBS_PER_SEGMENT - 1;
> +}
There is no point calculating this every time it is needed.
Save the value in the xhci structure.
I wonder whether it is actually worth just setting TRBS_PER_SEGMENT
to 252 but allocating a full page (256 TRBs)
(ie doing it unconditionally for all devices).
I suspect the performance drop will be immeasurable small.
David
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] xhci: Cope with VIA VL805 readahead
2017-10-10 18:09 [PATCH] xhci: Cope with VIA VL805 readahead Robin Murphy
2017-10-11 9:23 ` David Laight
@ 2017-10-11 14:40 ` Mathias Nyman
2017-10-11 15:46 ` David Laight
2017-10-12 10:35 ` Hao Wei Tee
2 siblings, 1 reply; 6+ messages in thread
From: Mathias Nyman @ 2017-10-11 14:40 UTC (permalink / raw)
To: Robin Murphy, mathias.nyman, gregkh
Cc: linux-usb, linux-kernel, David.Laight, angelsl
On 10.10.2017 21:09, Robin Murphy wrote:
> The VIA VL805 host controller is well-known for causing problems on
> systems with IOMMUs enabled, ranging from triggering endless streams of
> fault messages to locking itself up completely. It appears that the root
> of the problem might be an over-aggressive prefetching of TRBs, wherein
> consuming commands near the end of a queue segment causes it to read off
> the end of the segment, even across a page boundary. This blows up when
> DMA mapping ops are backed by an IOMMU, since there is no guarantee that
> addresses outside the allocated segment are accessible at all.
>
> Some trial-and-error investigation reveals that we can avoid such
> cross-page reads by not using the last few TRBs in a segment; to that
> end, factor out the implicit index of the end-of-segemnt link TRB, and
> implement a quirk to move it slightly further forward when necessary.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> drivers/usb/host/xhci-mem.c | 32 +++++++++++++++++++-------------
> drivers/usb/host/xhci-pci.c | 5 +++++
> drivers/usb/host/xhci-ring.c | 10 +++++++++-
> drivers/usb/host/xhci.c | 10 +++++-----
> drivers/usb/host/xhci.h | 2 ++
> 5 files changed, 40 insertions(+), 19 deletions(-)
>
Thanks for testing all this.
If possible I'd like to try and find some other solution before chopping the Segment
size to smaller than 256.
I think that your first proposal of adding the guard page to the segment pool could be an option.
other things that could be checked:
- check if we can avoid prefetching over segment by altering the link TRB chain or ioc bits.
- check if we prefetch only over mid-ring link TRBs or also over last link TRB with cycle change.
If only mid ring then we could try to allocate two consecutive pages for the segments.
- check if prefething over segment is related to manually setting the TR dequeue command.
Set TR deq ponter command fushes xHC chached TRBs and might be the cause for the prefetch.
- does VIA VL805 have some odd xhci page size (xhci PAGEZISE register), does it affect anything.
But if nothing else seems to work then chopping the page size could be an option
-Mathias
^ permalink raw reply [flat|nested] 6+ messages in thread
* RE: [PATCH] xhci: Cope with VIA VL805 readahead
2017-10-11 14:40 ` Mathias Nyman
@ 2017-10-11 15:46 ` David Laight
2017-10-12 8:25 ` Mathias Nyman
0 siblings, 1 reply; 6+ messages in thread
From: David Laight @ 2017-10-11 15:46 UTC (permalink / raw)
To: 'Mathias Nyman', Robin Murphy, mathias.nyman@intel.com,
gregkh@linuxfoundation.org
Cc: linux-usb@vger.kernel.org, linux-kernel@vger.kernel.org,
angelsl@in04.sg
From: Mathias Nyman
> Sent: 11 October 2017 15:41
..
> If possible I'd like to try and find some other solution before chopping the Segment
> size to smaller than 256.
> I think that your first proposal of adding the guard page to the segment pool could be an option.
Would be a waste of a page - which could be used for a lot of extra TRB.
IIRC the rings used to have 16 TRB - that caused some serious problems,
but I can't quite remember all of them.
I don't remember anything that made 256 'good' - except that it was a 4k page.
Did you add something to copy badly fragmented buffers to get around the
problem with misaligned LINKs. ISTR my original fix didn't work for requests
for very long disk transfers.
> other things that could be checked:
> - check if we can avoid prefetching over segment by altering the link TRB chain or ioc bits.
> - check if we prefetch only over mid-ring link TRBs or also over last link TRB with cycle change.
> If only mid ring then we could try to allocate two consecutive pages for the segments.
> - check if prefething over segment is related to manually setting the TR dequeue command.
> Set TR deq ponter command fushes xHC chached TRBs and might be the cause for the prefetch.
I'd guess that it is reading multiple TRB and can't know one is a LINK.
> - does VIA VL805 have some odd xhci page size (xhci PAGEZISE register), does it affect anything.
It is 'funny' that this device will try to prefetch TRB across 64k boundaries
but data buffers aren't allowed to cross them.
David
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] xhci: Cope with VIA VL805 readahead
2017-10-11 15:46 ` David Laight
@ 2017-10-12 8:25 ` Mathias Nyman
0 siblings, 0 replies; 6+ messages in thread
From: Mathias Nyman @ 2017-10-12 8:25 UTC (permalink / raw)
To: David Laight, Robin Murphy, mathias.nyman@intel.com,
gregkh@linuxfoundation.org
Cc: linux-usb@vger.kernel.org, linux-kernel@vger.kernel.org,
angelsl@in04.sg
On 11.10.2017 18:46, David Laight wrote:
> From: Mathias Nyman
>> Sent: 11 October 2017 15:41
> ..
>> If possible I'd like to try and find some other solution before chopping the Segment
>> size to smaller than 256.
>> I think that your first proposal of adding the guard page to the segment pool could be an option.
>
> Would be a waste of a page - which could be used for a lot of extra TRB.
Yes, for VIA VL805 it would.
Reducing the TRBs per segment isn't outruled. I'm just trying
to see if we can find a different solution.
> IIRC the rings used to have 16 TRB - that caused some serious problems,
> but I can't quite remember all of them.
It used to be 64 TRBs,
issues were:
- event ring fill up before we could handle the events.
- more frequent dynamic ring expansion issues
> I don't remember anything that made 256 'good' - except that it was a 4k page.
Event ring doesn't get filled up.
Segment uses the whole page
Link trbs are not that frequent, less 64k aligment issues with possible bounce buffers.
Debugging is easier, I see directly from offset how near the end of the ring we are,
just like when looking at the dmesg of this case.
Future plans to index TRBs, converting from dma address to index really easy if
amount of TRBs is power of 2.
>
> Did you add something to copy badly fragmented buffers to get around the
> problem with misaligned LINKs. ISTR my original fix didn't work for requests
> for very long disk transfers.
Yes, xhci_align_td() cuts the data length of the TRB before link TRB to
be at a packet boundary. If there is no packet boundary then we use a bounce buffer.
-Mathias
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] xhci: Cope with VIA VL805 readahead
2017-10-10 18:09 [PATCH] xhci: Cope with VIA VL805 readahead Robin Murphy
2017-10-11 9:23 ` David Laight
2017-10-11 14:40 ` Mathias Nyman
@ 2017-10-12 10:35 ` Hao Wei Tee
2 siblings, 0 replies; 6+ messages in thread
From: Hao Wei Tee @ 2017-10-12 10:35 UTC (permalink / raw)
To: Robin Murphy, mathias.nyman, gregkh; +Cc: linux-usb, linux-kernel, David.Laight
On 11/10/2017 02:09, Robin Murphy wrote:
> The VIA VL805 host controller is well-known for causing problems on
> systems with IOMMUs enabled, ranging from triggering endless streams of
> fault messages to locking itself up completely. It appears that the root
> of the problem might be an over-aggressive prefetching of TRBs, wherein
> consuming commands near the end of a queue segment causes it to read off
> the end of the segment, even across a page boundary. This blows up when
> DMA mapping ops are backed by an IOMMU, since there is no guarantee that
> addresses outside the allocated segment are accessible at all.
>
> Some trial-and-error investigation reveals that we can avoid such
> cross-page reads by not using the last few TRBs in a segment; to that
> end, factor out the implicit index of the end-of-segemnt link TRB, and
> implement a quirk to move it slightly further forward when necessary.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> drivers/usb/host/xhci-mem.c | 32 +++++++++++++++++++-------------
> drivers/usb/host/xhci-pci.c | 5 +++++
> drivers/usb/host/xhci-ring.c | 10 +++++++++-
> drivers/usb/host/xhci.c | 10 +++++-----
> drivers/usb/host/xhci.h | 2 ++
> 5 files changed, 40 insertions(+), 19 deletions(-)
Seems like the invalid DMA I'm getting on xhci_reset during the xhci driver's
initialisation/probing process isn't the same one you're getting.. This patch
doesn't appear to change that at all.
Oh well.
Thanks,
--
Hao Wei
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2017-10-12 10:35 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-10-10 18:09 [PATCH] xhci: Cope with VIA VL805 readahead Robin Murphy
2017-10-11 9:23 ` David Laight
2017-10-11 14:40 ` Mathias Nyman
2017-10-11 15:46 ` David Laight
2017-10-12 8:25 ` Mathias Nyman
2017-10-12 10:35 ` Hao Wei Tee
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox