From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stefan Herbrechtsmeier Date: Fri, 20 Jul 2012 13:37:37 +0200 Subject: [U-Boot] [PATCH v2 2/5] ehci-hcd: Boost transfer speed In-Reply-To: <1024949320.326453.1342783584348.JavaMail.root@advansee.com> References: <1024949320.326453.1342783584348.JavaMail.root@advansee.com> Message-ID: <50094301.5010507@herbrechtsmeier.net> List-Id: MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: u-boot@lists.denx.de Am 20.07.2012 13:26, schrieb Benoît Thébaudeau: > This patch takes advantage of the hardware EHCI qTD queuing mechanism to avoid > software overhead and to make transfers as fast as possible. > > The only drawback is a call to memalign. However, this is fast compared to the > transfer timings, and the heap size to allocate is small, e.g. a little bit more > than 100 kB for a transfer length of 65535 packets of 512 bytes. > > Tested on i.MX25 and i.MX35. In my test conditions, the speedup was about 15x > using page-aligned buffers, which is really appreciable when accessing large > files. > > Signed-off-by: Benoît Thébaudeau > Cc: Marek Vasut > Cc: Ilya Yanok > Cc: Stefan Herbrechtsmeier > --- > Changes for v2: > - Use DIV_ROUND_UP to make code more readable. 
> > .../drivers/usb/host/ehci-hcd.c | 92 ++++++++++++++------ > 1 file changed, 63 insertions(+), 29 deletions(-) > > diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c > index 5b3b906..cf9ab92 100644 > --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c > +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c > @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, > int length, struct devrequest *req) > { > ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN); > - ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN); > + struct qTD *qtd; > + int qtd_count = 0; > int qtd_counter = 0; > > volatile struct qTD *vtd; > @@ -229,8 +230,23 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, > le16_to_cpu(req->value), le16_to_cpu(req->value), > le16_to_cpu(req->index)); > > + if (req != NULL) /* SETUP + ACK */ > + qtd_count += 1 + 1; > + if (length > 0 || req == NULL) { /* buffer */ > + if ((uint32_t)buffer & 4095) /* page-unaligned */ > + qtd_count += DIV_ROUND_UP(((uint32_t)buffer & 4095) + > + length, (QT_BUFFER_CNT - 1) * 4096); > + else /* page-aligned */ > + qtd_count += DIV_ROUND_UP(length, QT_BUFFER_CNT * 4096); > + } > + qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD)); > + if (qtd == NULL) { > + printf("unable to allocate TDs\n"); > + return -1; > + } > + > memset(qh, 0, sizeof(struct QH)); > - memset(qtd, 0, 3 * sizeof(*qtd)); > + memset(qtd, 0, qtd_count * sizeof(*qtd)); > > toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe)); > > @@ -291,31 +307,46 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, > } > > if (length > 0 || req == NULL) { > - /* > - * Setup request qTD (3.5 in ehci-r10.pdf) > - * > - * qt_next ................ 03-00 H > - * qt_altnext ............. 07-04 H > - * qt_token ............... 0B-08 H > - * > - * [ buffer, buffer_hi ] loaded with "buffer". 
> - */ > - qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE); > - qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE); > - token = (toggle << 31) | > - (length << 16) | > - ((req == NULL ? 1 : 0) << 15) | > - (0 << 12) | > - (3 << 10) | > - ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); > - qtd[qtd_counter].qt_token = cpu_to_hc32(token); > - if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) { > - printf("unable construct DATA td\n"); > - goto fail; > - } > - /* Update previous qTD! */ > - *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); > - tdp = &qtd[qtd_counter++].qt_next; > + uint8_t *buf_ptr = buffer; > + int left_length = length; > + > + do { > + int xfr_bytes = min(left_length, > + (QT_BUFFER_CNT * 4096 - > + ((uint32_t)buf_ptr & 4095)) & > + ~4095); Why do you align the length to 4096? > + > + /* > + * Setup request qTD (3.5 in ehci-r10.pdf) > + * > + * qt_next ................ 03-00 H > + * qt_altnext ............. 07-04 H > + * qt_token ............... 0B-08 H > + * > + * [ buffer, buffer_hi ] loaded with "buffer". > + */ > + qtd[qtd_counter].qt_next = > + cpu_to_hc32(QT_NEXT_TERMINATE); > + qtd[qtd_counter].qt_altnext = > + cpu_to_hc32(QT_NEXT_TERMINATE); > + token = (toggle << 31) | > + (xfr_bytes << 16) | > + ((req == NULL ? 1 : 0) << 15) | > + (0 << 12) | > + (3 << 10) | > + ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0); > + qtd[qtd_counter].qt_token = cpu_to_hc32(token); > + if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr, > + xfr_bytes) != 0) { > + printf("unable construct DATA td\n"); > + goto fail; > + } > + /* Update previous qTD! 
*/ > + *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]); > + tdp = &qtd[qtd_counter++].qt_next; > + buf_ptr += xfr_bytes; > + left_length -= xfr_bytes; > + } while (left_length > 0); > } > > if (req != NULL) { > @@ -346,7 +377,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, > flush_dcache_range((uint32_t)qh_list, > ALIGN_END_ADDR(struct QH, qh_list, 1)); > flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1)); > - flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3)); > + flush_dcache_range((uint32_t)qtd, > + ALIGN_END_ADDR(struct qTD, qtd, qtd_count)); > > /* Set async. queue head pointer. */ > ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list); > @@ -377,7 +409,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, > invalidate_dcache_range((uint32_t)qh, > ALIGN_END_ADDR(struct QH, qh, 1)); > invalidate_dcache_range((uint32_t)qtd, > - ALIGN_END_ADDR(struct qTD, qtd, 3)); > + ALIGN_END_ADDR(struct qTD, qtd, qtd_count)); > > token = hc32_to_cpu(vtd->qt_token); > if (!(token & 0x80)) > @@ -450,9 +482,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer, > ehci_readl(&hcor->or_portsc[1])); > } > > + free(qtd); > return (dev->status != USB_ST_NOT_PROC) ? 0 : -1; > > fail: > + free(qtd); > return -1; > } >