linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ARM: add PrimeCell generic DMA to PL011
@ 2010-10-06  9:32 Linus Walleij
  2010-10-12 19:30 ` Russell King - ARM Linux
  2010-12-22 14:10 ` Russell King - ARM Linux
  0 siblings, 2 replies; 11+ messages in thread
From: Linus Walleij @ 2010-10-06  9:32 UTC (permalink / raw)
  To: linux-arm-kernel

This extends the PL011 UART driver with generic DMA engine support
using the PrimeCell DMA engine interface.

Tested-by: Jerzy Kasenberg <jerzy.kasenberg@tieto.com>
Tested-by: Grzegorz Sygieda <grzegorz.sygieda@tieto.com>
Tested-by: Marcin Mielczarczyk <marcin.mielczarczyk@tieto.com>
Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
---
Changes from previous version that was in the patch set for
PrimeCell DMA (I've lost count):

This adds support for the ST-Ericsson specific DMA watermarking
via a vendor data-specified setup function, a design pattern
likely to be useful for other users as well.
---
 drivers/serial/amba-pl011.c |  871 ++++++++++++++++++++++++++++++++++++++++++-
 include/linux/amba/serial.h |   21 +
 2 files changed, 883 insertions(+), 9 deletions(-)

diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 6ca7a44..d99fed0 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -7,6 +7,7 @@
  *
  *  Copyright 1999 ARM Limited
  *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2010 ST-Ericsson SA
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -48,6 +49,11 @@
 #include <linux/amba/serial.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
 
 #include <asm/io.h>
 #include <asm/sizes.h>
@@ -63,6 +69,27 @@
 #define UART_DR_ERROR		(UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE)
 #define UART_DUMMY_DR_RX	(1 << 16)
 
+struct uart_amba_port;
+typedef	void (*dma_init_fn)(struct uart_amba_port *uap);
+
+/* Deals with DMA transactions */
+struct pl011_dma_rx_transaction {
+	struct completion complete;
+	bool use_buffer_b;
+	struct scatterlist scatter_a;
+	struct scatterlist scatter_b;
+	char *rx_dma_buf_a;
+	char *rx_dma_buf_b;
+	dma_cookie_t cookie;
+};
+
+struct pl011_dma_tx_transaction {
+	struct completion complete;
+	struct scatterlist scatter;
+	char *tx_dma_buf;
+	dma_cookie_t cookie;
+};
+
 /*
  * We wrap our port structure around the generic uart_port.
  */
@@ -75,7 +102,18 @@ struct uart_amba_port {
 	unsigned int		lcrh_tx;	/* vendor-specific */
 	unsigned int		lcrh_rx;	/* vendor-specific */
 	bool			oversampling;   /* vendor-specific */
+	dma_init_fn		dma_init;	/* vendor-specific */
 	bool			autorts;
+	unsigned int		fifosize;
+	/* DMA stuff */
+	bool			enable_dma;
+	bool			rx_dma_running;
+#ifdef CONFIG_DMA_ENGINE
+	struct dma_chan		*dma_rx_channel;
+	struct dma_chan		*dma_tx_channel;
+	struct pl011_dma_rx_transaction dmarx;
+	struct pl011_dma_tx_transaction dmatx;
+#endif
 };
 
 /* There is by now at least one vendor with differing details, so handle it */
@@ -85,6 +123,7 @@ struct vendor_data {
 	unsigned int		lcrh_tx;
 	unsigned int		lcrh_rx;
 	bool			oversampling;
+	dma_init_fn		dma_init;
 };
 
 static struct vendor_data vendor_arm = {
@@ -95,14 +134,747 @@ static struct vendor_data vendor_arm = {
 	.oversampling		= false,
 };
 
+static void pl011_st_dma_startup(struct uart_amba_port *uap);
+
 static struct vendor_data vendor_st = {
 	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
 	.fifosize		= 64,
 	.lcrh_tx		= ST_UART011_LCRH_TX,
 	.lcrh_rx		= ST_UART011_LCRH_RX,
 	.oversampling		= true,
+	.dma_init		= pl011_st_dma_startup,
 };
 
+/*
+ * All the DMA operation mode stuff goes inside this ifdef.
+ * This assumes that you have a generic DMA device interface,
+ * no custom DMA interfaces are supported.
+ *
+ * If we had discardable probe() functions akin to
+ * platform_device_probe() in the PrimeCell/AMBA bus, we could
+ * discard most of this code after use, but since we haven't,
+ * we have to keep it all around.
+ */
+#ifdef CONFIG_DMA_ENGINE
+
+#define PL011_DMA_BUFFER_SIZE PAGE_SIZE
+
+static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
+{
+	/* DMA is the sole user of the platform data right now */
+	struct amba_pl011_data *plat = uap->port.dev->platform_data;
+	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
+	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
+	struct dma_slave_config rx_conf = {
+		.src_addr = uap->port.mapbase + UART01x_DR,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+		.direction = DMA_FROM_DEVICE,
+		.src_maxburst = uap->fifosize >> 1,
+	};
+	struct dma_slave_config tx_conf = {
+		.dst_addr = uap->port.mapbase + UART01x_DR,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+		.direction = DMA_TO_DEVICE,
+		.dst_maxburst = uap->fifosize >> 1,
+	};
+	dma_cap_mask_t mask;
+	int sglen;
+
+	/* We need platform data */
+	if (!plat) {
+		dev_err(uap->port.dev, "no DMA platform data!\n");
+		return;
+	}
+
+	/* Try to acquire a generic DMA engine slave channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/*
+	 * We need both RX and TX channels to do DMA, else do none
+	 * of them.
+	 */
+	uap->dma_rx_channel = dma_request_channel(mask,
+						  plat->dma_filter,
+						  plat->dma_rx_param);
+	if (!uap->dma_rx_channel) {
+		dev_err(uap->port.dev, "no RX DMA channel!\n");
+		return;
+	}
+	uap->dma_rx_channel->device->device_control(uap->dma_rx_channel,
+						    DMA_SLAVE_CONFIG,
+						    (unsigned long) &rx_conf);
+
+	uap->dma_tx_channel = dma_request_channel(mask,
+						  plat->dma_filter,
+						  plat->dma_tx_param);
+	if (!uap->dma_tx_channel) {
+		dev_err(uap->port.dev, "no TX DMA channel!\n");
+		goto err_no_txchan;
+	}
+	uap->dma_tx_channel->device->device_control(uap->dma_tx_channel,
+						    DMA_SLAVE_CONFIG,
+						    (unsigned long) &tx_conf);
+
+	/* Allocate DMA RX and TX buffers */
+	dmarx->rx_dma_buf_a = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
+	if (!dmarx->rx_dma_buf_a) {
+		dev_err(uap->port.dev, "failed to allocate DMA RX buffer A\n");
+		goto err_no_rxbuf_a;
+	}
+
+	dmarx->rx_dma_buf_b = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
+	if (!dmarx->rx_dma_buf_b) {
+		dev_err(uap->port.dev, "failed to allocate DMA RX buffer B\n");
+		goto err_no_rxbuf_b;
+	}
+
+	dmatx->tx_dma_buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
+	if (!dmatx->tx_dma_buf) {
+		dev_err(uap->port.dev, "failed to allocate DMA TX buffer\n");
+		goto err_no_txbuf;
+	}
+
+	/* Provide single SG list with one item to the buffers */
+	sg_init_one(&dmarx->scatter_a, dmarx->rx_dma_buf_a,
+		    PL011_DMA_BUFFER_SIZE);
+	sg_init_one(&dmarx->scatter_b, dmarx->rx_dma_buf_b,
+		    PL011_DMA_BUFFER_SIZE);
+	sg_init_one(&dmatx->scatter, dmatx->tx_dma_buf, PL011_DMA_BUFFER_SIZE);
+
+	/* Map DMA buffers */
+	sglen = dma_map_sg(uap->port.dev, &dmarx->scatter_a,
+			   1, DMA_FROM_DEVICE);
+	if (sglen != 1)
+		goto err_rx_sgmap_a;
+
+	sglen = dma_map_sg(uap->port.dev, &dmarx->scatter_b,
+			   1, DMA_FROM_DEVICE);
+	if (sglen != 1)
+		goto err_rx_sgmap_b;
+
+	sglen = dma_map_sg(uap->port.dev, &dmatx->scatter,
+			   1, DMA_TO_DEVICE);
+	if (sglen != 1)
+		goto err_tx_sgmap;
+
+	/* Initially mark the TX transfer complete so the first one can start */
+	init_completion(&uap->dmatx.complete);
+	complete(&uap->dmatx.complete);
+
+	/* The DMA buffer is now the FIFO the TTY subsystem can use */
+	uap->port.fifosize = PL011_DMA_BUFFER_SIZE;
+
+	uap->enable_dma = true;
+	dev_info(uap->port.dev, "setup for DMA on RX %s, TX %s\n",
+		 dma_chan_name(uap->dma_rx_channel),
+		 dma_chan_name(uap->dma_tx_channel));
+	return;
+
+err_tx_sgmap:
+	dma_unmap_sg(uap->port.dev, &dmarx->scatter_b,
+		     1, DMA_FROM_DEVICE);
+err_rx_sgmap_b:
+	dma_unmap_sg(uap->port.dev, &dmarx->scatter_a,
+		     1, DMA_FROM_DEVICE);
+err_rx_sgmap_a:
+	kfree(dmatx->tx_dma_buf);
+err_no_txbuf:
+	kfree(dmarx->rx_dma_buf_b);
+err_no_rxbuf_b:
+	kfree(dmarx->rx_dma_buf_a);
+err_no_rxbuf_a:
+	dma_release_channel(uap->dma_tx_channel);
+	uap->dma_tx_channel = NULL;
+err_no_txchan:
+	dma_release_channel(uap->dma_rx_channel);
+	uap->dma_rx_channel = NULL;
+	return;
+}
+
+/*
+ * Stack up the UARTs and let the above initcall be done at
+ * device initcall time, because the serial driver is called as
+ * an arch initcall, and at this time the DMA subsystem is not yet
+ * registered. At this point the driver will switch over to using
+ * DMA where desired.
+ */
+
+struct dma_uap {
+	struct list_head node;
+	struct uart_amba_port *uap;
+};
+
+struct list_head pl011_dma_uarts = LIST_HEAD_INIT(pl011_dma_uarts);
+
+static int pl011_dma_initcall(void)
+{
+	struct list_head *node, *tmp;
+
+	list_for_each_safe(node, tmp, &pl011_dma_uarts) {
+		struct dma_uap *dmau = list_entry(node, struct dma_uap, node);
+		pl011_dma_probe_initcall(dmau->uap);
+		list_del(node);
+		kfree(dmau);
+	}
+	return 0;
+}
+
+device_initcall(pl011_dma_initcall);
+
+static void pl011_dma_probe(struct uart_amba_port *uap)
+{
+	struct dma_uap *dmau = kzalloc(sizeof(struct dma_uap), GFP_KERNEL);
+
+	if (dmau == NULL)
+		return;
+	dmau->uap = uap;
+	list_add_tail(&dmau->node, &pl011_dma_uarts);
+}
+
+static void pl011_dma_remove(struct uart_amba_port *uap)
+{
+	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
+	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
+
+	/* TODO: remove the initcall if it has not yet executed */
+	/* Unmap and free DMA buffers */
+	if (uap->dma_rx_channel)
+		dma_release_channel(uap->dma_rx_channel);
+	if (uap->dma_tx_channel)
+		dma_release_channel(uap->dma_tx_channel);
+	if (dmatx->tx_dma_buf) {
+		dma_unmap_sg(uap->port.dev, &dmatx->scatter,
+			     1, DMA_TO_DEVICE);
+		kfree(dmatx->tx_dma_buf);
+	}
+	if (dmarx->rx_dma_buf_b) {
+		dma_unmap_sg(uap->port.dev, &dmarx->scatter_b,
+			     1, DMA_FROM_DEVICE);
+		kfree(dmarx->rx_dma_buf_b);
+	}
+	if (dmarx->rx_dma_buf_a) {
+		dma_unmap_sg(uap->port.dev, &dmarx->scatter_a,
+			     1, DMA_FROM_DEVICE);
+		kfree(dmarx->rx_dma_buf_a);
+	}
+}
+
+/* Forward declare this for the refill routine */
+static int pl011_dma_tx_refill(struct uart_amba_port *uap);
+
+/*
+ * Move the tail when this IRQ occurs, if not empty refill and
+ * fire another transaction
+ */
+static void pl011_dma_tx_callback(void *data)
+{
+	struct uart_amba_port *uap = data;
+	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
+	struct circ_buf *xmit = &uap->port.state->xmit;
+	u16 val;
+	int ret;
+
+	/* Temporarily disable TX DMA */
+	val = readw(uap->port.membase + UART011_DMACR);
+	val &= ~(UART011_TXDMAE);
+	writew(val, uap->port.membase + UART011_DMACR);
+
+	/* Refill the TX if the buffer is not empty */
+	if (!uart_circ_empty(xmit)) {
+		ret = pl011_dma_tx_refill(uap);
+		if (ret == -EBUSY)
+			/*
+			 * If DMA cannot be used right now, we complete this
+			 * transaction and let the TTY layer retry. If the
+			 * first following xfer fails to set up for DMA, it
+			 * will fall through to interrupt mode.
+			 */
+			dev_dbg(uap->port.dev, "DMA busy\n");
+	} else {
+		complete(&dmatx->complete);
+	}
+}
+
+static int pl011_dma_tx_refill(struct uart_amba_port *uap)
+{
+	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
+	struct dma_chan *chan = uap->dma_tx_channel;
+	struct dma_async_tx_descriptor *desc;
+	struct circ_buf *xmit = &uap->port.state->xmit;
+	unsigned int count;
+	unsigned long flags;
+	u16 val;
+
+	/* Don't bother about using DMA on XON/XOFF */
+	if (uap->port.x_char) {
+		/* If we can't get it into the FIFO, retry later */
+		if (readw(uap->port.membase + UART01x_FR) &
+		    UART01x_FR_TXFF) {
+			complete(&dmatx->complete);
+			return 0;
+		}
+		writew(uap->port.x_char, uap->port.membase + UART01x_DR);
+		uap->port.icount.tx++;
+		uap->port.x_char = 0;
+		complete(&dmatx->complete);
+		return 0;
+	}
+
+	/*
+	 * Try to avoid the overhead involved in using DMA if the
+	 * transaction fits in the first half of the FIFO and it's not
+	 * full. Unfortunately there is only one single bit in the
+	 * hardware to tell whether the FIFO is full or not, so
+	 * we don't know exactly how many chars we can fit in.
+	 */
+	if (uart_circ_chars_pending(xmit) < (uap->fifosize >> 1)) {
+		while (uart_circ_chars_pending(xmit)) {
+			if (readw(uap->port.membase + UART01x_FR) &
+			    UART01x_FR_TXFF) {
+				/*
+				 * Ooops TX FIFO is full, we'd better stop
+				 * this. Let's enable TX interrupt here to get
+				 * informed when there is again some space in
+				 * the TX FIFO so we can continue the transfer.
+				 * This interrupt will be cleared just before
+				 * setting up DMA, as it could interfere with
+				 * TX interrupt handling routine.
+				 */
+				uap->im |= UART011_TXIM;
+				writew(uap->im,
+				       uap->port.membase + UART011_IMSC);
+				break;
+			}
+			writew(xmit->buf[xmit->tail],
+			       uap->port.membase + UART01x_DR);
+			uap->port.icount.tx++;
+			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		}
+		complete(&dmatx->complete);
+		return 0;
+	}
+
+	/*
+	 * Clear TX interrupt to be sure that DMA will not interfere with
+	 * TX ISR
+	 */
+	local_irq_save(flags);
+	uap->im &= ~UART011_TXIM;
+	writew(uap->im, uap->port.membase + UART011_IMSC);
+	local_irq_restore(flags);
+
+	/* Sync the buffer for the CPU so we can write into it */
+	dma_sync_sg_for_cpu(uap->port.dev,
+			    &dmatx->scatter,
+			    1,
+			    DMA_TO_DEVICE);
+
+	/* Else proceed to copy the TX chars to the DMA buffer and fire DMA */
+	count = uart_circ_chars_pending(xmit);
+	if (count > PL011_DMA_BUFFER_SIZE)
+		count = PL011_DMA_BUFFER_SIZE;
+
+	if (xmit->tail < xmit->head)
+		memcpy(&dmatx->tx_dma_buf[0], &xmit->buf[xmit->tail], count);
+	else {
+		size_t first = UART_XMIT_SIZE - xmit->tail;
+		size_t second = xmit->head;
+
+		memcpy(&dmatx->tx_dma_buf[0], &xmit->buf[xmit->tail], first);
+		memcpy(&dmatx->tx_dma_buf[first], &xmit->buf[0], second);
+	}
+
+	dmatx->scatter.length = count;
+
+	/* Synchronize the scatterlist, invalidate buffers, caches etc */
+	dma_sync_sg_for_device(uap->port.dev,
+			       &dmatx->scatter,
+			       1,
+			       DMA_TO_DEVICE);
+
+	/* Prepare the scatterlist */
+	desc = chan->device->device_prep_slave_sg(chan,
+						  &dmatx->scatter,
+						  1,
+						  DMA_TO_DEVICE,
+						  DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		/* "Complete" DMA (errorpath) */
+		complete(&dmatx->complete);
+		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+		return -EBUSY;
+	}
+
+	/* Some data to go along to the callback */
+	desc->callback = pl011_dma_tx_callback;
+	desc->callback_param = uap;
+
+	/* Here is where overloaded DMA controllers can fail */
+	dmatx->cookie = desc->tx_submit(desc);
+	if (dma_submit_error(dmatx->cookie)) {
+		/* "Complete" DMA (errorpath) */
+		complete(&dmatx->complete);
+		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+		return dmatx->cookie;
+	}
+
+	/*
+	 * Now we know that DMA will fire, so advance the ring buffer
+	 * with the stuff we just dispatched
+	 */
+	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
+	uap->port.icount.tx += count;
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&uap->port);
+
+	/* Fire the DMA transaction */
+	chan->device->device_issue_pending(chan);
+
+	val = readw(uap->port.membase + UART011_DMACR);
+	val |= UART011_TXDMAE;
+	writew(val, uap->port.membase + UART011_DMACR);
+	return 0;
+}
+
+static void pl011_dma_rx_callback(void *data);
+
+static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
+{
+	struct dma_chan *rxchan = uap->dma_rx_channel;
+	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *scatter = dmarx->use_buffer_b ?
+		&dmarx->scatter_b : &dmarx->scatter_a;
+	u16 val;
+
+	/* Start the RX DMA job */
+	desc = rxchan->device->device_prep_slave_sg(rxchan,
+						    scatter,
+						    1,
+						    DMA_FROM_DEVICE,
+						    DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	/*
+	 * If the DMA engine is busy and cannot prepare a
+	 * channel, no big deal, the driver will fall back
+	 * to interrupt mode as a result of this error code.
+	 */
+	if (!desc) {
+		uap->rx_dma_running = false;
+		rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
+		return -EBUSY;
+	}
+
+	/* Some data to go along to the callback */
+	desc->callback = pl011_dma_rx_callback;
+	desc->callback_param = uap;
+	/* This is another point where an overloaded engine can fail */
+	dmarx->cookie = desc->tx_submit(desc);
+	if (dma_submit_error(dmarx->cookie)) {
+		uap->rx_dma_running = false;
+		rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
+		return -EBUSY;
+	}
+
+	rxchan->device->device_issue_pending(rxchan);
+
+	val = readw(uap->port.membase + UART011_DMACR);
+	val |= UART011_RXDMAE;
+	writew(val, uap->port.membase + UART011_DMACR);
+	uap->rx_dma_running = true;
+
+	return 0;
+}
+
+/*
+ * This is called when either the DMA job is complete, or
+ * the FIFO timeout interrupt occurred. This must be called
+ * with the port spinlock uap->port.lock held.
+ */
+static void pl011_dma_rx_chars(struct uart_amba_port *uap,
+			       u32 pending, bool use_buffer_b,
+			       bool readfifo)
+{
+	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
+	struct tty_struct *tty = uap->port.state->port.tty;
+	char *buf = use_buffer_b ? dmarx->rx_dma_buf_b : dmarx->rx_dma_buf_a;
+	struct scatterlist *scatter = use_buffer_b ?
+		&dmarx->scatter_b : &dmarx->scatter_a;
+	unsigned int status, ch, flag;
+	u32 count = pending;
+	u32 bufp = 0;
+	u32 fifotaken = 0; /* only used for vdbg() */
+
+	/* Sync in buffer */
+	dma_sync_sg_for_cpu(uap->port.dev,
+			    scatter,
+			    1,
+			    DMA_FROM_DEVICE);
+
+	status = readw(uap->port.membase + UART01x_FR);
+
+	/*
+	 * First take all chars in the DMA pipe, then look
+	 * in the FIFO. So loop while we have chars in the
+	 * DMA buffer or the FIFO. If we came here from a
+	 * DMA buffer full interrupt, there is already another
+	 * DMA job triggered to read the FIFO, so don't look
+	 * at it.
+	 */
+	while (count ||
+	       (readfifo && (status & UART01x_FR_RXFE) == 0)) {
+
+		flag = TTY_NORMAL;
+		uap->port.icount.rx++;
+
+		if (count) {
+			/* Take chars from the DMA buffer */
+			int inserted = tty_insert_flip_string(
+					uap->port.state->port.tty, buf, count);
+
+			/*
+			 * Check if insertion is successful to avoid
+			 * infinite loop. This can happen when TTY is full.
+			 */
+			if (unlikely(inserted == 0))
+				count = 0;
+			else {
+				count -= inserted;
+				bufp += inserted;
+			}
+			continue;
+		} else {
+			/* Take chars from the FIFO and update status */
+			ch = readw(uap->port.membase + UART01x_DR);
+			status = readw(uap->port.membase + UART01x_FR);
+			fifotaken++;
+
+			/*
+			 * Error conditions will only occur in the FIFO,
+			 * these will trigger an immediate interrupt and
+			 * stop the DMA job, so we will always find the
+			 * error in the FIFO, never in the DMA buffer.
+			 */
+			if (unlikely(ch & UART_DR_ERROR)) {
+				if (ch & UART011_DR_BE) {
+					ch &= ~(UART011_DR_FE | UART011_DR_PE);
+					uap->port.icount.brk++;
+					if (uart_handle_break(&uap->port))
+						continue;
+				} else if (ch & UART011_DR_PE)
+					uap->port.icount.parity++;
+				else if (ch & UART011_DR_FE)
+					uap->port.icount.frame++;
+				if (ch & UART011_DR_OE)
+					uap->port.icount.overrun++;
+
+				ch &= uap->port.read_status_mask;
+
+				if (ch & UART011_DR_BE)
+					flag = TTY_BREAK;
+				else if (ch & UART011_DR_PE)
+					flag = TTY_PARITY;
+				else if (ch & UART011_DR_FE)
+					flag = TTY_FRAME;
+			}
+		}
+
+		if (uart_handle_sysrq_char(&uap->port, ch & 255))
+			continue;
+
+		uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
+	}
+
+	spin_unlock(&uap->port.lock);
+	dev_vdbg(uap->port.dev,
+		 "Took %d chars from DMA buffer and %d chars from the FIFO\n",
+		 bufp, fifotaken);
+	tty_flip_buffer_push(tty);
+	spin_lock(&uap->port.lock);
+}
+
+static void pl011_dma_rx_irq(struct uart_amba_port *uap)
+{
+	struct dma_chan *rxchan = uap->dma_rx_channel;
+	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
+	struct scatterlist *scatter = dmarx->use_buffer_b ?
+		&dmarx->scatter_b : &dmarx->scatter_a;
+	u32 pending;
+	int ret;
+	struct dma_tx_state state;
+	enum dma_status dmastat;
+	u16 val;
+
+	/* Use PrimeCell DMA extensions to stop the transfer */
+	ret = rxchan->device->device_control(rxchan, DMA_PAUSE, 0);
+	if (ret)
+		dev_err(uap->port.dev, "unable to pause DMA transfer\n");
+	dmastat = rxchan->device->device_tx_status(rxchan,
+						   dmarx->cookie, &state);
+
+	/* Disable RX DMA temporarily */
+	val = readw(uap->port.membase + UART011_DMACR);
+	val &= ~(UART011_RXDMAE);
+	writew(val, uap->port.membase + UART011_DMACR);
+	uap->rx_dma_running = false;
+
+	if (dmastat != DMA_PAUSED)
+		dev_err(uap->port.dev, "unable to pause DMA transfer\n");
+	pending = scatter->length - state.residue;
+
+	BUG_ON(pending > PL011_DMA_BUFFER_SIZE);
+
+	ret = rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
+	if (ret)
+		dev_err(uap->port.dev, "unable to terminate DMA transfer\n");
+
+	/*
+	 * This will take the chars we have so far and insert
+	 * into the framework.
+	 */
+	pl011_dma_rx_chars(uap, pending, dmarx->use_buffer_b, true);
+
+	/* Switch buffer & re-trigger DMA job */
+	dmarx->use_buffer_b = !dmarx->use_buffer_b;
+	ret = pl011_dma_rx_trigger_dma(uap);
+	if (ret) {
+		dev_dbg(uap->port.dev, "could not retrigger RX DMA job "
+			"fall back to interrupt mode\n");
+		uap->im |= UART011_RXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+	}
+}
+
+static void pl011_dma_rx_callback(void *data)
+{
+	struct uart_amba_port *uap = data;
+	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
+	bool lastbuf = dmarx->use_buffer_b;
+	int ret;
+
+	/*
+	 * This completion interrupt occurs typically when the
+	 * RX buffer is totally stuffed but no timeout has yet
+	 * occurred. When that happens, we just want the RX
+	 * routine to flush out the secondary DMA buffer while
+	 * we immediately trigger the next DMA job.
+	 */
+	uap->rx_dma_running = false;
+	dmarx->use_buffer_b = !lastbuf;
+	ret = pl011_dma_rx_trigger_dma(uap);
+
+	spin_lock_irq(&uap->port.lock);
+	pl011_dma_rx_chars(uap, PL011_DMA_BUFFER_SIZE, lastbuf, false);
+	spin_unlock_irq(&uap->port.lock);
+	/*
+	 * Do this check after we picked the DMA chars so we don't
+	 * get some IRQ immediately from RX.
+	 */
+	if (ret) {
+		dev_dbg(uap->port.dev, "could not retrigger RX DMA job "
+			"fall back to interrupt mode\n");
+		uap->im |= UART011_RXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+	}
+}
+
+static void pl011_st_dma_startup(struct uart_amba_port *uap)
+{
+	/* Set DMABREQ threshold */
+	writew(ST_UART011_DMAWM_RX_16 | ST_UART011_DMAWM_TX_16,
+	       uap->port.membase + ST_UART011_DMAWM);
+}
+
+static void pl011_dma_startup(struct uart_amba_port *uap)
+{
+	u16 val;
+	int ret = 0;
+
+	if (!uap->enable_dma)
+		return;
+
+	/* Turn on DMA error (RX/TX will be enabled on demand) */
+	val = readw(uap->port.membase + UART011_DMACR);
+	val |= UART011_DMAONERR;
+	writew(val, uap->port.membase + UART011_DMACR);
+
+	/* call vendor specific dma init */
+	if (uap->dma_init)
+		uap->dma_init(uap);
+
+	ret = pl011_dma_rx_trigger_dma(uap);
+	if (ret)
+		dev_dbg(uap->port.dev, "could not trigger initial "
+			"RX DMA job, fall back to interrupt mode\n");
+}
+
+static void pl011_dma_shutdown(struct uart_amba_port *uap)
+{
+	struct dma_chan *rxchan = uap->dma_rx_channel;
+	struct dma_chan *txchan = uap->dma_tx_channel;
+	u16 val;
+
+	if (!uap->enable_dma)
+		return;
+
+	/* Disable RX and TX DMA */
+	while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
+		barrier();
+	val = readw(uap->port.membase + UART011_DMACR);
+	val &= ~(UART011_DMAONERR | UART011_RXDMAE | UART011_TXDMAE);
+	writew(val, uap->port.membase + UART011_DMACR);
+	/* Terminate any RX and TX DMA jobs */
+	rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
+	txchan->device->device_control(txchan, DMA_TERMINATE_ALL, 0);
+}
+
+static int pl011_dma_tx_chars(struct uart_amba_port *uap)
+{
+	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
+
+	/* Try to wait for completion, return if something is in progress */
+	if (!try_wait_for_completion(&dmatx->complete))
+		return -EINPROGRESS;
+
+	/* Set up and fire the DMA job */
+	init_completion(&dmatx->complete);
+	return pl011_dma_tx_refill(uap);
+}
+
+#else
+/* Blank functions if the DMA engine is not available */
+static inline void pl011_dma_probe(struct uart_amba_port *uap)
+{
+}
+
+static inline void pl011_dma_remove(struct uart_amba_port *uap)
+{
+}
+
+static inline void pl011_dma_rx_irq(struct uart_amba_port *uap)
+{
+}
+
+static inline int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
+{
+	return -EIO;
+}
+
+static inline void pl011_dma_startup(struct uart_amba_port *uap)
+{
+}
+
+static inline void pl011_dma_shutdown(struct uart_amba_port *uap)
+{
+}
+
+static inline int pl011_dma_tx_chars(struct uart_amba_port *uap)
+{
+	return -EIO;
+}
+#endif
+
+
 static void pl011_stop_tx(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
@@ -111,10 +883,18 @@ static void pl011_stop_tx(struct uart_port *port)
 	writew(uap->im, uap->port.membase + UART011_IMSC);
 }
 
+static void pl011_tx_chars(struct uart_amba_port *uap);
+
 static void pl011_start_tx(struct uart_port *port)
 {
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
 
+	if (uap->enable_dma) {
+		/* Immediately push out chars in DMA mode */
+		pl011_tx_chars(uap);
+		return;
+	}
+	/* In interrupt mode, let the interrupt pull chars */
 	uap->im |= UART011_TXIM;
 	writew(uap->im, uap->port.membase + UART011_IMSC);
 }
@@ -140,6 +920,7 @@ static void pl011_rx_chars(struct uart_amba_port *uap)
 {
 	struct tty_struct *tty = uap->port.state->port.tty;
 	unsigned int status, ch, flag, max_count = 256;
+	int ret;
 
 	status = readw(uap->port.membase + UART01x_FR);
 	while ((status & UART01x_FR_RXFE) == 0 && max_count--) {
@@ -184,6 +965,21 @@ static void pl011_rx_chars(struct uart_amba_port *uap)
 	}
 	spin_unlock(&uap->port.lock);
 	tty_flip_buffer_push(tty);
+	/*
+	 * If we were temporarily out of DMA mode for a while,
+	 * attempt to switch back to DMA mode again.
+	 */
+	if (uap->enable_dma) {
+		uap->im &= ~UART011_RXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+		ret = pl011_dma_rx_trigger_dma(uap);
+		if (ret) {
+			dev_dbg(uap->port.dev, "could not trigger RX DMA job "
+				"fall back to interrupt mode again\n");
+			uap->im |= UART011_RXIM;
+			writew(uap->im, uap->port.membase + UART011_IMSC);
+		}
+	}
 	spin_lock(&uap->port.lock);
 }
 
@@ -192,6 +988,25 @@ static void pl011_tx_chars(struct uart_amba_port *uap)
 	struct circ_buf *xmit = &uap->port.state->xmit;
 	int count;
 
+	if (uap->enable_dma) {
+		int ret;
+
+		ret = pl011_dma_tx_chars(uap);
+		if (!ret)
+			return;
+		if (ret == -EINPROGRESS)
+			return;
+
+		/*
+		 * On any other error (including -EBUSY which is emitted
+		 * in case the DMA engine is out of physical channels
+		 * for example) we fall through to interrupt mode
+		 */
+		dev_dbg(uap->port.dev, "DMA unavailable for TX\n");
+		uap->im |= UART011_TXIM;
+		writew(uap->im, uap->port.membase + UART011_IMSC);
+	}
+
 	if (uap->port.x_char) {
 		writew(uap->port.x_char, uap->port.membase + UART01x_DR);
 		uap->port.icount.tx++;
@@ -203,8 +1018,10 @@ static void pl011_tx_chars(struct uart_amba_port *uap)
 		return;
 	}
 
-	count = uap->port.fifosize >> 1;
+	count = uap->fifosize >> 1;
 	do {
+		if (readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF)
+			break;
 		writew(xmit->buf[xmit->tail], uap->port.membase + UART01x_DR);
 		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
 		uap->port.icount.tx++;
@@ -249,7 +1066,7 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
 	unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT;
 	int handled = 0;
 
-	spin_lock(&uap->port.lock);
+	spin_lock_irq(&uap->port.lock);
 
 	status = readw(uap->port.membase + UART011_MIS);
 	if (status) {
@@ -258,13 +1075,30 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
 					  UART011_RXIS),
 			       uap->port.membase + UART011_ICR);
 
-			if (status & (UART011_RTIS|UART011_RXIS))
-				pl011_rx_chars(uap);
+			if (status & (UART011_RTIS|UART011_RXIS)) {
+				if (uap->enable_dma && uap->rx_dma_running)
+					pl011_dma_rx_irq(uap);
+				else
+					pl011_rx_chars(uap);
+			}
 			if (status & (UART011_DSRMIS|UART011_DCDMIS|
 				      UART011_CTSMIS|UART011_RIMIS))
 				pl011_modem_status(uap);
-			if (status & UART011_TXIS)
+			if (status & UART011_TXIS) {
+				/*
+				 * When DMA is enabled we still use TX
+				 * interrupt to send small amounts of data,
+				 * and as a fallback when the DMA channel is
+				 * not available. This interrupt is cleared
+				 * here and will be enabled when it's needed.
+				 */
+				if (uap->enable_dma) {
+					uap->im &= ~UART011_TXIM;
+					writew(uap->im,
+					       uap->port.membase + UART011_IMSC);
+				}
 				pl011_tx_chars(uap);
+			}
 
 			if (pass_counter-- == 0)
 				break;
@@ -274,7 +1108,7 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
 		handled = 1;
 	}
 
-	spin_unlock(&uap->port.lock);
+	spin_unlock_irq(&uap->port.lock);
 
 	return IRQ_RETVAL(handled);
 }
@@ -423,16 +1257,28 @@ static int pl011_startup(struct uart_port *port)
 	cr = UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE;
 	writew(cr, uap->port.membase + UART011_CR);
 
+	/* Clear pending error interrupts */
+	writew(0xFFFF & ~(UART011_TXIS | UART011_RTIS | UART011_RXIS),
+	       uap->port.membase + UART011_ICR);
+
 	/*
 	 * initialise the old status of the modem signals
 	 */
 	uap->old_status = readw(uap->port.membase + UART01x_FR) & UART01x_FR_MODEM_ANY;
 
+	/* Startup DMA */
+	pl011_dma_startup(uap);
+
 	/*
-	 * Finally, enable interrupts
+	 * Finally, enable interrupts. When using DMA, enable only
+	 * timeouts; if the initial RX DMA job failed, start in
+	 * interrupt mode as well.
 	 */
 	spin_lock_irq(&uap->port.lock);
-	uap->im = UART011_RXIM | UART011_RTIM;
+	if (uap->enable_dma && uap->rx_dma_running)
+		uap->im = UART011_RTIM;
+	else
+		uap->im = UART011_RXIM | UART011_RTIM;
 	writew(uap->im, uap->port.membase + UART011_IMSC);
 	spin_unlock_irq(&uap->port.lock);
 
@@ -467,6 +1313,8 @@ static void pl011_shutdown(struct uart_port *port)
 	writew(0xffff, uap->port.membase + UART011_ICR);
 	spin_unlock_irq(&uap->port.lock);
 
+	pl011_dma_shutdown(uap);
+
 	/*
 	 * Free the interrupt
 	 */
@@ -532,7 +1380,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
 		if (!(termios->c_cflag & PARODD))
 			lcr_h |= UART01x_LCRH_EPS;
 	}
-	if (port->fifosize > 1)
+	if (uap->fifosize > 1)
 		lcr_h |= UART01x_LCRH_FEN;
 
 	spin_lock_irqsave(&port->lock, flags);
@@ -862,6 +1710,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	uap->lcrh_rx = vendor->lcrh_rx;
 	uap->lcrh_tx = vendor->lcrh_tx;
 	uap->oversampling = vendor->oversampling;
+	uap->dma_init = vendor->dma_init;
 	uap->port.dev = &dev->dev;
 	uap->port.mapbase = dev->res.start;
 	uap->port.membase = base;
@@ -871,6 +1720,8 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	uap->port.ops = &amba_pl011_pops;
 	uap->port.flags = UPF_BOOT_AUTOCONF;
 	uap->port.line = i;
+	uap->fifosize = vendor->fifosize;
+	pl011_dma_probe(uap);
 
 	amba_ports[i] = uap;
 
@@ -879,6 +1730,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
 	if (ret) {
 		amba_set_drvdata(dev, NULL);
 		amba_ports[i] = NULL;
+		pl011_dma_remove(uap);
 		clk_put(uap->clk);
  unmap:
 		iounmap(base);
@@ -902,6 +1754,7 @@ static int pl011_remove(struct amba_device *dev)
 		if (amba_ports[i] == uap)
 			amba_ports[i] = NULL;
 
+	pl011_dma_remove(uap);
 	iounmap(uap->port.membase);
 	clk_put(uap->clk);
 	kfree(uap);
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index 6021588..47c176c 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -113,6 +113,21 @@
 #define UART01x_LCRH_PEN	0x02
 #define UART01x_LCRH_BRK	0x01
 
+#define ST_UART011_DMAWM_RX_1	(0 << 3)
+#define ST_UART011_DMAWM_RX_2	(1 << 3)
+#define ST_UART011_DMAWM_RX_4	(2 << 3)
+#define ST_UART011_DMAWM_RX_8	(3 << 3)
+#define ST_UART011_DMAWM_RX_16	(4 << 3)
+#define ST_UART011_DMAWM_RX_32	(5 << 3)
+#define ST_UART011_DMAWM_RX_48	(6 << 3)
+#define ST_UART011_DMAWM_TX_1	0
+#define ST_UART011_DMAWM_TX_2	1
+#define ST_UART011_DMAWM_TX_4	2
+#define ST_UART011_DMAWM_TX_8	3
+#define ST_UART011_DMAWM_TX_16	4
+#define ST_UART011_DMAWM_TX_32	5
+#define ST_UART011_DMAWM_TX_48	6
+
 #define UART010_IIR_RTIS	0x08
 #define UART010_IIR_TIS		0x04
 #define UART010_IIR_RIS		0x02
@@ -180,6 +195,12 @@ struct amba_device; /* in uncompress this is included but amba/bus.h is not */
 struct amba_pl010_data {
 	void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
 };
+struct dma_chan;
+struct amba_pl011_data {
+	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	void *dma_rx_param;
+	void *dma_tx_param;
+};
 #endif
 
 #endif
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-10-06  9:32 [PATCH] ARM: add PrimeCell generic DMA to PL011 Linus Walleij
@ 2010-10-12 19:30 ` Russell King - ARM Linux
  2010-10-13  5:15   ` Linus Walleij
  2010-12-22 14:10 ` Russell King - ARM Linux
  1 sibling, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-10-12 19:30 UTC (permalink / raw)
  To: linux-arm-kernel

Linus,

As these patches depend on several other trees, it's difficult for me
to check them on my platforms, and I don't want my tree to break on
Realview/Versatile platforms without these dependencies - it's one of
the platforms I'm using to check out SMP/highmem/memblock stuff.

Unless you can assure me that there will be no breakage through these
patches without having their dependencies merged first, I think these
will have to wait another cycle...

On Wed, Oct 06, 2010 at 11:32:06AM +0200, Linus Walleij wrote:
> This extends the PL011 UART driver with generic DMA engine support
> using the PrimeCell DMA engine interface.
> 
> Tested-by: Jerzy Kasenberg <jerzy.kasenberg@tieto.com>
> Tested-by: Grzegorz Sygieda <grzegorz.sygieda@tieto.com>
> Tested-by: Marcin Mielczarczyk <marcin.mielczarczyk@tieto.com>
> Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
> ---
> Changes from previous version that was in the patch set for
> PrimeCell DMA (I've lost count):
> 
> This adds support for the ST-Ericsson specific DMA watermarking
> via a vendor data-specified setup function, a design pattern
> likely to be useful for other users as well.
> ---
>  drivers/serial/amba-pl011.c |  871 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/amba/serial.h |   21 +
>  2 files changed, 883 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
> index 6ca7a44..d99fed0 100644
> --- a/drivers/serial/amba-pl011.c
> +++ b/drivers/serial/amba-pl011.c
> @@ -7,6 +7,7 @@
>   *
>   *  Copyright 1999 ARM Limited
>   *  Copyright (C) 2000 Deep Blue Solutions Ltd.
> + *  Copyright (C) 2010 ST-Ericsson SA
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -48,6 +49,11 @@
>  #include <linux/amba/serial.h>
>  #include <linux/clk.h>
>  #include <linux/slab.h>
> +#include <linux/dmaengine.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/scatterlist.h>
> +#include <linux/completion.h>
> +#include <linux/delay.h>
>  
>  #include <asm/io.h>
>  #include <asm/sizes.h>
> @@ -63,6 +69,27 @@
>  #define UART_DR_ERROR		(UART011_DR_OE|UART011_DR_BE|UART011_DR_PE|UART011_DR_FE)
>  #define UART_DUMMY_DR_RX	(1 << 16)
>  
> +struct uart_amba_port;
> +typedef	void (*dma_init_fn)(struct uart_amba_port *uap);
> +
> +/* Deals with DMA transactions */
> +struct pl011_dma_rx_transaction {
> +	struct completion complete;
> +	bool use_buffer_b;
> +	struct scatterlist scatter_a;
> +	struct scatterlist scatter_b;
> +	char *rx_dma_buf_a;
> +	char *rx_dma_buf_b;
> +	dma_cookie_t cookie;
> +};
> +
> +struct pl011_dma_tx_transaction {
> +	struct completion complete;
> +	struct scatterlist scatter;
> +	char *tx_dma_buf;
> +	dma_cookie_t cookie;
> +};
> +
>  /*
>   * We wrap our port structure around the generic uart_port.
>   */
> @@ -75,7 +102,18 @@ struct uart_amba_port {
>  	unsigned int		lcrh_tx;	/* vendor-specific */
>  	unsigned int		lcrh_rx;	/* vendor-specific */
>  	bool			oversampling;   /* vendor-specific */
> +	dma_init_fn		dma_init;	/* vendor-specific */
>  	bool			autorts;
> +	unsigned int		fifosize;
> +	/* DMA stuff */
> +	bool			enable_dma;
> +	bool			rx_dma_running;
> +#ifdef CONFIG_DMA_ENGINE
> +	struct dma_chan		*dma_rx_channel;
> +	struct dma_chan		*dma_tx_channel;
> +	struct pl011_dma_rx_transaction dmarx;
> +	struct pl011_dma_tx_transaction dmatx;
> +#endif
>  };
>  
>  /* There is by now at least one vendor with differing details, so handle it */
> @@ -85,6 +123,7 @@ struct vendor_data {
>  	unsigned int		lcrh_tx;
>  	unsigned int		lcrh_rx;
>  	bool			oversampling;
> +	dma_init_fn		dma_init;
>  };
>  
>  static struct vendor_data vendor_arm = {
> @@ -95,14 +134,747 @@ static struct vendor_data vendor_arm = {
>  	.oversampling		= false,
>  };
>  
> +static void pl011_st_dma_startup(struct uart_amba_port *uap);
> +
>  static struct vendor_data vendor_st = {
>  	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
>  	.fifosize		= 64,
>  	.lcrh_tx		= ST_UART011_LCRH_TX,
>  	.lcrh_rx		= ST_UART011_LCRH_RX,
>  	.oversampling		= true,
> +	.dma_init		= pl011_st_dma_startup,
>  };
>  
> +/*
> + * All the DMA operation mode stuff goes inside this ifdef.
> + * This assumes that you have a generic DMA device interface,
> + * no custom DMA interfaces are supported.
> + *
> + * If we had discardable probe() functions akin to
> + * platform_device_probe() in the PrimeCell/AMBA bus, we could
> + * discard most of this code after use, but since we haven't,
> + * we have to keep it all around.
> + */
> +#ifdef CONFIG_DMA_ENGINE
> +
> +#define PL011_DMA_BUFFER_SIZE PAGE_SIZE
> +
> +static void pl011_dma_probe_initcall(struct uart_amba_port *uap)
> +{
> +	/* DMA is the sole user of the platform data right now */
> +	struct amba_pl011_data *plat = uap->port.dev->platform_data;
> +	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
> +	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
> +	struct dma_slave_config rx_conf = {
> +		.src_addr = uap->port.mapbase + UART01x_DR,
> +		.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
> +		.direction = DMA_FROM_DEVICE,
> +		.src_maxburst = uap->fifosize >> 1,
> +	};
> +	struct dma_slave_config tx_conf = {
> +		.dst_addr = uap->port.mapbase + UART01x_DR,
> +		.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
> +		.direction = DMA_TO_DEVICE,
> +		.dst_maxburst = uap->fifosize >> 1,
> +	};
> +	dma_cap_mask_t mask;
> +	int sglen;
> +
> +	/* We need platform data */
> +	if (!plat) {
> +		dev_err(uap->port.dev, "no DMA platform data!\n");
> +		return;
> +	}
> +
> +	/* Try to acquire a generic DMA engine slave channel */
> +	dma_cap_zero(mask);
> +	dma_cap_set(DMA_SLAVE, mask);
> +
> +	/*
> +	 * We need both RX and TX channels to do DMA, else do none
> +	 * of them.
> +	 */
> +	uap->dma_rx_channel = dma_request_channel(mask,
> +						  plat->dma_filter,
> +						  plat->dma_rx_param);
> +	if (!uap->dma_rx_channel) {
> +		dev_err(uap->port.dev, "no RX DMA channel!\n");
> +		return;
> +	}
> +	uap->dma_rx_channel->device->device_control(uap->dma_rx_channel,
> +						    DMA_SLAVE_CONFIG,
> +						    (unsigned long) &rx_conf);
> +
> +	uap->dma_tx_channel = dma_request_channel(mask,
> +						  plat->dma_filter,
> +						  plat->dma_tx_param);
> +	if (!uap->dma_tx_channel) {
> +		dev_err(uap->port.dev, "no TX DMA channel!\n");
> +		goto err_no_txchan;
> +	}
> +	uap->dma_tx_channel->device->device_control(uap->dma_tx_channel,
> +						    DMA_SLAVE_CONFIG,
> +						    (unsigned long) &tx_conf);
> +
> +	/* Allocate DMA RX and TX buffers */
> +	dmarx->rx_dma_buf_a = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
> +	if (!dmarx->rx_dma_buf_a) {
> +		dev_err(uap->port.dev, "failed to allocate DMA RX buffer A\n");
> +		goto err_no_rxbuf_a;
> +	}
> +
> +	dmarx->rx_dma_buf_b = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
> +	if (!dmarx->rx_dma_buf_b) {
> +		dev_err(uap->port.dev, "failed to allocate DMA RX buffer B\n");
> +		goto err_no_rxbuf_b;
> +	}
> +
> +	dmatx->tx_dma_buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
> +	if (!dmatx->tx_dma_buf) {
> +		dev_err(uap->port.dev, "failed to allocate DMA TX buffer\n");
> +		goto err_no_txbuf;
> +	}
> +
> +	/* Provide single SG list with one item to the buffers */
> +	sg_init_one(&dmarx->scatter_a, dmarx->rx_dma_buf_a,
> +		    PL011_DMA_BUFFER_SIZE);
> +	sg_init_one(&dmarx->scatter_b, dmarx->rx_dma_buf_b,
> +		    PL011_DMA_BUFFER_SIZE);
> +	sg_init_one(&dmatx->scatter, dmatx->tx_dma_buf, PL011_DMA_BUFFER_SIZE);
> +
> +	/* Map DMA buffers */
> +	sglen = dma_map_sg(uap->port.dev, &dmarx->scatter_a,
> +			   1, DMA_FROM_DEVICE);
> +	if (sglen != 1)
> +		goto err_rx_sgmap_a;
> +
> +	sglen = dma_map_sg(uap->port.dev, &dmarx->scatter_b,
> +			   1, DMA_FROM_DEVICE);
> +	if (sglen != 1)
> +		goto err_rx_sgmap_b;
> +
> +	sglen = dma_map_sg(uap->port.dev, &dmatx->scatter,
> +			   1, DMA_TO_DEVICE);
> +	if (sglen != 1)
> +		goto err_tx_sgmap;
> +
> +	/* Initially we say the TX transfer is complete (no job in flight) */
> +	init_completion(&uap->dmatx.complete);
> +	complete(&uap->dmatx.complete);
> +
> +	/* The DMA buffer is now the FIFO the TTY subsystem can use */
> +	uap->port.fifosize = PL011_DMA_BUFFER_SIZE;
> +
> +	uap->enable_dma = true;
> +	dev_info(uap->port.dev, "setup for DMA on RX %s, TX %s\n",
> +		 dma_chan_name(uap->dma_rx_channel),
> +		 dma_chan_name(uap->dma_tx_channel));
> +	return;
> +
> +err_tx_sgmap:
> +	dma_unmap_sg(uap->port.dev, &dmarx->scatter_b,
> +		     1, DMA_FROM_DEVICE);
> +err_rx_sgmap_b:
> +	dma_unmap_sg(uap->port.dev, &dmarx->scatter_a,
> +		     1, DMA_FROM_DEVICE);
> +err_rx_sgmap_a:
> +	kfree(dmatx->tx_dma_buf);
> +err_no_txbuf:
> +	kfree(dmarx->rx_dma_buf_b);
> +err_no_rxbuf_b:
> +	kfree(dmarx->rx_dma_buf_a);
> +err_no_rxbuf_a:
> +	dma_release_channel(uap->dma_tx_channel);
> +	uap->dma_tx_channel = NULL;
> +err_no_txchan:
> +	dma_release_channel(uap->dma_rx_channel);
> +	uap->dma_rx_channel = NULL;
> +	return;
> +}
> +
> +/*
> + * Stack up the UARTs and let the above initcall be done at
> + * device initcall time, because the serial driver is called as
> + * an arch initcall, and at this time the DMA subsystem is not yet
> + * registered. At this point the driver will switch over to using
> + * DMA where desired.
> + */
> +
> +struct dma_uap {
> +	struct list_head node;
> +	struct uart_amba_port *uap;
> +};
> +
> +struct list_head pl011_dma_uarts = LIST_HEAD_INIT(pl011_dma_uarts);
> +
> +static int pl011_dma_initcall(void)
> +{
> +	struct list_head *node, *tmp;
> +
> +	list_for_each_safe(node, tmp, &pl011_dma_uarts) {
> +		struct dma_uap *dmau = list_entry(node, struct dma_uap, node);
> +		pl011_dma_probe_initcall(dmau->uap);
> +		list_del(node);
> +		kfree(dmau);
> +	}
> +	return 0;
> +}
> +
> +device_initcall(pl011_dma_initcall);
> +
> +static void pl011_dma_probe(struct uart_amba_port *uap)
> +{
> +	struct dma_uap *dmau = kzalloc(sizeof(struct dma_uap), GFP_KERNEL);
> +
> +	if (dmau == NULL)
> +		return;
> +	dmau->uap = uap;
> +	list_add_tail(&dmau->node, &pl011_dma_uarts);
> +}
> +
> +static void pl011_dma_remove(struct uart_amba_port *uap)
> +{
> +	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
> +	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
> +
> +	/* TODO: remove the initcall if it has not yet executed */
> +	/* Unmap and free DMA buffers */
> +	if (uap->dma_rx_channel)
> +		dma_release_channel(uap->dma_rx_channel);
> +	if (uap->dma_tx_channel)
> +		dma_release_channel(uap->dma_tx_channel);
> +	if (dmatx->tx_dma_buf) {
> +		dma_unmap_sg(uap->port.dev, &dmatx->scatter,
> +			     1, DMA_TO_DEVICE);
> +		kfree(dmatx->tx_dma_buf);
> +	}
> +	if (dmarx->rx_dma_buf_b) {
> +		dma_unmap_sg(uap->port.dev, &dmarx->scatter_b,
> +			     1, DMA_FROM_DEVICE);
> +		kfree(dmarx->rx_dma_buf_b);
> +	}
> +	if (dmarx->rx_dma_buf_a) {
> +		dma_unmap_sg(uap->port.dev, &dmarx->scatter_a,
> +			     1, DMA_FROM_DEVICE);
> +		kfree(dmarx->rx_dma_buf_a);
> +	}
> +}
> +
> +/* Forward declare this for the refill routine */
> +static int pl011_dma_tx_refill(struct uart_amba_port *uap);
> +
> +/*
> + * Move the tail when this IRQ occurs, if not empty refill and
> + * fire another transaction
> + */
> +static void pl011_dma_tx_callback(void *data)
> +{
> +	struct uart_amba_port *uap = data;
> +	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
> +	struct circ_buf *xmit = &uap->port.state->xmit;
> +	u16 val;
> +	int ret;
> +
> +	/* Temporarily disable TX DMA */
> +	val = readw(uap->port.membase + UART011_DMACR);
> +	val &= ~(UART011_TXDMAE);
> +	writew(val, uap->port.membase + UART011_DMACR);
> +
> +	/* Refill the TX if the buffer is not empty */
> +	if (!uart_circ_empty(xmit)) {
> +		ret = pl011_dma_tx_refill(uap);
> +		if (ret == -EBUSY)
> +			/*
> +			 * If DMA cannot be used right now, we complete this
> +			 * transaction and let the TTY layer retry. If the
> +			 * first following xfer fails to set up for DMA, it
> +			 * will fall through to interrupt mode.
> +			 */
> +			dev_dbg(uap->port.dev, "DMA busy\n");
> +	} else {
> +		complete(&dmatx->complete);
> +	}
> +}
> +
> +static int pl011_dma_tx_refill(struct uart_amba_port *uap)
> +{
> +	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
> +	struct dma_chan *chan = uap->dma_tx_channel;
> +	struct dma_async_tx_descriptor *desc;
> +	struct circ_buf *xmit = &uap->port.state->xmit;
> +	unsigned int count;
> +	unsigned long flags;
> +	u16 val;
> +
> +	/* Don't bother about using DMA on XON/XOFF */
> +	if (uap->port.x_char) {
> +		/* If we can't get it into the FIFO, retry later */
> +		if (readw(uap->port.membase + UART01x_FR) &
> +		    UART01x_FR_TXFF) {
> +			complete(&dmatx->complete);
> +			return 0;
> +		}
> +		writew(uap->port.x_char, uap->port.membase + UART01x_DR);
> +		uap->port.icount.tx++;
> +		uap->port.x_char = 0;
> +		complete(&dmatx->complete);
> +		return 0;
> +	}
> +
> +	/*
> +	 * Try to avoid the overhead involved in using DMA if the
> +	 * transaction fits in the first half of the FIFO and it's not
> +	 * full. Unfortunately there is only one single bit in the
> +	 * hardware to tell whether the FIFO is full or not, so
> +	 * we don't know exactly how many chars we can fit in.
> +	 */
> +	if (uart_circ_chars_pending(xmit) < (uap->fifosize >> 1)) {
> +		while (uart_circ_chars_pending(xmit)) {
> +			if (readw(uap->port.membase + UART01x_FR) &
> +			    UART01x_FR_TXFF) {
> +				/*
> +				 * Ooops TX FIFO is full, we'd better stop
> +				 * this. Let's enable TX interrupt here to get
> +				 * informed when there is again some space in
> +				 * the TX FIFO so we can continue the transfer.
> +				 * This interrupt will be cleared just before
> +				 * setting up DMA, as it could interfere with
> +				 * TX interrupt handling routine.
> +				 */
> +				uap->im |= UART011_TXIM;
> +				writew(uap->im,
> +				       uap->port.membase + UART011_IMSC);
> +				break;
> +			}
> +			writew(xmit->buf[xmit->tail],
> +			       uap->port.membase + UART01x_DR);
> +			uap->port.icount.tx++;
> +			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
> +		}
> +		complete(&dmatx->complete);
> +		return 0;
> +	}
> +
> +	/*
> +	 * Clear TX interrupt to be sure that DMA will not interfere with
> +	 * TX ISR
> +	 */
> +	local_irq_save(flags);
> +	uap->im &= ~UART011_TXIM;
> +	writew(uap->im, uap->port.membase + UART011_IMSC);
> +	local_irq_restore(flags);
> +
> +	/* Sync the buffer for the CPU so we can write into it */
> +	dma_sync_sg_for_cpu(uap->port.dev,
> +			    &dmatx->scatter,
> +			    1,
> +			    DMA_TO_DEVICE);
> +
> +	/* Else proceed to copy the TX chars to the DMA buffer and fire DMA */
> +	count = uart_circ_chars_pending(xmit);
> +	if (count > PL011_DMA_BUFFER_SIZE)
> +		count = PL011_DMA_BUFFER_SIZE;
> +
> +	if (xmit->tail < xmit->head)
> +		memcpy(&dmatx->tx_dma_buf[0], &xmit->buf[xmit->tail], count);
> +	else {
> +		size_t first = UART_XMIT_SIZE - xmit->tail;
> +		size_t second = xmit->head;
> +
> +		memcpy(&dmatx->tx_dma_buf[0], &xmit->buf[xmit->tail], first);
> +		memcpy(&dmatx->tx_dma_buf[first], &xmit->buf[0], second);
> +	}
> +
> +	dmatx->scatter.length = count;
> +
> +	/* Synchronize the scatterlist, invalidate buffers, caches etc */
> +	dma_sync_sg_for_device(uap->port.dev,
> +			       &dmatx->scatter,
> +			       1,
> +			       DMA_TO_DEVICE);
> +
> +	/* Prepare the scatterlist */
> +	desc = chan->device->device_prep_slave_sg(chan,
> +						  &dmatx->scatter,
> +						  1,
> +						  DMA_TO_DEVICE,
> +						  DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +	if (!desc) {
> +		/* "Complete" DMA (errorpath) */
> +		complete(&dmatx->complete);
> +		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
> +		return -EBUSY;
> +	}
> +
> +	/* Some data to go along to the callback */
> +	desc->callback = pl011_dma_tx_callback;
> +	desc->callback_param = uap;
> +
> +	/* Here is where overloaded DMA controllers can fail */
> +	dmatx->cookie = desc->tx_submit(desc);
> +	if (dma_submit_error(dmatx->cookie)) {
> +		/* "Complete" DMA (errorpath) */
> +		complete(&dmatx->complete);
> +		chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
> +		return dmatx->cookie;
> +	}
> +
> +	/*
> +	 * Now we know that DMA will fire, so advance the ring buffer
> +	 * with the stuff we just dispatched
> +	 */
> +	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
> +	uap->port.icount.tx += count;
> +	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
> +		uart_write_wakeup(&uap->port);
> +
> +	/* Fire the DMA transaction */
> +	chan->device->device_issue_pending(chan);
> +
> +	val = readw(uap->port.membase + UART011_DMACR);
> +	val |= UART011_TXDMAE;
> +	writew(val, uap->port.membase + UART011_DMACR);
> +	return 0;
> +}
> +
> +static void pl011_dma_rx_callback(void *data);
> +
> +static int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
> +{
> +	struct dma_chan *rxchan = uap->dma_rx_channel;
> +	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
> +	struct dma_async_tx_descriptor *desc;
> +	struct scatterlist *scatter = dmarx->use_buffer_b ?
> +		&dmarx->scatter_b : &dmarx->scatter_a;
> +	u16 val;
> +
> +	/* Start the RX DMA job */
> +	desc = rxchan->device->device_prep_slave_sg(rxchan,
> +						    scatter,
> +						    1,
> +						    DMA_FROM_DEVICE,
> +						    DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +	/*
> +	 * If the DMA engine is busy and cannot prepare a
> +	 * channel, no big deal, the driver will fall back
> +	 * to interrupt mode as a result of this error code.
> +	 */
> +	if (!desc) {
> +		uap->rx_dma_running = false;
> +		rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
> +		return -EBUSY;
> +	}
> +
> +	/* Some data to go along to the callback */
> +	desc->callback = pl011_dma_rx_callback;
> +	desc->callback_param = uap;
> +	/* This is another point where an overloaded engine can fail */
> +	dmarx->cookie = desc->tx_submit(desc);
> +	if (dma_submit_error(dmarx->cookie)) {
> +		uap->rx_dma_running = false;
> +		rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
> +		return -EBUSY;
> +	}
> +
> +	rxchan->device->device_issue_pending(rxchan);
> +
> +	val = readw(uap->port.membase + UART011_DMACR);
> +	val |= UART011_RXDMAE;
> +	writew(val, uap->port.membase + UART011_DMACR);
> +	uap->rx_dma_running = true;
> +
> +	return 0;
> +}
> +
> +/*
> + * This is called when either the DMA job is complete, or
> + * the FIFO timeout interrupt occurred. This must be called
> + * with the port spinlock uap->port.lock held.
> + */
> +static void pl011_dma_rx_chars(struct uart_amba_port *uap,
> +			       u32 pending, bool use_buffer_b,
> +			       bool readfifo)
> +{
> +	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
> +	struct tty_struct *tty = uap->port.state->port.tty;
> +	char *buf = use_buffer_b ? dmarx->rx_dma_buf_b : dmarx->rx_dma_buf_a;
> +	struct scatterlist *scatter = use_buffer_b ?
> +		&dmarx->scatter_b : &dmarx->scatter_a;
> +	unsigned int status, ch, flag;
> +	u32 count = pending;
> +	u32 bufp = 0;
> +	u32 fifotaken = 0; /* only used for vdbg() */
> +
> +	/* Sync in buffer */
> +	dma_sync_sg_for_cpu(uap->port.dev,
> +			    scatter,
> +			    1,
> +			    DMA_FROM_DEVICE);
> +
> +	status = readw(uap->port.membase + UART01x_FR);
> +
> +	/*
> +	 * First take all chars in the DMA pipe, then look
> +	 * in the FIFO. So loop while we have chars in the
> +	 * DMA buffer or the FIFO. If we came here from a
> +	 * DMA buffer full interrupt, there is already another
> +	 * DMA job triggered to read the FIFO, so don't look
> +	 * at it.
> +	 */
> +	while (count ||
> +	       (readfifo && (status & UART01x_FR_RXFE) == 0)) {
> +
> +		flag = TTY_NORMAL;
> +		uap->port.icount.rx++;
> +
> +		if (count) {
> +			/* Take chars from the DMA buffer */
> +			int inserted = tty_insert_flip_string(
> +					uap->port.state->port.tty, buf, count);
> +
> +			/*
> +			 * Check if insertion is successful to avoid
> +			 * infinite loop. This can happen when TTY is full.
> +			 */
> +			if (unlikely(inserted == 0))
> +				count = 0;
> +			else {
> +				count -= inserted;
> +				bufp += inserted;
> +			}
> +			continue;
> +		} else {
> +			/* Take chars from the FIFO and update status */
> +			ch = readw(uap->port.membase + UART01x_DR);
> +			status = readw(uap->port.membase + UART01x_FR);
> +			fifotaken++;
> +
> +			/*
> +			 * Error conditions will only occur in the FIFO,
> +			 * these will trigger an immediate interrupt and
> +			 * stop the DMA job, so we will always find the
> +			 * error in the FIFO, never in the DMA buffer.
> +			 */
> +			if (unlikely(ch & UART_DR_ERROR)) {
> +				if (ch & UART011_DR_BE) {
> +					ch &= ~(UART011_DR_FE | UART011_DR_PE);
> +					uap->port.icount.brk++;
> +					if (uart_handle_break(&uap->port))
> +						continue;
> +				} else if (ch & UART011_DR_PE)
> +					uap->port.icount.parity++;
> +				else if (ch & UART011_DR_FE)
> +					uap->port.icount.frame++;
> +				if (ch & UART011_DR_OE)
> +					uap->port.icount.overrun++;
> +
> +				ch &= uap->port.read_status_mask;
> +
> +				if (ch & UART011_DR_BE)
> +					flag = TTY_BREAK;
> +				else if (ch & UART011_DR_PE)
> +					flag = TTY_PARITY;
> +				else if (ch & UART011_DR_FE)
> +					flag = TTY_FRAME;
> +			}
> +		}
> +
> +		if (uart_handle_sysrq_char(&uap->port, ch & 255))
> +			continue;
> +
> +		uart_insert_char(&uap->port, ch, UART011_DR_OE, ch, flag);
> +	}
> +
> +	spin_unlock(&uap->port.lock);
> +	dev_vdbg(uap->port.dev,
> +		 "Took %d chars from DMA buffer and %d chars from the FIFO\n",
> +		 bufp, fifotaken);
> +	tty_flip_buffer_push(tty);
> +	spin_lock(&uap->port.lock);
> +}
> +
> +static void pl011_dma_rx_irq(struct uart_amba_port *uap)
> +{
> +	struct dma_chan *rxchan = uap->dma_rx_channel;
> +	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
> +	struct scatterlist *scatter = dmarx->use_buffer_b ?
> +		&dmarx->scatter_b : &dmarx->scatter_a;
> +	u32 pending;
> +	int ret;
> +	struct dma_tx_state state;
> +	enum dma_status dmastat;
> +	u16 val;
> +
> +	/* Use PrimeCell DMA extensions to stop the transfer */
> +	ret = rxchan->device->device_control(rxchan, DMA_PAUSE, 0);
> +	if (ret)
> +		dev_err(uap->port.dev, "unable to pause DMA transfer\n");
> +	dmastat = rxchan->device->device_tx_status(rxchan,
> +						   dmarx->cookie, &state);
> +
> +	/* Disable RX DMA temporarily */
> +	val = readw(uap->port.membase + UART011_DMACR);
> +	val &= ~(UART011_RXDMAE);
> +	writew(val, uap->port.membase + UART011_DMACR);
> +	uap->rx_dma_running = false;
> +
> +	if (dmastat != DMA_PAUSED)
> +		dev_err(uap->port.dev, "unable to pause DMA transfer\n");
> +	pending = scatter->length - state.residue;
> +
> +	BUG_ON(pending > PL011_DMA_BUFFER_SIZE);
> +
> +	ret = rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
> +	if (ret)
> +		dev_err(uap->port.dev, "unable to terminate DMA transfer\n");
> +
> +	/*
> +	 * This will take the chars we have so far and insert
> +	 * into the framework.
> +	 */
> +	pl011_dma_rx_chars(uap, pending, dmarx->use_buffer_b, true);
> +
> +	/* Switch buffer & re-trigger DMA job */
> +	dmarx->use_buffer_b = !dmarx->use_buffer_b;
> +	ret = pl011_dma_rx_trigger_dma(uap);
> +	if (ret) {
> +		dev_dbg(uap->port.dev, "could not retrigger RX DMA job "
> +			"fall back to interrupt mode\n");
> +		uap->im |= UART011_RXIM;
> +		writew(uap->im, uap->port.membase + UART011_IMSC);
> +	}
> +}
> +
> +static void pl011_dma_rx_callback(void *data)
> +{
> +	struct uart_amba_port *uap = data;
> +	struct pl011_dma_rx_transaction *dmarx = &uap->dmarx;
> +	bool lastbuf = dmarx->use_buffer_b;
> +	int ret;
> +
> +	/*
> +	 * This completion interrupt occurs typically when the
> +	 * RX buffer is totally stuffed but no timeout has yet
> +	 * occurred. When that happens, we just want the RX
> +	 * routine to flush out the secondary DMA buffer while
> +	 * we immediately trigger the next DMA job.
> +	 */
> +	uap->rx_dma_running = false;
> +	dmarx->use_buffer_b = !lastbuf;
> +	ret = pl011_dma_rx_trigger_dma(uap);
> +
> +	spin_lock_irq(&uap->port.lock);
> +	pl011_dma_rx_chars(uap, PL011_DMA_BUFFER_SIZE, lastbuf, false);
> +	spin_unlock_irq(&uap->port.lock);
> +	/*
> +	 * Do this check after we picked the DMA chars so we don't
> +	 * get some IRQ immediately from RX.
> +	 */
> +	if (ret) {
> +		dev_dbg(uap->port.dev, "could not retrigger RX DMA job "
> +			"fall back to interrupt mode\n");
> +		uap->im |= UART011_RXIM;
> +		writew(uap->im, uap->port.membase + UART011_IMSC);
> +	}
> +}
> +
> +static void pl011_st_dma_startup(struct uart_amba_port *uap)
> +{
> +	/* Set DMABREQ threshold */
> +	writew(ST_UART011_DMAWM_RX_16 | ST_UART011_DMAWM_TX_16,
> +	       uap->port.membase + ST_UART011_DMAWM);
> +}
> +
> +static void pl011_dma_startup(struct uart_amba_port *uap)
> +{
> +	u16 val;
> +	int ret = 0;
> +
> +	if (!uap->enable_dma)
> +		return;
> +
> +	/* Turn on DMA error (RX/TX will be enabled on demand) */
> +	val = readw(uap->port.membase + UART011_DMACR);
> +	val |= UART011_DMAONERR;
> +	writew(val, uap->port.membase + UART011_DMACR);
> +
> +	/* call vendor specific dma init */
> +	if (uap->dma_init)
> +		uap->dma_init(uap);
> +
> +	ret = pl011_dma_rx_trigger_dma(uap);
> +	if (ret)
> +		dev_dbg(uap->port.dev, "could not trigger initial "
> +			"RX DMA job, fall back to interrupt mode\n");
> +}
> +
> +static void pl011_dma_shutdown(struct uart_amba_port *uap)
> +{
> +	struct dma_chan *rxchan = uap->dma_rx_channel;
> +	struct dma_chan *txchan = uap->dma_tx_channel;
> +	u16 val;
> +
> +	if (!uap->enable_dma)
> +		return;
> +
> +	/* Disable RX and TX DMA */
> +	while (readw(uap->port.membase + UART01x_FR) & UART01x_FR_BUSY)
> +		barrier();
> +	val = readw(uap->port.membase + UART011_DMACR);
> +	val &= ~(UART011_DMAONERR | UART011_RXDMAE | UART011_TXDMAE);
> +	writew(val, uap->port.membase + UART011_DMACR);
> +	/* Terminate any RX and TX DMA jobs */
> +	rxchan->device->device_control(rxchan, DMA_TERMINATE_ALL, 0);
> +	txchan->device->device_control(txchan, DMA_TERMINATE_ALL, 0);
> +}
> +
> +static int pl011_dma_tx_chars(struct uart_amba_port *uap)
> +{
> +	struct pl011_dma_tx_transaction *dmatx = &uap->dmatx;
> +
> +	/* Try to wait for completion, return if something is in progress */
> +	if (!try_wait_for_completion(&dmatx->complete))
> +		return -EINPROGRESS;
> +
> +	/* Set up and fire the DMA job */
> +	init_completion(&dmatx->complete);
> +	return pl011_dma_tx_refill(uap);
> +}
> +
> +#else
> +/* Blank functions if the DMA engine is not available */
> +static inline void pl011_dma_probe(struct uart_amba_port *uap)
> +{
> +}
> +
> +static inline void pl011_dma_remove(struct uart_amba_port *uap)
> +{
> +}
> +
> +static inline void pl011_dma_rx_irq(struct uart_amba_port *uap)
> +{
> +}
> +
> +static inline int pl011_dma_rx_trigger_dma(struct uart_amba_port *uap)
> +{
> +	return -EIO;
> +}
> +
> +static inline void pl011_dma_startup(struct uart_amba_port *uap)
> +{
> +}
> +
> +static inline void pl011_dma_shutdown(struct uart_amba_port *uap)
> +{
> +}
> +
> +static inline int pl011_dma_tx_chars(struct uart_amba_port *uap)
> +{
> +	return -EIO;
> +}
> +#endif
> +
> +
>  static void pl011_stop_tx(struct uart_port *port)
>  {
>  	struct uart_amba_port *uap = (struct uart_amba_port *)port;
> @@ -111,10 +883,18 @@ static void pl011_stop_tx(struct uart_port *port)
>  	writew(uap->im, uap->port.membase + UART011_IMSC);
>  }
>  
> +static void pl011_tx_chars(struct uart_amba_port *uap);
> +
>  static void pl011_start_tx(struct uart_port *port)
>  {
>  	struct uart_amba_port *uap = (struct uart_amba_port *)port;
>  
> +	if (uap->enable_dma) {
> +		/* Immediately push out chars in DMA mode */
> +		pl011_tx_chars(uap);
> +		return;
> +	}
> +	/* In interrupt mode, let the interrupt pull chars */
>  	uap->im |= UART011_TXIM;
>  	writew(uap->im, uap->port.membase + UART011_IMSC);
>  }
> @@ -140,6 +920,7 @@ static void pl011_rx_chars(struct uart_amba_port *uap)
>  {
>  	struct tty_struct *tty = uap->port.state->port.tty;
>  	unsigned int status, ch, flag, max_count = 256;
> +	int ret;
>  
>  	status = readw(uap->port.membase + UART01x_FR);
>  	while ((status & UART01x_FR_RXFE) == 0 && max_count--) {
> @@ -184,6 +965,21 @@ static void pl011_rx_chars(struct uart_amba_port *uap)
>  	}
>  	spin_unlock(&uap->port.lock);
>  	tty_flip_buffer_push(tty);
> +	/*
> +	 * If we were temporarily out of DMA mode for a while,
> +	 * attempt to switch back to DMA mode again.
> +	 */
> +	if (uap->enable_dma) {
> +		uap->im &= ~UART011_RXIM;
> +		writew(uap->im, uap->port.membase + UART011_IMSC);
> +		ret = pl011_dma_rx_trigger_dma(uap);
> +		if (ret) {
> +			dev_dbg(uap->port.dev, "could not trigger RX DMA job "
> +				"fall back to interrupt mode again\n");
> +			uap->im |= UART011_RXIM;
> +			writew(uap->im, uap->port.membase + UART011_IMSC);
> +		}
> +	}
>  	spin_lock(&uap->port.lock);
>  }
>  
> @@ -192,6 +988,25 @@ static void pl011_tx_chars(struct uart_amba_port *uap)
>  	struct circ_buf *xmit = &uap->port.state->xmit;
>  	int count;
>  
> +	if (uap->enable_dma) {
> +		int ret;
> +
> +		ret = pl011_dma_tx_chars(uap);
> +		if (!ret)
> +			return;
> +		if (ret == -EINPROGRESS)
> +			return;
> +
> +		/*
> +		 * On any other error (including -EBUSY which is emitted
> +		 * in case the DMA engine is out of physical channels
> +		 * for example) we fall through to interrupt mode
> +		 */
> +		dev_dbg(uap->port.dev, "DMA unavailable for TX\n");
> +		uap->im |= UART011_TXIM;
> +		writew(uap->im, uap->port.membase + UART011_IMSC);
> +	}
> +
>  	if (uap->port.x_char) {
>  		writew(uap->port.x_char, uap->port.membase + UART01x_DR);
>  		uap->port.icount.tx++;
> @@ -203,8 +1018,10 @@ static void pl011_tx_chars(struct uart_amba_port *uap)
>  		return;
>  	}
>  
> -	count = uap->port.fifosize >> 1;
> +	count = uap->fifosize >> 1;
>  	do {
> +		if (readw(uap->port.membase + UART01x_FR) & UART01x_FR_TXFF)
> +			break;
>  		writew(xmit->buf[xmit->tail], uap->port.membase + UART01x_DR);
>  		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
>  		uap->port.icount.tx++;
> @@ -249,7 +1066,7 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
>  	unsigned int status, pass_counter = AMBA_ISR_PASS_LIMIT;
>  	int handled = 0;
>  
> -	spin_lock(&uap->port.lock);
> +	spin_lock_irq(&uap->port.lock);
>  
>  	status = readw(uap->port.membase + UART011_MIS);
>  	if (status) {
> @@ -258,13 +1075,30 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
>  					  UART011_RXIS),
>  			       uap->port.membase + UART011_ICR);
>  
> -			if (status & (UART011_RTIS|UART011_RXIS))
> -				pl011_rx_chars(uap);
> +			if (status & (UART011_RTIS|UART011_RXIS)) {
> +				if (uap->enable_dma && uap->rx_dma_running)
> +					pl011_dma_rx_irq(uap);
> +				else
> +					pl011_rx_chars(uap);
> +			}
>  			if (status & (UART011_DSRMIS|UART011_DCDMIS|
>  				      UART011_CTSMIS|UART011_RIMIS))
>  				pl011_modem_status(uap);
> -			if (status & UART011_TXIS)
> +			if (status & UART011_TXIS) {
> +				/*
> +				 * When DMA is enabled we still use TX
> +				 * interrupt to send small amounts of data,
> +				 * and as a fallback when the DMA channel is
> +				 * not available. This interrupt is cleared
> +				 * here and will be enabled when it's needed.
> +				 */
> +				if (uap->enable_dma) {
> +					uap->im &= ~UART011_TXIM;
> +					writew(uap->im,
> +					       uap->port.membase + UART011_IMSC);
> +				}
>  				pl011_tx_chars(uap);
> +			}
>  
>  			if (pass_counter-- == 0)
>  				break;
> @@ -274,7 +1108,7 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
>  		handled = 1;
>  	}
>  
> -	spin_unlock(&uap->port.lock);
> +	spin_unlock_irq(&uap->port.lock);
>  
>  	return IRQ_RETVAL(handled);
>  }
> @@ -423,16 +1257,28 @@ static int pl011_startup(struct uart_port *port)
>  	cr = UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE;
>  	writew(cr, uap->port.membase + UART011_CR);
>  
> +	/* Clear pending error interrupts*/
> +	writew(0xFFFF & ~(UART011_TXIS | UART011_RTIS | UART011_RXIS),
> +	       uap->port.membase + UART011_ICR);
> +
>  	/*
>  	 * initialise the old status of the modem signals
>  	 */
>  	uap->old_status = readw(uap->port.membase + UART01x_FR) & UART01x_FR_MODEM_ANY;
>  
> +	/* Startup DMA */
> +	pl011_dma_startup(uap);
> +
>  	/*
> -	 * Finally, enable interrupts
> +	 * Finally, enable interrupts, only timeouts when using DMA
> +	 * if initial RX DMA job failed, start in interrupt mode
> +	 * as well.
>  	 */
>  	spin_lock_irq(&uap->port.lock);
> -	uap->im = UART011_RXIM | UART011_RTIM;
> +	if (uap->enable_dma && uap->rx_dma_running)
> +		uap->im = UART011_RTIM;
> +	else
> +		uap->im = UART011_RXIM | UART011_RTIM;
>  	writew(uap->im, uap->port.membase + UART011_IMSC);
>  	spin_unlock_irq(&uap->port.lock);
>  
> @@ -467,6 +1313,8 @@ static void pl011_shutdown(struct uart_port *port)
>  	writew(0xffff, uap->port.membase + UART011_ICR);
>  	spin_unlock_irq(&uap->port.lock);
>  
> +	pl011_dma_shutdown(uap);
> +
>  	/*
>  	 * Free the interrupt
>  	 */
> @@ -532,7 +1380,7 @@ pl011_set_termios(struct uart_port *port, struct ktermios *termios,
>  		if (!(termios->c_cflag & PARODD))
>  			lcr_h |= UART01x_LCRH_EPS;
>  	}
> -	if (port->fifosize > 1)
> +	if (uap->fifosize > 1)
>  		lcr_h |= UART01x_LCRH_FEN;
>  
>  	spin_lock_irqsave(&port->lock, flags);
> @@ -862,6 +1710,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
>  	uap->lcrh_rx = vendor->lcrh_rx;
>  	uap->lcrh_tx = vendor->lcrh_tx;
>  	uap->oversampling = vendor->oversampling;
> +	uap->dma_init = vendor->dma_init;
>  	uap->port.dev = &dev->dev;
>  	uap->port.mapbase = dev->res.start;
>  	uap->port.membase = base;
> @@ -871,6 +1720,8 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
>  	uap->port.ops = &amba_pl011_pops;
>  	uap->port.flags = UPF_BOOT_AUTOCONF;
>  	uap->port.line = i;
> +	uap->fifosize = vendor->fifosize;
> +	pl011_dma_probe(uap);
>  
>  	amba_ports[i] = uap;
>  
> @@ -879,6 +1730,7 @@ static int pl011_probe(struct amba_device *dev, struct amba_id *id)
>  	if (ret) {
>  		amba_set_drvdata(dev, NULL);
>  		amba_ports[i] = NULL;
> +		pl011_dma_remove(uap);
>  		clk_put(uap->clk);
>   unmap:
>  		iounmap(base);
> @@ -902,6 +1754,7 @@ static int pl011_remove(struct amba_device *dev)
>  		if (amba_ports[i] == uap)
>  			amba_ports[i] = NULL;
>  
> +	pl011_dma_remove(uap);
>  	iounmap(uap->port.membase);
>  	clk_put(uap->clk);
>  	kfree(uap);
> diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
> index 6021588..47c176c 100644
> --- a/include/linux/amba/serial.h
> +++ b/include/linux/amba/serial.h
> @@ -113,6 +113,21 @@
>  #define UART01x_LCRH_PEN	0x02
>  #define UART01x_LCRH_BRK	0x01
>  
> +#define ST_UART011_DMAWM_RX_1	(0 << 3)
> +#define ST_UART011_DMAWM_RX_2	(1 << 3)
> +#define ST_UART011_DMAWM_RX_4	(2 << 3)
> +#define ST_UART011_DMAWM_RX_8	(3 << 3)
> +#define ST_UART011_DMAWM_RX_16	(4 << 3)
> +#define ST_UART011_DMAWM_RX_32	(5 << 3)
> +#define ST_UART011_DMAWM_RX_48	(6 << 3)
> +#define ST_UART011_DMAWM_TX_1	0
> +#define ST_UART011_DMAWM_TX_2	1
> +#define ST_UART011_DMAWM_TX_4	2
> +#define ST_UART011_DMAWM_TX_8	3
> +#define ST_UART011_DMAWM_TX_16	4
> +#define ST_UART011_DMAWM_TX_32	5
> +#define ST_UART011_DMAWM_TX_48	6
> +
>  #define UART010_IIR_RTIS	0x08
>  #define UART010_IIR_TIS		0x04
>  #define UART010_IIR_RIS		0x02
> @@ -180,6 +195,12 @@ struct amba_device; /* in uncompress this is included but amba/bus.h is not */
>  struct amba_pl010_data {
>  	void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
>  };
> +struct dma_chan;
> +struct amba_pl011_data {
> +	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
> +	void *dma_rx_param;
> +	void *dma_tx_param;
> +};
>  #endif
>  
>  #endif
> -- 
> 1.6.3.3
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-10-12 19:30 ` Russell King - ARM Linux
@ 2010-10-13  5:15   ` Linus Walleij
  0 siblings, 0 replies; 11+ messages in thread
From: Linus Walleij @ 2010-10-13  5:15 UTC (permalink / raw)
  To: linux-arm-kernel

2010/10/12 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> Unless you can assure me that there will be no breakage through these
> patches without having their dependencies merged first, I think these
> will have to wait another cycle...

It's OK Russell I have all patience in the world :-)

We ended up in a bit of cross-subsystem hell and such things
happen.

We'll have the PL08x driver and other necessary bits in by 2.6.37 and
then we can perhaps look at these just after that merge window.
Then the PL022 that is not in as wide use as the others and which is
still marked experimental is merged, so we'll use that thing as a
guinea pig, if it doesn't blow up it's probably safe to merge the
others as well.

If we then merge them and they seem to work my plan is to move the
DMA channel assignments for the drivers into the struct amba_device
as the final step.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-10-06  9:32 [PATCH] ARM: add PrimeCell generic DMA to PL011 Linus Walleij
  2010-10-12 19:30 ` Russell King - ARM Linux
@ 2010-12-22 14:10 ` Russell King - ARM Linux
  2010-12-22 17:09   ` Linus Walleij
  1 sibling, 1 reply; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-12-22 14:10 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 06, 2010 at 11:32:06AM +0200, Linus Walleij wrote:
>  	/*
> -	 * Finally, enable interrupts
> +	 * Finally, enable interrupts, only timeouts when using DMA
> +	 * if initial RX DMA job failed, start in interrupt mode
> +	 * as well.
>  	 */
>  	spin_lock_irq(&uap->port.lock);
> -	uap->im = UART011_RXIM | UART011_RTIM;
> +	if (uap->enable_dma && uap->rx_dma_running)
> +		uap->im = UART011_RTIM;
> +	else
> +		uap->im = UART011_RXIM | UART011_RTIM;
>  	writew(uap->im, uap->port.membase + UART011_IMSC);

There's a big hole here, using DMA on the receive path.

| 3.4.4 UARTRTINTR
| The receive timeout interrupt is asserted when the receive FIFO is not
| empty, and no further data is received over a 32-bit period. The receive
| timeout interrupt is cleared either when the FIFO becomes empty through
| reading all the data (or by reading the holding register), or when a 1
| is written to the corresponding bit of the UARTICR register.

There must be unread data in the FIFO for us to get a RT interrupt.  The
problem is that if we have DMA enabled, then the received characters are
read from the FIFO as soon as they appear, and so the FIFO becomes empty,
which negates the conditions required for the RT interrupt to be
generated.

Let's say I have a login prompt on the serial port.  I type 'root'.
The DMA reads the contents of the FIFO into memory, causing it to be
emptied.  As there's no characters in the FIFO, we don't receive a RT
interrupt, so we don't transfer these characters into the TTY subsystem.
The only time that the input is received is after a further ~4092 newline
characters are submitted, at which point the DMA engine says the buffer
is complete, and calls the RX callback.  This sucks for interactive use
of the port, and probably prevents it being used for any kind of
sane protocol too.

I don't see any way to bring back interactivity while keeping RX DMA
support, so I don't think we can use DMA for the RX channel with these
UARTs.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 14:10 ` Russell King - ARM Linux
@ 2010-12-22 17:09   ` Linus Walleij
  2010-12-22 18:20     ` Russell King - ARM Linux
  2010-12-31 18:23     ` Russell King - ARM Linux
  0 siblings, 2 replies; 11+ messages in thread
From: Linus Walleij @ 2010-12-22 17:09 UTC (permalink / raw)
  To: linux-arm-kernel

2010/12/22 Russell King - ARM Linux <linux@arm.linux.org.uk>:
> On Wed, Oct 06, 2010 at 11:32:06AM +0200, Linus Walleij wrote:
>>  	/*
>> -	 * Finally, enable interrupts
>> +	 * Finally, enable interrupts, only timeouts when using DMA
>> +	 * if initial RX DMA job failed, start in interrupt mode
>> +	 * as well.
>>  	 */
>>  	spin_lock_irq(&uap->port.lock);
>> -	uap->im = UART011_RXIM | UART011_RTIM;
>> +	if (uap->enable_dma && uap->rx_dma_running)
>> +		uap->im = UART011_RTIM;
>> +	else
>> +		uap->im = UART011_RXIM | UART011_RTIM;
>>  	writew(uap->im, uap->port.membase + UART011_IMSC);
>
> There's a big hole here, using DMA on the receive path.
>
> | 3.4.4 UARTRTINTR
> | The receive timeout interrupt is asserted when the receive FIFO is not
> | empty, and no further data is received over a 32-bit period. The receive
> | timeout interrupt is cleared either when the FIFO becomes empty through
> | reading all the data (or by reading the holding register), or when a 1
> | is written to the corresponding bit of the UARTICR register.
>
> There must be unread data in the FIFO for us to get a RT interrupt.  The
> problem is that if we have DMA enabled, then the received characters are
> read from the FIFO as soon as they appear, and so the FIFO becomes empty,
> which negates the conditions required for the RT interrupt to be
> generated.

The DMA on the RX channel will not read single bytes at all,
instead it waits until it can retrieve a burst, which is set to half
a FIFO: src_maxburst = uap->fifosize >> 1,
(OK this is max burst, but of course the DMA engine will try
to configure as large transfers as possible, so in practice this
will be 8 bytes for an unmodified PL011.)

In the interactive case the RT interrupt is the most expected
execution path. If you type in a few characters
pl011_dma_rx_chars() will not receive anything using DMA really,
it will get all characters from the FIFO after a RT interrupt.

If you feed the DMA real quick (e.g. try pasting a large buffer
into a terminal) it will run some DMA rounds of pages, then it
will stop with some characters in the FIFO, and then it will time
out, and the remaining characters are read out from the FIFO.
So the characters get in for all cases I can think of.

If it ain't working there is something more fishy to it...

> Let's say I have a login prompt on the serial port.  I type 'root'.
> The DMA reads the contents of the FIFO into memory, and causing it to be
> emptied.  As there's no characters in the FIFO, we don't receive a RT
> interrupt, so we don't transfer these characters into the TTY subsystem.
> The only time that the input is received is after a further ~4092 newline
> characters are submitted, at which point the DMA engine says the buffer
> is complete, and calls the RX callback.  This sucks for interactive use
> of the port, and probably prevents it being used for any kind of
> sane protocol too.
>
> I don't see any way to bring back interactivity while keeping RX DMA
> support, so I don't think we can use DMA for the RX channel with these
> UARTs.

But I'm using this interactively on ux500, u300 (pure PL011) and
RealView PB11MPCore..?

There *may* be a corner case when you're transferring
an even multiple of the burst size though, then you can
*maybe* get into the situation you're describing. Such as
paste a buffer with n MOD 8 bytes, but I haven't
been able to provoke it :-/ (and no hardware here sadly).

In that case however, what we could do is to have
some last fallback timer that will do what the RT interrupt
does if it doesn't eventually happen.

(Will comment more on the thread(s) soonish, just need to
put the kids to sleep.)

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 17:09   ` Linus Walleij
@ 2010-12-22 18:20     ` Russell King - ARM Linux
  2010-12-22 20:34       ` Russell King - ARM Linux
  2010-12-22 21:17       ` Linus Walleij
  2010-12-31 18:23     ` Russell King - ARM Linux
  1 sibling, 2 replies; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-12-22 18:20 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Dec 22, 2010 at 06:09:22PM +0100, Linus Walleij wrote:
> 2010/12/22 Russell King - ARM Linux <linux@arm.linux.org.uk>:
> > On Wed, Oct 06, 2010 at 11:32:06AM +0200, Linus Walleij wrote:
> >>  	/*
> >> -	 * Finally, enable interrupts
> >> +	 * Finally, enable interrupts, only timeouts when using DMA
> >> +	 * if initial RX DMA job failed, start in interrupt mode
> >> +	 * as well.
> >>  	 */
> >>  	spin_lock_irq(&uap->port.lock);
> >> -	uap->im = UART011_RXIM | UART011_RTIM;
> >> +	if (uap->enable_dma && uap->rx_dma_running)
> >> +		uap->im = UART011_RTIM;
> >> +	else
> >> +		uap->im = UART011_RXIM | UART011_RTIM;
> >>  	writew(uap->im, uap->port.membase + UART011_IMSC);
> >
> > There's a big hole here, using DMA on the receive path.
> >
> > | 3.4.4 UARTRTINTR
> > | The receive timeout interrupt is asserted when the receive FIFO is not
> > | empty, and no further data is received over a 32-bit period. The receive
> > | timeout interrupt is cleared either when the FIFO becomes empty through
> > | reading all the data (or by reading the holding register), or when a 1
> > | is written to the corresponding bit of the UARTICR register.
> >
> > There must be unread data in the FIFO for us to get a RT interrupt.  The
> > problem is that if we have DMA enabled, then the received characters are
> > read from the FIFO as soon as they appear, and so the FIFO becomes empty,
> > which negates the conditions required for the RT interrupt to be
> > generated.
> 
> The DMA on the RX channel will not read single bytes at all,
> instead it waits until it can retrieve a burst, which is set to half
> a FIFO: src_maxburst = uap->fifosize >> 1,

I refer you to section 2.7, which describes the DMA interface.

UARTRXDMASREQ
Single character DMA transfer request, asserted by the UART. For
receive, one character consists of up to 12 bits. This signal is asserted
when the receive FIFO contains at least one character.

UARTRXDMABREQ
Burst DMA transfer request, asserted by the UART. This signal is
asserted when the receive FIFO contains more characters than the
programmed watermark level. You can program the watermark level for
each FIFO through the UARTIFLS register.

If your DMA controller is using UARTRXDMASREQ, then you'll end up DMAing
single bytes as long as the FIFO contains any bytes - thereby causing
it to be emptied completely.  As 3.4.4 says, if the FIFO is empty, you
don't get the timeout.

The PB926 doesn't specify whether it connects the burst or single-
character DMA request signal to the DMA controller - from the behaviour
I observe, I suspect it's the single character request which is.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 18:20     ` Russell King - ARM Linux
@ 2010-12-22 20:34       ` Russell King - ARM Linux
  2010-12-22 20:52         ` Russell King - ARM Linux
  2010-12-22 21:23         ` Linus Walleij
  2010-12-22 21:17       ` Linus Walleij
  1 sibling, 2 replies; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-12-22 20:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Dec 22, 2010 at 06:20:41PM +0000, Russell King - ARM Linux wrote:
> On Wed, Dec 22, 2010 at 06:09:22PM +0100, Linus Walleij wrote:
> > 2010/12/22 Russell King - ARM Linux <linux@arm.linux.org.uk>:
> > > On Wed, Oct 06, 2010 at 11:32:06AM +0200, Linus Walleij wrote:
> > >>  	/*
> > >> -	 * Finally, enable interrupts
> > >> +	 * Finally, enable interrupts, only timeouts when using DMA
> > >> +	 * if initial RX DMA job failed, start in interrupt mode
> > >> +	 * as well.
> > >>  	 */
> > >>  	spin_lock_irq(&uap->port.lock);
> > >> -	uap->im = UART011_RXIM | UART011_RTIM;
> > >> +	if (uap->enable_dma && uap->rx_dma_running)
> > >> +		uap->im = UART011_RTIM;
> > >> +	else
> > >> +		uap->im = UART011_RXIM | UART011_RTIM;
> > >>  	writew(uap->im, uap->port.membase + UART011_IMSC);
> > >
> > > There's a big hole here, using DMA on the receive path.
> > >
> > > | 3.4.4 UARTRTINTR
> > > | The receive timeout interrupt is asserted when the receive FIFO is not
> > > | empty, and no further data is received over a 32-bit period. The receive
> > > | timeout interrupt is cleared either when the FIFO becomes empty through
> > > | reading all the data (or by reading the holding register), or when a 1
> > > | is written to the corresponding bit of the UARTICR register.
> > >
> > > There must be unread data in the FIFO for us to get a RT interrupt.  The
> > > problem is that if we have DMA enabled, then the received characters are
> > > read from the FIFO as soon as they appear, and so the FIFO becomes empty,
> > > which negates the conditions required for the RT interrupt to be
> > > generated.
> > 
> > The DMA on the RX channel will not read single bytes at all,
> > instead it waits until it can retrieve a burst, which is set to half
> > a FIFO: src_maxburst = uap->fifosize >> 1,
> 
> I refer you to section 2.7, which describes the DMA interface.
> 
> UARTRXDMASREQ
> Single character DMA transfer request, asserted by the UART. For
> receive, one character consists of up to 12 bits. This signal is asserted
> when the receive FIFO contains at least one character.
> 
> UARTRXDMABREQ
> Burst DMA transfer request, asserted by the UART. This signal is
> asserted when the receive FIFO contains more characters than the
> programmed watermark level. You can program the watermark level for
> each FIFO through the UARTIFLS register.
> 
> If your DMA controller is using UARTRXDMASREQ, then you'll end up DMAing
> single bytes as long as the FIFO contains any bytes - thereby causing
> it to be emptied completely.  As 3.4.4 says, if the FIFO is empty, you
> don't get the timeout.
> 
> The PB926 doesn't specify whether it connects the burst or single-
> character DMA request signal to the DMA controller - from the behaviour
> I observe, I suspect it's the single character request which is.

And here's what /proc/interrupts reports:

 12:         38         VIC  uart-pl011

Now, I send exactly one 'o' to the port:

 12:         38         VIC  uart-pl011

no interrupt received.  If I extend PL08x's debugfs output to include
a register dump (src, dest, lli, ctrl, conf):

0               uart0rx
                [101f1000 019c4007 0111c011 1a0123f9 0000d01d]

now if I send three characters:

0               uart0rx
                [101f1000 019c400a 0111c011 1a0123f6 0000d01d]

Note that the address has increased by three, and the count has
decremented by three.  This confirms that the DMA is reading the data as
the UART FIFO fills - rather than DMA waiting for the FIFO to partially
fill before starting a request.

This emptying of the RX UART FIFO of all received data prevents the RX
timeout condition being satisfied, which in turn prevents any reasonable
use of the UART port.

I think the answer here is to allow the platform to supply just the TX
DMA channel, so when a UART is wired up to use the single-character
request, RX DMA support can be omitted.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 20:34       ` Russell King - ARM Linux
@ 2010-12-22 20:52         ` Russell King - ARM Linux
  2010-12-22 21:23         ` Linus Walleij
  1 sibling, 0 replies; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-12-22 20:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Dec 22, 2010 at 08:34:39PM +0000, Russell King - ARM Linux wrote:
> On Wed, Dec 22, 2010 at 06:20:41PM +0000, Russell King - ARM Linux wrote:
> > On Wed, Dec 22, 2010 at 06:09:22PM +0100, Linus Walleij wrote:
> > > 2010/12/22 Russell King - ARM Linux <linux@arm.linux.org.uk>:
> > > > On Wed, Oct 06, 2010 at 11:32:06AM +0200, Linus Walleij wrote:
> > > >>  	/*
> > > >> -	 * Finally, enable interrupts
> > > >> +	 * Finally, enable interrupts, only timeouts when using DMA
> > > >> +	 * if initial RX DMA job failed, start in interrupt mode
> > > >> +	 * as well.
> > > >>  	 */
> > > >>  	spin_lock_irq(&uap->port.lock);
> > > >> -	uap->im = UART011_RXIM | UART011_RTIM;
> > > >> +	if (uap->enable_dma && uap->rx_dma_running)
> > > >> +		uap->im = UART011_RTIM;
> > > >> +	else
> > > >> +		uap->im = UART011_RXIM | UART011_RTIM;
> > > >>  	writew(uap->im, uap->port.membase + UART011_IMSC);
> > > >
> > > > There's a big hole here, using DMA on the receive path.
> > > >
> > > > | 3.4.4 UARTRTINTR
> > > > | The receive timeout interrupt is asserted when the receive FIFO is not
> > > > | empty, and no further data is received over a 32-bit period. The receive
> > > > | timeout interrupt is cleared either when the FIFO becomes empty through
> > > > | reading all the data (or by reading the holding register), or when a 1
> > > > | is written to the corresponding bit of the UARTICR register.
> > > >
> > > > There must be unread data in the FIFO for us to get a RT interrupt.  The
> > > > problem is that if we have DMA enabled, then the received characters are
> > > > read from the FIFO as soon as they appear, and so the FIFO becomes empty,
> > > > which negates the conditions required for the RT interrupt to be
> > > > generated.
> > > 
> > > The DMA on the RX channel will not read single bytes at all,
> > > instead it waits until it can retrieve a burst, which is set to half
> > > a FIFO: src_maxburst = uap->fifosize >> 1,
> > 
> > I refer you to section 2.7, which describes the DMA interface.
> > 
> > UARTRXDMASREQ
> > Single character DMA transfer request, asserted by the UART. For
> > receive, one character consists of up to 12 bits. This signal is asserted
> > when the receive FIFO contains at least one character.
> > 
> > UARTRXDMABREQ
> > Burst DMA transfer request, asserted by the UART. This signal is
> > asserted when the receive FIFO contains more characters than the
> > programmed watermark level. You can program the watermark level for
> > each FIFO through the UARTIFLS register.
> > 
> > If your DMA controller is using UARTRXDMASREQ, then you'll end up DMAing
> > single bytes as long as the FIFO contains any bytes - thereby causing
> > it to be emptied completely.  As 3.4.4 says, if the FIFO is empty, you
> > don't get the timeout.
> > 
> > The PB926 doesn't specify whether it connects the burst or single-
> > character DMA request signal to the DMA controller - from the behaviour
> > I observe, I suspect it's the single character request which is.
> 
> And here's what /proc/interrupts reports:
> 
>  12:         38         VIC  uart-pl011
> 
> Now, I send exactly one 'o' to the port:
> 
>  12:         38         VIC  uart-pl011
> 
> no interrupt received.  If I extend PL08x's debugfs output to include
> a register dump (src, dest, lli, ctrl, conf):
> 
> 0               uart0rx
>                 [101f1000 019c4007 0111c011 1a0123f9 0000d01d]
> 
> now if I send three characters:
> 
> 0               uart0rx
>                 [101f1000 019c400a 0111c011 1a0123f6 0000d01d]
> 
> Note that the address has increased by three, and the count has
> decremented by three.  This confirms that the DMA is reading the data as
> the UART FIFO fills - rather than DMA waiting for the FIFO to partially
> fill before starting a request.
> 
> This emptying of the RX UART FIFO of all received data prevents the RX
> timeout condition being satisfied, which in turn prevents any reasonable
> use of the UART port.
> 
> I think the answer here is to allow the platform to supply just the TX
> DMA channel, so when a UART is wired up to use the single-character
> request, RX DMA support can be omitted.

One final point on this: it _will_ work if you send a corrupted
character or break first - that sets an error interrupt flag, which
remains latched because the ICR is never written to clear them.
Could that also have something to do with why your setup works?

I'll continue hacking the RX DMA support around, but for the time
being, I've committed my revised TX DMA support in several patches
to my tree.  That won't work on its own because the mainline
amba-pl08x.c is rather non-functional on PB/926 - and of course it has
the deadlock issue.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 18:20     ` Russell King - ARM Linux
  2010-12-22 20:34       ` Russell King - ARM Linux
@ 2010-12-22 21:17       ` Linus Walleij
  1 sibling, 0 replies; 11+ messages in thread
From: Linus Walleij @ 2010-12-22 21:17 UTC (permalink / raw)
  To: linux-arm-kernel

2010/12/22 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> The PB926 doesn't specify whether it connects the burst or single-
> character DMA request signal to the DMA controller - from the behaviour
> I observe, I suspect it's the single character request which is.

Aha I suspect this is it... :-(

FYI I have repeatedly requested access to the PB926 board from
ARM but these can't even be bought any more, even if I had the
money. Thanks for testing at it!

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 20:34       ` Russell King - ARM Linux
  2010-12-22 20:52         ` Russell King - ARM Linux
@ 2010-12-22 21:23         ` Linus Walleij
  1 sibling, 0 replies; 11+ messages in thread
From: Linus Walleij @ 2010-12-22 21:23 UTC (permalink / raw)
  To: linux-arm-kernel

2010/12/22 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> This emptying of the RX UART FIFO of all received data prevents the RX
> timeout condition being satisfied, which in turn prevents any reasonable
> use of the UART port.
>
> I think the answer here is to allow the platform to supply just the TX
> DMA channel, so when a UART is wired up to use the single-character
> request, RX DMA support can be omitted.

Yes at least that is fully OK until someone comes up with some clever
workaround. I don't know if a software fallback timer flipping the buffers
would work.

There's another reason for not doing UART RX at all: for example
it will occupy one of the channels (PB11MPCores PL081 has only
two!) for good, and then MMCI and AACI cannot use it at the same
time.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to PL011
  2010-12-22 17:09   ` Linus Walleij
  2010-12-22 18:20     ` Russell King - ARM Linux
@ 2010-12-31 18:23     ` Russell King - ARM Linux
  1 sibling, 0 replies; 11+ messages in thread
From: Russell King - ARM Linux @ 2010-12-31 18:23 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Dec 22, 2010 at 06:09:22PM +0100, Linus Walleij wrote:
> There *may* be a corner case when you're transferring
> an even multiple of the burst size though, then you can
> *maybe* get into the situation you're describing. Such as
> paste a buffer with n MOD 8 bytes, but I haven't
> been able to provoke it :-/ (and no hardware here sadly).

This will only happen if you send all 8 bytes without triggering a
RTIM interrupt, and your DMA controller reads all 8 bytes upon receiving
the burst request.

If you send 4 bytes, pause, 4 bytes, and your DMA is only transferring
when 8 bytes have been received then you won't see the problem (as after
the first four bytes are received, you'll get a RTIM interrupt, and
another RTIM after the second group of four bytes).

I'm not sure that I'd trust pasting characters into a terminal to
guarantee that we receive all 8 bytes as a single back-to-back burst.

echo -n 01234567 >/dev/ttySn

where /dev/ttySn is the uart on your host PC to which your terminal
program is attached.  If you can't provoke it with fast baud rates,
try slowing down the baud rate so the 32-bit period takes longer to
expire - your DMA controller may not be reading all data from the
FIFO before this period expires, thereby allowing the RTIM interrupt
to occur.

Note: I've no idea how the ST Micro parts operate - you're setting the
DMA threshold to 16 bytes, yet the FIFO is 64 bytes, but you're asking
the DMA controller to do bursts of 32 bytes.  What's the effect of
IFLS on the DMA thresholds too?  Does that result in the DMA BREQ
signal only being activated after 32 bytes and the ST_UART011_DMAWM
register being ignored?  If so that could mean you need to send 32
bytes instead of 8.  Or maybe it's 16 bytes?


Anyway, this is easy to work around.  Let's assume a 16-byte FIFO for
everything that follows.

Set a threshold of 8 characters for the DMA burst request to be triggered
in the PL011 and set the DMA to transfer a burst of 4 bytes.  This will
guarantee that bytes are left in the PL011 FIFO, which means we have the
conditions for the PL011 to generate a RTIM interrupt when there's a
pause of 32 bit times in the transmission no matter what.

In your RTIM interrupt handler, you disable and read data from the DMA
buffer as before.  However, you also check for characters in the FIFO
and empty those into the TTY buffers before re-enabling DMA.

What this means is that the DMA engine controller _absolutely_ _must_
respect the requested burst size and never transfer anything but the
requested burst size per burst request.  If the DMA engine controller
is unable to make this guarantee, we need some way of ensuring that
the PL011 driver will never use RX DMA.

In other words:
- if you make use of the PL011's SREQ signal for the RX path, you lose.
- if you DMA 8 or more characters per BREQ for the RX path, you lose.

Lastly, I've confirmed what's going on on the PB926 - the PL080 DMA
controller is definitely violating the statement in B.4.1 below figure
B-3.  However, it's rather iffy that this is in an appendix rather than
the main text describing the behaviour of the DMA controller - given
the observed behaviour I believe this paragraph has been accidentally
left in the documentation.  This at least explains why it doesn't work
- and can _never_ work on this platform.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-12-31 18:23 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-06  9:32 [PATCH] ARM: add PrimeCell generic DMA to PL011 Linus Walleij
2010-10-12 19:30 ` Russell King - ARM Linux
2010-10-13  5:15   ` Linus Walleij
2010-12-22 14:10 ` Russell King - ARM Linux
2010-12-22 17:09   ` Linus Walleij
2010-12-22 18:20     ` Russell King - ARM Linux
2010-12-22 20:34       ` Russell King - ARM Linux
2010-12-22 20:52         ` Russell King - ARM Linux
2010-12-22 21:23         ` Linus Walleij
2010-12-22 21:17       ` Linus Walleij
2010-12-31 18:23     ` Russell King - ARM Linux

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).