* [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic
@ 2023-05-02 19:06 Shenwei Wang
2023-05-03 9:03 ` Ilpo Järvinen
0 siblings, 1 reply; 5+ messages in thread
From: Shenwei Wang @ 2023-05-02 19:06 UTC (permalink / raw)
To: Greg Kroah-Hartman; +Cc: Jiri Slaby, linux-serial, imx, linux-imx, Shenwei Wang
At low baud rates, the DMA transfer may end prematurely due to the timer,
even during an active transfer. This does not accurately simulate an EOP
event as intended. We expect the timer to only complete a DMA transfer
once the idle period satisfies a specified interval.
The patch checks the DMA residue count before copying data to the TTY
buffer. If the residue count remains unchanged since the last interrupt,
that indicates no new data was received. In this case, the DMA should
complete as an EOP event. Instead, the timer restarts.
Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
---
drivers/tty/serial/fsl_lpuart.c | 52 ++++++++++++++++++++++++++++++---
1 file changed, 48 insertions(+), 4 deletions(-)
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index c91916e13648..8d21351fb3bd 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -238,6 +238,7 @@
/* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
#define DMA_RX_TIMEOUT (10)
+#define DMA_RX_IDLE_CHARS (8)
#define UART_AUTOSUSPEND_TIMEOUT 3000
#define DRIVER_NAME "fsl-lpuart"
@@ -282,6 +283,7 @@ struct lpuart_port {
struct scatterlist rx_sgl, tx_sgl[2];
struct circ_buf rx_ring;
int rx_dma_rng_buf_len;
+ int last_residue;
unsigned int dma_tx_nents;
wait_queue_head_t dma_wait;
bool is_cs7; /* Set to true when character size is 7 */
@@ -331,7 +333,7 @@ static struct lpuart_soc_data imx8qxp_data = {
.devtype = IMX8QXP_LPUART,
.iotype = UPIO_MEM32,
.reg_off = IMX_REG_OFF,
- .rx_watermark = 31,
+ .rx_watermark = 8, /* A lower watermark is ideal for low baud rates. */
};
static struct lpuart_soc_data imxrt1050_data = {
.devtype = IMXRT1050_LPUART,
@@ -1255,6 +1257,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
sport->port.icount.rx += copied;
}
+ sport->last_residue = state.residue;
+
exit:
dma_sync_sg_for_device(chan->device->dev, &sport->rx_sgl, 1,
DMA_FROM_DEVICE);
@@ -1272,11 +1276,40 @@ static void lpuart_dma_rx_complete(void *arg)
lpuart_copy_rx_to_tty(sport);
}
+/*
+ * Timer function to simulate the hardware EOP(End Of Package) event.
+ * The timer callback is to check for new RX data and copy to TTY buffer.
+ * If no new data since last interrupt, restart timer. Otherwise, copy data
+ * and continue normal logic.
+ */
static void lpuart_timer_func(struct timer_list *t)
{
struct lpuart_port *sport = from_timer(sport, t, lpuart_timer);
+ struct dma_chan *chan = sport->dma_rx_chan;
+ struct circ_buf *ring = &sport->rx_ring;
+ struct dma_tx_state state;
+ unsigned long flags;
+ int count = 0;
- lpuart_copy_rx_to_tty(sport);
+ dmaengine_tx_status(chan, sport->dma_rx_cookie, &state);
+ ring->head = sport->rx_sgl.length - state.residue;
+
+ if (ring->head < ring->tail)
+ count = sport->rx_sgl.length - ring->tail;
+ else if (ring->tail < ring->head)
+ count = ring->head - ring->tail;
+
+ /* Check if new data received before copying */
+ if ((count != 0) && (sport->last_residue == state.residue))
+ lpuart_copy_rx_to_tty(sport);
+ else
+ mod_timer(&sport->lpuart_timer,
+ jiffies + sport->dma_rx_timeout);
+
+ if (spin_trylock_irqsave(&sport->port.lock, flags)) {
+ sport->last_residue = state.residue;
+ spin_unlock_irqrestore(&sport->port.lock, flags);
+ }
}
static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
@@ -1297,9 +1330,19 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
*/
sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud / bits / 1000) * 2;
sport->rx_dma_rng_buf_len = (1 << fls(sport->rx_dma_rng_buf_len));
+ if (sport->rx_dma_rng_buf_len < sport->rxfifo_size * 2)
+ sport->rx_dma_rng_buf_len = sport->rxfifo_size * 2;
+
+ /*
+ * Keep this condition check in case rxfifo_size is unavailable
+ * for some SoCs.
+ */
if (sport->rx_dma_rng_buf_len < 16)
sport->rx_dma_rng_buf_len = 16;
+ sport->dma_rx_timeout =
+ msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud + 1);
+
ring->buf = kzalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
if (!ring->buf)
return -ENOMEM;
@@ -1687,12 +1730,13 @@ static void lpuart_rx_dma_startup(struct lpuart_port *sport)
if (!sport->dma_rx_chan)
goto err;
+ /* set default Rx DMA timeout */
+ sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
+
ret = lpuart_start_rx_dma(sport);
if (ret)
goto err;
- /* set Rx DMA timeout */
- sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
if (!sport->dma_rx_timeout)
sport->dma_rx_timeout = 1;
--
2.34.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic
2023-05-02 19:06 [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic Shenwei Wang
@ 2023-05-03 9:03 ` Ilpo Järvinen
2023-05-03 18:38 ` [EXT] " Shenwei Wang
0 siblings, 1 reply; 5+ messages in thread
From: Ilpo Järvinen @ 2023-05-03 9:03 UTC (permalink / raw)
To: Shenwei Wang; +Cc: Greg Kroah-Hartman, Jiri Slaby, linux-serial, imx, linux-imx
On Tue, 2 May 2023, Shenwei Wang wrote:
> At low baud rates, the DMA transfer may end prematurely due to the timer,
> even during an active transfer. This does not accurately simulate an EOP
> event as intended. We expect the timer to only complete a DMA transfer
> once the idle period satisfies a specified interval.
>
> The patch checks the DMA residue count before copying data to the TTY
> buffer. If the residue count remains unchanged since the last interrupt,
> that indicates no new data was received. In this case, the DMA should
> complete as an EOP event. Instead, the timer restarts.
This description is lacking something. It does not explain why the stuff
in second paragraph is necessary at all as setting a longer timer based on
the (lower) baud rate would avoid the need to do the timer restart.
> Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
> ---
> drivers/tty/serial/fsl_lpuart.c | 52 ++++++++++++++++++++++++++++++---
> 1 file changed, 48 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
> index c91916e13648..8d21351fb3bd 100644
> --- a/drivers/tty/serial/fsl_lpuart.c
> +++ b/drivers/tty/serial/fsl_lpuart.c
> @@ -238,6 +238,7 @@
>
> /* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
> #define DMA_RX_TIMEOUT (10)
> +#define DMA_RX_IDLE_CHARS (8)
> #define UART_AUTOSUSPEND_TIMEOUT 3000
>
> #define DRIVER_NAME "fsl-lpuart"
> @@ -282,6 +283,7 @@ struct lpuart_port {
> struct scatterlist rx_sgl, tx_sgl[2];
> struct circ_buf rx_ring;
> int rx_dma_rng_buf_len;
> + int last_residue;
> unsigned int dma_tx_nents;
> wait_queue_head_t dma_wait;
> bool is_cs7; /* Set to true when character size is 7 */
> @@ -331,7 +333,7 @@ static struct lpuart_soc_data imx8qxp_data = {
> .devtype = IMX8QXP_LPUART,
> .iotype = UPIO_MEM32,
> .reg_off = IMX_REG_OFF,
> - .rx_watermark = 31,
> + .rx_watermark = 8, /* A lower watermark is ideal for low baud rates. */
> };
> static struct lpuart_soc_data imxrt1050_data = {
> .devtype = IMXRT1050_LPUART,
> @@ -1255,6 +1257,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
> sport->port.icount.rx += copied;
> }
>
> + sport->last_residue = state.residue;
> +
> exit:
> dma_sync_sg_for_device(chan->device->dev, &sport->rx_sgl, 1,
> DMA_FROM_DEVICE);
> @@ -1272,11 +1276,40 @@ static void lpuart_dma_rx_complete(void *arg)
> lpuart_copy_rx_to_tty(sport);
> }
>
> +/*
> + * Timer function to simulate the hardware EOP(End Of Package) event.
Missing space
> + * The timer callback is to check for new RX data and copy to TTY buffer.
> + * If no new data since last interrupt, restart timer. Otherwise, copy data
> + * and continue normal logic.
> + */
> static void lpuart_timer_func(struct timer_list *t)
> {
> struct lpuart_port *sport = from_timer(sport, t, lpuart_timer);
> + struct dma_chan *chan = sport->dma_rx_chan;
> + struct circ_buf *ring = &sport->rx_ring;
> + struct dma_tx_state state;
> + unsigned long flags;
> + int count = 0;
>
> - lpuart_copy_rx_to_tty(sport);
> + dmaengine_tx_status(chan, sport->dma_rx_cookie, &state);
> + ring->head = sport->rx_sgl.length - state.residue;
> +
> + if (ring->head < ring->tail)
> + count = sport->rx_sgl.length - ring->tail;
> + else if (ring->tail < ring->head)
> + count = ring->head - ring->tail;
linux/circ_buf.h has functions which likely handle what you want to do
here. They will get you true count across wrap too which this above does
not do.
Given this is essentially duplicates count calculation some refactor
would seem more useful here rather than recalculating the count again in
lpuart_copy_rx_to_tty().
Also lpuart_handle_sysrq() duplicates the same calculations.
> +
> + /* Check if new data received before copying */
> + if ((count != 0) && (sport->last_residue == state.residue))
I'm unsure about this condition being right.
What will happen when rx_sgl.length (or -1 of that, I'm not sure which way
"full size" is here) worth of data has been DMA'ed. Does this condition
end up delaying copy such that it's done only on every other call here?
Also, should you reset last_residue in lpuart_start_rx_dma() ? I think
that would solve the "full size" problem.
> + lpuart_copy_rx_to_tty(sport);
> + else
> + mod_timer(&sport->lpuart_timer,
> + jiffies + sport->dma_rx_timeout);
> +
> + if (spin_trylock_irqsave(&sport->port.lock, flags)) {
> + sport->last_residue = state.residue;
> + spin_unlock_irqrestore(&sport->port.lock, flags);
> + }
> }
>
> static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
> @@ -1297,9 +1330,19 @@ static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
> */
> sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud / bits / 1000) * 2;
> sport->rx_dma_rng_buf_len = (1 << fls(sport->rx_dma_rng_buf_len));
> + if (sport->rx_dma_rng_buf_len < sport->rxfifo_size * 2)
> + sport->rx_dma_rng_buf_len = sport->rxfifo_size * 2;
max_t()
> +
> + /*
> + * Keep this condition check in case rxfifo_size is unavailable
> + * for some SoCs.
> + */
> if (sport->rx_dma_rng_buf_len < 16)
> sport->rx_dma_rng_buf_len = 16;
>
> + sport->dma_rx_timeout =
> + msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud + 1);
There's ->frame_time these days in uart_port which you should base frame
timing related calculations. I wouldn't mind if that existing ->frame_time
math that is visible in your patch's context is also converted (in a
separate patch).
I'm assuming that magic 10 is assumed number of bits and 1000
MSEC_PER_SEC. That +1 seems odd, did you mean DIV_ROUND_UP() ?
> +
> ring->buf = kzalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
> if (!ring->buf)
> return -ENOMEM;
> @@ -1687,12 +1730,13 @@ static void lpuart_rx_dma_startup(struct lpuart_port *sport)
> if (!sport->dma_rx_chan)
> goto err;
>
> + /* set default Rx DMA timeout */
> + sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> +
> ret = lpuart_start_rx_dma(sport);
> if (ret)
> goto err;
>
> - /* set Rx DMA timeout */
> - sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> if (!sport->dma_rx_timeout)
> sport->dma_rx_timeout = 1;
>
>
--
i.
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic
2023-05-03 9:03 ` Ilpo Järvinen
@ 2023-05-03 18:38 ` Shenwei Wang
2023-05-04 13:02 ` Ilpo Järvinen
0 siblings, 1 reply; 5+ messages in thread
From: Shenwei Wang @ 2023-05-03 18:38 UTC (permalink / raw)
To: Ilpo Järvinen
Cc: Greg Kroah-Hartman, Jiri Slaby, linux-serial, imx@lists.linux.dev,
dl-linux-imx
> -----Original Message-----
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Sent: Wednesday, May 3, 2023 4:03 AM
> To: Shenwei Wang <shenwei.wang@nxp.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>; Jiri Slaby
> <jirislaby@kernel.org>; linux-serial <linux-serial@vger.kernel.org>;
> imx@lists.linux.dev; dl-linux-imx <linux-imx@nxp.com>
> Subject: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based
> EOP logic
>
> Caution: This is an external email. Please take care when clicking links or
> opening attachments. When in doubt, report the message using the 'Report this
> email' button
>
>
> On Tue, 2 May 2023, Shenwei Wang wrote:
>
> > At low baud rates, the DMA transfer may end prematurely due to the
> > timer, even during an active transfer. This does not accurately
> > simulate an EOP event as intended. We expect the timer to only
> > complete a DMA transfer once the idle period satisfies a specified interval.
> >
> > The patch checks the DMA residue count before copying data to the TTY
> > buffer. If the residue count remains unchanged since the last
> > interrupt, that indicates no new data was received. In this case, the
> > DMA should complete as an EOP event. Instead, the timer restarts.
>
> This description is lacking something. It does not explain why the stuff in second
> paragraph is necessary at all as setting a longer timer based on the (lower) baud
> rate would avoid the need to do the timer restart.
>
Agree. Would add the following to the last sentence: "if no new characters are received, the timer just restarts".
> > Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
> > ---
> > drivers/tty/serial/fsl_lpuart.c | 52
> > ++++++++++++++++++++++++++++++---
> > 1 file changed, 48 insertions(+), 4 deletions(-)
> > }
> >
> > +/*
> > + * Timer function to simulate the hardware EOP(End Of Package) event.
>
> Missing space
>
Will fix it in next version.
> > + * The timer callback is to check for new RX data and copy to TTY buffer.
> > + * If no new data since last interrupt, restart timer. Otherwise,
> > + copy data
> > + * and continue normal logic.
> > + */
> > static void lpuart_timer_func(struct timer_list *t) {
> > struct lpuart_port *sport = from_timer(sport, t, lpuart_timer);
> > + struct dma_chan *chan = sport->dma_rx_chan;
> > + struct circ_buf *ring = &sport->rx_ring;
> > + struct dma_tx_state state;
> > + unsigned long flags;
> > + int count = 0;
> >
> > - lpuart_copy_rx_to_tty(sport);
> > + dmaengine_tx_status(chan, sport->dma_rx_cookie, &state);
>
> > + ring->head = sport->rx_sgl.length - state.residue;
> > +
> > + if (ring->head < ring->tail)
> > + count = sport->rx_sgl.length - ring->tail;
> > + else if (ring->tail < ring->head)
> > + count = ring->head - ring->tail;
>
> linux/circ_buf.h has functions which likely handle what you want to do here.
> They will get you true count across wrap too which this above does not do.
>
> Given this is essentially duplicates count calculation some refactor would seem
> more useful here rather than recalculating the count again in
> lpuart_copy_rx_to_tty().
>
> Also lpuart_handle_sysrq() duplicates the same calculations.
>
Just had a check with the circ_buf.h, and this piece of codes can be simplified
with the CIRC_CNT() function.
The other part you mentioned should be optimized as well. I will submit a separate
patch to do that after finishing this one.
> > +
> > + /* Check if new data received before copying */
> > + if ((count != 0) && (sport->last_residue == state.residue))
>
> I'm unsure about this condition being right.
>
> What will happen when rx_sgl.length (or -1 of that, I'm not sure which way "full
> size" is here) worth of data has been DMA'ed. Does this condition end up
> delaying copy such that it's done only on every other call here?
>
The timer function will only complete a DMA transfer when there is new data in the buffer
and the data has been idle for the specified interval.
The "full buffer" situation should be handled by the DMA completion callback itself. A "full" buffer
means the DMA transaction has received sufficient data and invoked the completion callback.
> Also, should you reset last_residue in lpuart_start_rx_dma() ? I think that would
> solve the "full size" problem.
>
> > + lpuart_copy_rx_to_tty(sport);
> > + else
> > + mod_timer(&sport->lpuart_timer,
> > + jiffies + sport->dma_rx_timeout);
> > +
> > + if (spin_trylock_irqsave(&sport->port.lock, flags)) {
> > + sport->last_residue = state.residue;
> > + spin_unlock_irqrestore(&sport->port.lock, flags);
> > + }
> > }
> >
> > static inline int lpuart_start_rx_dma(struct lpuart_port *sport) @@
> > -1297,9 +1330,19 @@ static inline int lpuart_start_rx_dma(struct lpuart_port
> *sport)
> > */
> > sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud / bits / 1000) * 2;
> > sport->rx_dma_rng_buf_len = (1 <<
> > fls(sport->rx_dma_rng_buf_len));
> > + if (sport->rx_dma_rng_buf_len < sport->rxfifo_size * 2)
> > + sport->rx_dma_rng_buf_len = sport->rxfifo_size * 2;
>
> max_t()
>
> > +
> > + /*
> > + * Keep this condition check in case rxfifo_size is unavailable
> > + * for some SoCs.
> > + */
> > if (sport->rx_dma_rng_buf_len < 16)
> > sport->rx_dma_rng_buf_len = 16;
> >
> > + sport->dma_rx_timeout =
> > + msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud
> > + + 1);
>
> There's ->frame_time these days in uart_port which you should base frame
> timing related calculations. I wouldn't mind if that existing ->frame_time math
> that is visible in your patch's context is also converted (in a separate patch).
>
> I'm assuming that magic 10 is assumed number of bits and 1000 MSEC_PER_SEC.
> That +1 seems odd, did you mean DIV_ROUND_UP() ?
Yes, it is 10 bits and 1000 ms. +1 here is similar to the result of round up.
And also the ->frame_time could be used for simplicity.
>
> > +
> > ring->buf = kzalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
> > if (!ring->buf)
> > return -ENOMEM;
> > @@ -1687,12 +1730,13 @@ static void lpuart_rx_dma_startup(struct
> lpuart_port *sport)
> > if (!sport->dma_rx_chan)
> > goto err;
> >
> > + /* set default Rx DMA timeout */
> > + sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> > +
> > ret = lpuart_start_rx_dma(sport);
> > if (ret)
> > goto err;
> >
> > - /* set Rx DMA timeout */
> > - sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
> > if (!sport->dma_rx_timeout)
> > sport->dma_rx_timeout = 1;
> >
> >
>
> --
> i.
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic
2023-05-03 18:38 ` [EXT] " Shenwei Wang
@ 2023-05-04 13:02 ` Ilpo Järvinen
2023-05-04 14:46 ` Shenwei Wang
0 siblings, 1 reply; 5+ messages in thread
From: Ilpo Järvinen @ 2023-05-04 13:02 UTC (permalink / raw)
To: Shenwei Wang
Cc: Greg Kroah-Hartman, Jiri Slaby, linux-serial, imx@lists.linux.dev,
dl-linux-imx
[-- Attachment #1: Type: text/plain, Size: 4128 bytes --]
On Wed, 3 May 2023, Shenwei Wang wrote:
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > On Tue, 2 May 2023, Shenwei Wang wrote:
> >
> > > At low baud rates, the DMA transfer may end prematurely due to the
> > > timer, even during an active transfer. This does not accurately
> > > simulate an EOP event as intended. We expect the timer to only
> > > complete a DMA transfer once the idle period satisfies a specified interval.
> > >
> > > The patch checks the DMA residue count before copying data to the TTY
> > > buffer. If the residue count remains unchanged since the last
> > > interrupt, that indicates no new data was received. In this case, the
> > > DMA should complete as an EOP event. Instead, the timer restarts.
> >
> > This description is lacking something. It does not explain why the stuff in second
> > paragraph is necessary at all as setting a longer timer based on the (lower) baud
> > rate would avoid the need to do the timer restart.
> >
>
> Agree. Would add the following to the last sentence: "if no new
> characters are received, the timer just restarts".
That, however, is unfortunately not the case I was interested in here. The
code does restart the timer if new characters _were received_ (residue
changed), no? So my request for clarification to the changelong still
stands, why is rearming the timer necessary instead of simply setting a
longer timeout right from the start?
(In the first paragraph, you stated the problem is about timer triggering
prematurely with low baud rates.)
This is not to say that the new approach is wrong but the changelog fails
to explain all facets of what is wrong with the old approach adequately.
> Just had a check with the circ_buf.h, and this piece of codes can be simplified
> with the CIRC_CNT() function.
>
> The other part you mentioned should be optimized as well. I will submit a separate
> patch to do that after finishing this one.
Okay. You might also find CIRC_CNT_TO_END() useful in those inner
functions to calculate the length before the wrap.
> > > +
> > > + /* Check if new data received before copying */
> > > + if ((count != 0) && (sport->last_residue == state.residue))
> >
> > I'm unsure about this condition being right.
> >
> > What will happen when rx_sgl.length (or -1 of that, I'm not sure which way "full
> > size" is here) worth of data has been DMA'ed. Does this condition end up
> > delaying copy such that it's done only on every other call here?
> >
>
> The timer function will only complete a DMA transfer when there is new data in the buffer
> and the data has been idle for the specified interval.
>
> The "full buffer" situation should be handled by the DMA completion callback itself. A "full" buffer
> means the DMA transaction has received sufficient data and invoked the completion callback.
>
> > Also, should you reset last_residue in lpuart_start_rx_dma() ? I think that would
> > solve the "full size" problem.
What about this part? If the transfer always does n chars, the left over
residue can match spuriously for the new transfer and trigger the copy
because last and current residue happen to match (kinda by chance but
could be simply due to repetitive transfer pattern)?
> > > + sport->dma_rx_timeout =
> > > + msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) / baud
> > > + + 1);
> >
> > There's ->frame_time these days in uart_port which you should base frame
> > timing related calculations. I wouldn't mind if that existing ->frame_time math
> > that is visible in your patch's context is also converted (in a separate patch).
> >
> > I'm assuming that magic 10 is assumed number of bits and 1000 MSEC_PER_SEC.
> > That +1 seems odd, did you mean DIV_ROUND_UP() ?
>
> Yes, it is 10 bits and 1000 ms. +1 here is similar to the result of round up.
> And also the ->frame_time could be used for simplicity.
Yes, please use ->frame_time. I added it exactly to allow this kind of
calculations to be easily based on actual frame timing (the other
questions were just to gauge if I understood right what is behind your
math).
--
i.
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic
2023-05-04 13:02 ` Ilpo Järvinen
@ 2023-05-04 14:46 ` Shenwei Wang
0 siblings, 0 replies; 5+ messages in thread
From: Shenwei Wang @ 2023-05-04 14:46 UTC (permalink / raw)
To: Ilpo Järvinen
Cc: Greg Kroah-Hartman, Jiri Slaby, linux-serial, imx@lists.linux.dev,
dl-linux-imx
> -----Original Message-----
> From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> Sent: Thursday, May 4, 2023 8:02 AM
> To: Shenwei Wang <shenwei.wang@nxp.com>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>; Jiri Slaby
> <jirislaby@kernel.org>; linux-serial <linux-serial@vger.kernel.org>;
> imx@lists.linux.dev; dl-linux-imx <linux-imx@nxp.com>
> Subject: RE: [EXT] Re: [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer
> based EOP logic
>
> Caution: This is an external email. Please take care when clicking links or
> opening attachments. When in doubt, report the message using the 'Report this
> email' button
>
>
> On Wed, 3 May 2023, Shenwei Wang wrote:
> > From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
> > > On Tue, 2 May 2023, Shenwei Wang wrote:
> > >
> > > > At low baud rates, the DMA transfer may end prematurely due to the
> > > > timer, even during an active transfer. This does not accurately
> > > > simulate an EOP event as intended. We expect the timer to only
> > > > complete a DMA transfer once the idle period satisfies a specified interval.
> > > >
> > > > The patch checks the DMA residue count before copying data to the
> > > > TTY buffer. If the residue count remains unchanged since the last
> > > > interrupt, that indicates no new data was received. In this case,
> > > > the DMA should complete as an EOP event. Instead, the timer restarts.
> > >
> > > This description is lacking something. It does not explain why the
> > > stuff in second paragraph is necessary at all as setting a longer
> > > timer based on the (lower) baud rate would avoid the need to do the timer
> restart.
> > >
> >
> > Agree. Would add the following to the last sentence: "if no new
> > characters are received, the timer just restarts".
>
> That, however, is unfortunately not the case I was interested in here. The code
> does restart the timer if new characters _were received_ (residue changed), no?
> So my request for clarification to the changelong still stands, why is rearming
> the timer necessary instead of simply setting a longer timeout right from the
> start?
>
Once new characters are received within the timer interval, the "residue" will
be updated by the DMA driver, but the DMA transfer will only complete once it receives
the specified number of characters. The purpose of this patch is to prevent the long delay
of the DMA transfer when it cannot receive a sufficient number of characters.
> (In the first paragraph, you stated the problem is about timer triggering
> prematurely with low baud rates.)
>
> This is not to say that the new approach is wrong but the changelog fails to
> explain all facets of what is wrong with the old approach adequately.
>
The timer is used here to simulate the EOP behavior. This means that when a new character is received
within the interval, the timer needs restart. If no new character is received within the specified interval,
the timer callback will complete the DMA transfer as if an EOP event occurred.
The old approach did not exhibit this expected behavior. It would complete the DMA transfer at the fixed
interval regardless of whether new characters were received or not. This incorrect behavior can be easily
observed at low baud rates, but it also for high baud rates.
> > Just had a check with the circ_buf.h, and this piece of codes can be
> > simplified with the CIRC_CNT() function.
> >
> > The other part you mentioned should be optimized as well. I will
> > submit a separate patch to do that after finishing this one.
>
> Okay. You might also find CIRC_CNT_TO_END() useful in those inner functions
> to calculate the length before the wrap.
>
> > > > +
> > > > + /* Check if new data received before copying */
> > > > + if ((count != 0) && (sport->last_residue == state.residue))
> > >
> > > I'm unsure about this condition being right.
> > >
> > > What will happen when rx_sgl.length (or -1 of that, I'm not sure
> > > which way "full size" is here) worth of data has been DMA'ed. Does
> > > this condition end up delaying copy such that it's done only on every other
> call here?
> > >
> >
> > The timer function will only complete a DMA transfer when there is new
> > data in the buffer and the data has been idle for the specified interval.
> >
> > The "full buffer" situation should be handled by the DMA completion
> > callback itself. A "full" buffer means the DMA transaction has received
> sufficient data and invoked the completion callback.
> >
> > > Also, should you reset last_residue in lpuart_start_rx_dma() ? I
> > > think that would solve the "full size" problem.
>
> What about this part? If the transfer always does n chars, the left over residue
> can match spuriously for the new transfer and trigger the copy because last and
> current residue happen to match (kinda by chance but could be simply due to
> repetitive transfer pattern)?
>
Yes, there is a chance. Will reset it when start_rx_dma.
Thanks,
Shenwei
> > > > + sport->dma_rx_timeout =
> > > > + msecs_to_jiffies((1000 * 10 * DMA_RX_IDLE_CHARS) /
> > > > + baud
> > > > + + 1);
> > >
> > > There's ->frame_time these days in uart_port which you should base
> > > frame timing related calculations. I wouldn't mind if that existing
> > > ->frame_time math that is visible in your patch's context is also converted (in
> a separate patch).
> > >
> > > I'm assuming that magic 10 is assumed number of bits and 1000
> MSEC_PER_SEC.
> > > That +1 seems odd, did you mean DIV_ROUND_UP() ?
> >
> > Yes, it is 10 bits and 1000 ms. +1 here is similar to the result of round up.
> > And also the ->frame_time could be used for simplicity.
>
> Yes, please use ->frame_time. I added it exactly to allow this kind of calculations
> to be easily based on actual frame timing (the other questions were just to
> gauge if I understood right what is behind your math).
>
>
> --
> i.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-05-04 14:46 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-05-02 19:06 [PATCH 1/1] tty: serial: fsl_lpuart: optimize the timer based EOP logic Shenwei Wang
2023-05-03 9:03 ` Ilpo Järvinen
2023-05-03 18:38 ` [EXT] " Shenwei Wang
2023-05-04 13:02 ` Ilpo Järvinen
2023-05-04 14:46 ` Shenwei Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox