* [PATCH 1/4] spi: reenable sync SPI transfers
2014-09-01 14:30 [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Jeff Epler
@ 2014-09-01 14:30 ` Jeff Epler
2014-09-01 14:30 ` [PATCH 2/4] spidev: Avoid runtime memory allocations Jeff Epler
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Jeff Epler @ 2014-09-01 14:30 UTC (permalink / raw)
To: linux-rt-users
one source of excess latency in hm2_spi / hal_spidev is latency due
to async transfers. Make the __spi_sync primitive actually synchronous,
rather than building on an asynchronous primitive.
---
drivers/spi/spi.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 19ee901..55404b5 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -664,10 +664,18 @@ void spi_finalize_current_message(struct spi_master *master)
struct spi_message *mesg;
unsigned long flags;
+ if(!master->cur_msg)
+ return;
+
spin_lock_irqsave(&master->queue_lock, flags);
mesg = master->cur_msg;
master->cur_msg = NULL;
+ if(!mesg) {
+ spin_unlock_irqrestore(&master->queue_lock, flags);
+ return;
+ }
+
queue_kthread_work(&master->kworker, &master->pump_messages);
spin_unlock_irqrestore(&master->queue_lock, flags);
@@ -1490,24 +1498,26 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message,
int bus_locked)
{
DECLARE_COMPLETION_ONSTACK(done);
- int status;
+ int status = 0;
struct spi_master *master = spi->master;
message->complete = spi_complete;
message->context = &done;
+ message->spi = spi;
if (!bus_locked)
mutex_lock(&master->bus_lock_mutex);
- status = spi_async_locked(spi, message);
+ if(master->prepare_transfer_hardware)
+ status = master->prepare_transfer_hardware(master);
+ if(status >= 0)
+ status = master->transfer_one_message(master, message);
+ if(status >= 0 && master->unprepare_transfer_hardware)
+ status = master->unprepare_transfer_hardware(master);
if (!bus_locked)
mutex_unlock(&master->bus_lock_mutex);
- if (status == 0) {
- wait_for_completion(&done);
- status = message->status;
- }
message->context = NULL;
return status;
}
--
2.0.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 2/4] spidev: Avoid runtime memory allocations
2014-09-01 14:30 [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Jeff Epler
2014-09-01 14:30 ` [PATCH 1/4] spi: reenable sync SPI transfers Jeff Epler
@ 2014-09-01 14:30 ` Jeff Epler
2014-09-01 14:30 ` [PATCH 3/4] spidev: actually use synchronous transfers Jeff Epler
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Jeff Epler @ 2014-09-01 14:30 UTC (permalink / raw)
To: linux-rt-users
allocating bounce and k_xfers at every iteration was a source of
excess delays.
---
drivers/spi/spidev.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 2e0655d..dda7632 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -85,6 +85,8 @@ struct spidev_data {
struct mutex buf_lock;
unsigned users;
u8 *buffer;
+ struct spi_ioc_transfer *bounce;
+ struct spi_transfer *k_xfers;
};
static LIST_HEAD(device_list);
@@ -227,9 +229,9 @@ static int spidev_message(struct spidev_data *spidev,
int status = -EFAULT;
spi_message_init(&msg);
- k_xfers = kcalloc(n_xfers, sizeof(*k_tmp), GFP_KERNEL);
- if (k_xfers == NULL)
- return -ENOMEM;
+ if (n_xfers * sizeof(*k_tmp) > bufsiz)
+ return -ENOMEM;
+ k_xfers = spidev->k_xfers;
/* Construct spi_message, copying any tx data to bounce buffer.
* We walk the array of user-provided transfers, using each one
@@ -302,7 +304,6 @@ static int spidev_message(struct spidev_data *spidev,
status = total;
done:
- kfree(k_xfers);
return status;
}
@@ -451,8 +452,12 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (n_ioc == 0)
break;
+ if(tmp > bufsiz) {
+ retval = -EINVAL;
+ break;
+ }
/* copy into scratch area */
- ioc = kmalloc(tmp, GFP_KERNEL);
+ ioc = spidev->bounce;
if (!ioc) {
retval = -ENOMEM;
break;
@@ -465,7 +470,6 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/* translate to spi_message, execute */
retval = spidev_message(spidev, ioc, n_ioc);
- kfree(ioc);
break;
}
@@ -504,6 +508,19 @@ static int spidev_open(struct inode *inode, struct file *filp)
dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
status = -ENOMEM;
}
+ spidev->bounce = kmalloc(bufsiz, GFP_KERNEL);
+ if (!spidev->bounce) {
+ dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
+ kfree(spidev->buffer);
+ status = -ENOMEM;
+ }
+ spidev->k_xfers = kmalloc(bufsiz, GFP_KERNEL);
+ if (!spidev->k_xfers) {
+ dev_dbg(&spidev->spi->dev, "open/ENOMEM\n");
+ kfree(spidev->buffer);
+ kfree(spidev->bounce);
+ status = -ENOMEM;
+ }
}
if (status == 0) {
spidev->users++;
@@ -534,6 +551,12 @@ static int spidev_release(struct inode *inode, struct file *filp)
kfree(spidev->buffer);
spidev->buffer = NULL;
+ kfree(spidev->bounce);
+ spidev->bounce = NULL;
+
+ kfree(spidev->k_xfers);
+ spidev->k_xfers = NULL;
+
/* ... after we unbound from the underlying device? */
spin_lock_irq(&spidev->spi_lock);
dofree = (spidev->spi == NULL);
--
2.0.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 3/4] spidev: actually use synchronous transfers
2014-09-01 14:30 [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Jeff Epler
2014-09-01 14:30 ` [PATCH 1/4] spi: reenable sync SPI transfers Jeff Epler
2014-09-01 14:30 ` [PATCH 2/4] spidev: Avoid runtime memory allocations Jeff Epler
@ 2014-09-01 14:30 ` Jeff Epler
2014-09-01 14:30 ` [PATCH 4/4] spidev-s3c64xx: allocate dma channel at startup Jeff Epler
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Jeff Epler @ 2014-09-01 14:30 UTC (permalink / raw)
To: linux-rt-users
this reduces a source of latency
---
drivers/spi/spidev.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index dda7632..6e37c59 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -120,15 +120,9 @@ spidev_sync(struct spidev_data *spidev, struct spi_message *message)
if (spidev->spi == NULL)
status = -ESHUTDOWN;
else
- status = spi_async(spidev->spi, message);
+ status = spi_sync(spidev->spi, message);
spin_unlock_irq(&spidev->spi_lock);
- if (status == 0) {
- wait_for_completion(&done);
- status = message->status;
- if (status == 0)
- status = message->actual_length;
- }
return status;
}
--
2.0.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 4/4] spidev-s3c64xx: allocate dma channel at startup
2014-09-01 14:30 [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Jeff Epler
` (2 preceding siblings ...)
2014-09-01 14:30 ` [PATCH 3/4] spidev: actually use synchronous transfers Jeff Epler
@ 2014-09-01 14:30 ` Jeff Epler
2014-09-01 15:41 ` [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Philipp Lutz
2014-09-09 20:01 ` Uwe Kleine-König
5 siblings, 0 replies; 9+ messages in thread
From: Jeff Epler @ 2014-09-01 14:30 UTC (permalink / raw)
To: linux-rt-users
it appears that acquire_dma introduces intolerable delays; the system
is fine with the dma channnels allocated all the time.
---
drivers/spi/spi-s3c64xx.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 92de1b5..e6a46c9 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -797,10 +797,6 @@ static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
{
struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
- /* Acquire DMA channels */
- while (!acquire_dma(sdd))
- msleep(10);
-
pm_runtime_get_sync(&sdd->pdev->dev);
return 0;
@@ -810,10 +806,6 @@ static int s3c64xx_spi_unprepare_transfer(struct spi_master *spi)
{
struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(spi);
- /* Free DMA channels */
- sdd->ops->release(sdd->rx_dma.ch, &s3c64xx_spi_dma_client);
- sdd->ops->release(sdd->tx_dma.ch, &s3c64xx_spi_dma_client);
-
pm_runtime_put(&sdd->pdev->dev);
return 0;
@@ -1344,6 +1336,9 @@ static int __init s3c64xx_spi_probe(struct platform_device *pdev)
goto err7;
}
+ // cannot fail(?)
+ acquire_dma(sdd);
+
writel(S3C64XX_SPI_INT_RX_OVERRUN_EN | S3C64XX_SPI_INT_RX_UNDERRUN_EN |
S3C64XX_SPI_INT_TX_OVERRUN_EN | S3C64XX_SPI_INT_TX_UNDERRUN_EN,
sdd->regs + S3C64XX_SPI_INT_EN);
@@ -1396,6 +1391,10 @@ static int s3c64xx_spi_remove(struct platform_device *pdev)
spi_unregister_master(master);
+ /* Free DMA channels */
+ sdd->ops->release(sdd->rx_dma.ch, &s3c64xx_spi_dma_client);
+ sdd->ops->release(sdd->tx_dma.ch, &s3c64xx_spi_dma_client);
+
writel(0, sdd->regs + S3C64XX_SPI_INT_EN);
free_irq(platform_get_irq(pdev, 0), sdd);
--
2.0.1
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31)
2014-09-01 14:30 [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Jeff Epler
` (3 preceding siblings ...)
2014-09-01 14:30 ` [PATCH 4/4] spidev-s3c64xx: allocate dma channel at startup Jeff Epler
@ 2014-09-01 15:41 ` Philipp Lutz
2014-09-01 18:13 ` Jeff Epler
2014-09-09 20:01 ` Uwe Kleine-König
5 siblings, 1 reply; 9+ messages in thread
From: Philipp Lutz @ 2014-09-01 15:41 UTC (permalink / raw)
To: linux-rt-users
Hi Jeff,
thanks for your effort of investigating this issue.
I'll have a look if it can help me decreasing SPI latency with my ARM
based boards (Gumstix, BeagleBoneBlack).
Have you looked into setting a RTPRIO for the SPI worker threads
(spi_init_queue() in spi.c)? As far as I see hardly any SPI
hardware-dependent driver sets the "rt" property in the respective
spi_master struct, so that the main SPI driver (spi.c) will not use a RT
priority, which will be MAX_RT_PRIO - 1.
So far setting a medium RTPRIO for the SPI worker threads helped a lot
to reduce latency. Now I'm able to sustain a stable SPI transaction rate
of around 800 Hz.
Cheers
Phil
-------- Original Message --------
Subject: [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31)
From: Jeff Epler <jepler@unpythonic.net>
To: linux-rt-users@vger.kernel.org
Date: 09/01/2014 04:30 PM
> I recently became interested in realtime access to an SPI device on the
> Odroid U3 platform, with a goal of running a repetitive task every 1ms
> that performs two SPI transactions. (for http://linuxcnc.org/
> interfacing to a http://mesanet.com/ "Anything I/O" card)
>
> Unfortunately, I found that there were frequent large latencies, some
> over 10ms, when using /dev/spidev. This seems to be typical of others'
> experience using it (for instance, one can find threads discussing
> disappointing RT performance of SPI on the beaglebone and pandaboard; at
> least one raspberry pi project chose to implement a pure userspace SPI
> driver instead of using spidev)
>
> At all levels of the SPI stack, I found things that could be improved if
> lowest delays are the goal. I doubt that in their current form these
> changes are suitable to be incorporated in preempt-rt, but I hope that
> this might spur some discussion that would ultimately lead to better
> realtime performance of SPI in the preempt-rt kernel.
>
> As you may know, the kernel's spi driver stack consists of
> spidev - implementation of the /dev/spidev* device
> spi - hardware-independent kernel layer
> spi-xyzzy - hardware-dependent driver for xyzzy hardware
> (s3c64xx in my device)
>
> I fixed causes of latency at each layer
> * First, I eliminated per-ioctl memory allocations in spidev
> * Second, I made __spi_sync *actually* synchronous, rather than
> being a wrapper over spi_async + wait_for_completion; and changed
> spidev to use spi_sync
> * Third, in the hardware-dependent code I moved DMA acquisition to
> device initialization time rather than transaction time
>
> I did not quite achieve my goal of a 1ms repetitive rate yet, but with
> these changes I have run for 12+ hours at a rate of 3 transactions per
> 2ms with acceptable worst-case performance---under 250us for the biggest
> transaction, and 465us for all three (they have different sizes), with
> typical figures of more like 200us for all three transactions. This is
> in contrast to the original performance, in which transactions taking
> over 10ms were seen multiple times per hour. (12 hours is about 64
> million spi transations)
>
> (I changed from talking about 2 transactions to 3, because for an
> unrelated reason the communication in my program is currently divided
> into 3 SPI transactions when two would do)
>
> I know that 3.8 is by no means current, but 3.8.y is the default kernel
> shipped by hardkernel for their U3 devices so it was a rational version
> choice for me. I did skim spi changes from version 3.8 to the present
> and didn't see anything that looked like it was directed at improving
> SPI latency, though the underlying code has probably changed enough over
> time that I assume my patches wouldn't actually apply at the tip of the
> latest and greatest branches.
>
> PS The fact that the first PREEMPT-RT kernel I built for the odroid
> worked and has basically good latency (until trying to talk to the
> hardware :-/) impressed the heck out of me.
>
> Jeff Epler (4):
> spi: reenable sync SPI transfers
> spidev: Avoid runtime memory allocations
> spidev: actually use synchronous transfers
> spidev-s3c64xx: allocate dma channel at startup
>
> drivers/spi/spi-s3c64xx.c | 15 +++++++--------
> drivers/spi/spi.c | 22 ++++++++++++++++------
> drivers/spi/spidev.c | 43 ++++++++++++++++++++++++++++++-------------
> 3 files changed, 53 insertions(+), 27 deletions(-)
>
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31)
2014-09-01 15:41 ` [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Philipp Lutz
@ 2014-09-01 18:13 ` Jeff Epler
2014-09-01 22:30 ` Harry van Haaren
0 siblings, 1 reply; 9+ messages in thread
From: Jeff Epler @ 2014-09-01 18:13 UTC (permalink / raw)
To: Philipp Lutz; +Cc: linux-rt-users
On Mon, Sep 01, 2014 at 05:41:30PM +0200, Philipp Lutz wrote:
> Have you looked into setting a RTPRIO for the SPI worker threads
> (spi_init_queue() in spi.c)? As far as I see hardly any SPI
> hardware-dependent driver sets the "rt" property in the respective
> spi_master struct, so that the main SPI driver (spi.c) will not use a RT
> priority, which will be MAX_RT_PRIO - 1.
Sort of.
I was unaware of the "rt" property so I never tried setting it on the
kernel side. However, I did run into the idea of using "chrt" on the
kernel tasks [spi1] and [irq/99-spi-s3c6]. This improved things
somewhat, but this plus eliminating DMA acquistion still left me with
some 800+us latencies that I didn't want.
(it may have also helped somewhat to to use cpuset to bind these tasks
to the same CPU where my realtime task runs--the U3 is a 4-core chip,
and linuxcnc binds to the last CPU by default)
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31)
2014-09-01 18:13 ` Jeff Epler
@ 2014-09-01 22:30 ` Harry van Haaren
0 siblings, 0 replies; 9+ messages in thread
From: Harry van Haaren @ 2014-09-01 22:30 UTC (permalink / raw)
To: Jeff Epler; +Cc: Philipp Lutz, linux-rt-users
On Mon, Sep 1, 2014 at 7:13 PM, Jeff Epler <jepler@unpythonic.net> wrote:
> On Mon, Sep 01, 2014 at 05:41:30PM +0200, Philipp Lutz wrote:
>> Have you looked into setting a RTPRIO for the SPI worker threads
>> <snip>
> Sort of.
>
> I was unaware of the "rt" property so I never tried setting it on the
> kernel side. However, I did run into the idea of using "chrt" on the
> kernel tasks [spi1] and [irq/99-spi-s3c6]. This improved things
> somewhat, but this plus eliminating DMA acquistion still left me with
> some 800+us latencies that I didn't want.
chrt changes the rtprio, its the same thing.
Prioritizing IRQ's and using chrt is discussed (with a focus on audio
soundcards) [1]. Although the page is outdated, it is the best resource
I'm aware of for practical advice / walktrough of using rtprio and such.
[1] http://subversion.ffado.org/wiki/IrqPriorities
Cheers, -Harry
www.openavproductions.com
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31)
2014-09-01 14:30 [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Jeff Epler
` (4 preceding siblings ...)
2014-09-01 15:41 ` [RFC 0/4] Improving SPI driver latency (vs v3.8.13.14-rt31) Philipp Lutz
@ 2014-09-09 20:01 ` Uwe Kleine-König
5 siblings, 0 replies; 9+ messages in thread
From: Uwe Kleine-König @ 2014-09-09 20:01 UTC (permalink / raw)
To: Jeff Epler; +Cc: linux-rt-users
Hello Jeff,
On Mon, Sep 01, 2014 at 09:30:31AM -0500, Jeff Epler wrote:
> Unfortunately, I found that there were frequent large latencies, some
> over 10ms, when using /dev/spidev. This seems to be typical of others'
> experience using it (for instance, one can find threads discussing
> disappointing RT performance of SPI on the beaglebone and pandaboard; at
> least one raspberry pi project chose to implement a pure userspace SPI
> driver instead of using spidev)
>
> At all levels of the SPI stack, I found things that could be improved if
> lowest delays are the goal. I doubt that in their current form these
> changes are suitable to be incorporated in preempt-rt, but I hope that
> this might spur some discussion that would ultimately lead to better
> realtime performance of SPI in the preempt-rt kernel.
I wonder why you didn't consider mainline as target for your patches
instead of preempt-rt. Also you might want to cc the spi maintainer on
this topic. I didn't check in detail what you did, but if it's useful
for others it would be a pity if it were not picked up because the
responsible people were not aware of your effort.
Best regards
Uwe
--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | http://www.pengutronix.de/ |
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 9+ messages in thread