* [PATCH 2/3] spufs: Disable local interrupts for SPE hash_page calls.
2006-04-29 0:40 [PATCH 0/3] cell: 2.6.17-rc3 bug fixes Arnd Bergmann
@ 2006-04-29 0:40 ` Arnd Bergmann
2006-04-29 0:40 ` [PATCH 3/3] powerpc: update cell_defconfig Arnd Bergmann
2006-04-29 0:45 ` [PATCH 1/3] powerpc: Make rtas console _much_ faster Arnd Bergmann
2 siblings, 0 replies; 7+ messages in thread
From: Arnd Bergmann @ 2006-04-29 0:40 UTC (permalink / raw)
To: Paul Mackerras; +Cc: Arnd Bergmann, linuxppc-dev, cbe-oss-dev, linux-kernel
This patch saves the local interrupt state and disables local interrupts
during hash_page processing for SPE contexts.
We have to do it explicitly in the spu_irq_class_1_bottom
function. For the interrupt handlers, we get the behaviour
implicitly by using SA_INTERRUPT to disable interrupts while
in the handler.
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
---
Index: linus-2.6/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- linus-2.6.orig/arch/powerpc/platforms/cell/spu_base.c
+++ linus-2.6/arch/powerpc/platforms/cell/spu_base.c
@@ -306,19 +306,19 @@ spu_request_irqs(struct spu *spu)
snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number);
ret = request_irq(irq_base + spu->isrc,
- spu_irq_class_0, 0, spu->irq_c0, spu);
+ spu_irq_class_0, SA_INTERRUPT, spu->irq_c0, spu);
if (ret)
goto out;
snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number);
ret = request_irq(irq_base + IIC_CLASS_STRIDE + spu->isrc,
- spu_irq_class_1, 0, spu->irq_c1, spu);
+ spu_irq_class_1, SA_INTERRUPT, spu->irq_c1, spu);
if (ret)
goto out1;
snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number);
ret = request_irq(irq_base + 2*IIC_CLASS_STRIDE + spu->isrc,
- spu_irq_class_2, 0, spu->irq_c2, spu);
+ spu_irq_class_2, SA_INTERRUPT, spu->irq_c2, spu);
if (ret)
goto out2;
goto out;
@@ -487,10 +487,14 @@ int spu_irq_class_1_bottom(struct spu *s
ea = spu->dar;
dsisr = spu->dsisr;
if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
+ u64 flags;
+
access = (_PAGE_PRESENT | _PAGE_USER);
access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+ local_irq_save(flags);
if (hash_page(ea, access, 0x300) != 0)
error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
+ local_irq_restore(flags);
}
if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
if ((ret = spu_handle_mm_fault(spu)) != 0)
--
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 3/3] powerpc: update cell_defconfig
2006-04-29 0:40 [PATCH 0/3] cell: 2.6.17-rc3 bug fixes Arnd Bergmann
2006-04-29 0:40 ` [PATCH 2/3] spufs: Disable local interrupts for SPE hash_page calls Arnd Bergmann
@ 2006-04-29 0:40 ` Arnd Bergmann
2006-04-29 0:45 ` [PATCH 1/3] powerpc: Make rtas console _much_ faster Arnd Bergmann
2 siblings, 0 replies; 7+ messages in thread
From: Arnd Bergmann @ 2006-04-29 0:40 UTC (permalink / raw)
To: Paul Mackerras; +Cc: Arnd Bergmann, linuxppc-dev, cbe-oss-dev, linux-kernel
Reflect the changes to Kconfig since the last update.
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
---
Index: linus-2.6/arch/powerpc/configs/cell_defconfig
===================================================================
--- linus-2.6.orig/arch/powerpc/configs/cell_defconfig 2006-04-29 01:50:25.000000000 +0200
+++ linus-2.6/arch/powerpc/configs/cell_defconfig 2006-04-29 02:05:56.000000000 +0200
@@ -9,6 +9,7 @@
CONFIG_MMU=y
CONFIG_GENERIC_HARDIRQS=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_PPC=y
CONFIG_EARLY_PRINTK=y
@@ -55,6 +56,7 @@
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_EMBEDDED is not set
@@ -69,10 +71,6 @@
CONFIG_FUTEX=y
CONFIG_EPOLL=y
CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
CONFIG_SLAB=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
@@ -84,7 +82,6 @@
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_MODULE_FORCE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
@@ -93,6 +90,7 @@
#
# Block layer
#
+# CONFIG_BLK_DEV_IO_TRACE is not set
#
# IO Schedulers
@@ -126,6 +124,7 @@
CONFIG_MMIO_NVRAM=y
CONFIG_CELL_IIC=y
# CONFIG_PPC_MPC106 is not set
+# CONFIG_PPC_970_NAP is not set
# CONFIG_CPU_FREQ is not set
# CONFIG_WANT_EARLY_SERIAL is not set
@@ -167,7 +166,6 @@
CONFIG_SPARSEMEM_EXTREME=y
# CONFIG_MEMORY_HOTPLUG is not set
CONFIG_SPLIT_PTLOCK_CPUS=4
-CONFIG_MIGRATION=y
# CONFIG_PPC_64K_PAGES is not set
CONFIG_SCHED_SMT=y
CONFIG_PROC_DEVICETREE=y
@@ -184,7 +182,6 @@
# CONFIG_PPC_INDIRECT_PCI is not set
CONFIG_PCI=y
CONFIG_PCI_DOMAINS=y
-CONFIG_PCI_LEGACY_PROC=y
# CONFIG_PCI_DEBUG is not set
#
@@ -226,6 +223,7 @@
# CONFIG_INET_AH is not set
# CONFIG_INET_ESP is not set
# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
CONFIG_INET_TUNNEL=y
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
@@ -242,6 +240,7 @@
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
+CONFIG_INET6_XFRM_TUNNEL=m
CONFIG_INET6_TUNNEL=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
@@ -632,6 +631,7 @@
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_NR_UARTS=4
CONFIG_SERIAL_8250_RUNTIME_UARTS=4
# CONFIG_SERIAL_8250_EXTENDED is not set
@@ -717,7 +717,6 @@
# CONFIG_I2C_PARPORT_LIGHT is not set
# CONFIG_I2C_PROSAVAGE is not set
# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_SCx200_ACB is not set
# CONFIG_I2C_SIS5595 is not set
# CONFIG_I2C_SIS630 is not set
# CONFIG_I2C_SIS96X is not set
@@ -736,9 +735,7 @@
# CONFIG_SENSORS_PCF8574 is not set
# CONFIG_SENSORS_PCA9539 is not set
# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_RTC_X1205_I2C is not set
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
@@ -766,10 +763,6 @@
#
#
-# Multimedia Capabilities Port drivers
-#
-
-#
# Multimedia devices
#
# CONFIG_VIDEO_DEV is not set
@@ -818,6 +811,19 @@
# CONFIG_MMC is not set
#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
+#
# InfiniBand support
#
CONFIG_INFINIBAND=y
@@ -834,6 +840,11 @@
#
#
+# Real Time Clock
+#
+# CONFIG_RTC_CLASS is not set
+
+#
# File systems
#
CONFIG_EXT2_FS=y
@@ -889,7 +900,6 @@
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
# CONFIG_CONFIGFS_FS is not set
#
--
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/3] powerpc: Make rtas console _much_ faster
2006-04-29 0:40 [PATCH 0/3] cell: 2.6.17-rc3 bug fixes Arnd Bergmann
2006-04-29 0:40 ` [PATCH 2/3] spufs: Disable local interrupts for SPE hash_page calls Arnd Bergmann
2006-04-29 0:40 ` [PATCH 3/3] powerpc: update cell_defconfig Arnd Bergmann
@ 2006-04-29 0:45 ` Arnd Bergmann
2006-04-29 1:56 ` Michael Ellerman
2 siblings, 1 reply; 7+ messages in thread
From: Arnd Bergmann @ 2006-04-29 0:45 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linux-kernel, linuxppc-dev, cbe-oss-dev, Arnd Bergmann
Currently the hvc_rtas driver is painfully slow to use. Our "benchmark" is
ls -R /etc, which spits out about 27866 characters. The theoretical maximum
speed would be about 2.2 seconds, the current code takes ~50 seconds.
The core of the problem is that sometimes when the tty layer asks us to push
characters the firmware isn't able to handle some or all of them, and so
returns an error. The current code sees this and just returns to the tty code
with the buffer half sent.
There's the khvcd thread, which will eventually wake up and try to push more
characters; that will usually work because the firmware's had time to push
the characters out. But the thread only wakes up every 10 milliseconds, which
isn't fast enough.
There's already code in the hvc_console driver to make the khvcd thread do
a "quick" loop, where it just calls yield() instead of sleeping. The only code
that triggered that behaviour was recently removed though, which I don't
quite understand.
Still, if we set HVC_POLL_QUICK whenever hvc_push() doesn't push all
characters (ie. RTAS blocks), we can get good performance out of the hvc_rtas
backend. With this patch the "benchmark" takes ~2.8 seconds.
Cc: Ryan Arnold <rsa@us.ibm.com>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
---
drivers/char/hvc_console.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linus-2.6/drivers/char/hvc_console.c
===================================================================
--- linus-2.6.orig/drivers/char/hvc_console.c
+++ linus-2.6/drivers/char/hvc_console.c
@@ -570,7 +570,7 @@ static int hvc_poll(struct hvc_struct *h
hvc_push(hp);
/* Reschedule us if still some write pending */
if (hp->n_outbuf > 0)
- poll_mask |= HVC_POLL_WRITE;
+ poll_mask |= HVC_POLL_WRITE | HVC_POLL_QUICK;
/* No tty attached, just skip */
tty = hp->tty;
--
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] powerpc: Make rtas console _much_ faster
2006-04-29 0:45 ` [PATCH 1/3] powerpc: Make rtas console _much_ faster Arnd Bergmann
@ 2006-04-29 1:56 ` Michael Ellerman
2006-04-29 8:00 ` Arnd Bergmann
0 siblings, 1 reply; 7+ messages in thread
From: Michael Ellerman @ 2006-04-29 1:56 UTC (permalink / raw)
To: Arnd Bergmann
Cc: Arnd Bergmann, linux-kernel, linuxppc-dev, Paul Mackerras,
cbe-oss-dev
[-- Attachment #1: Type: text/plain, Size: 2562 bytes --]
I'll clean this one up a little before merging it as per Ryan's email of
a week or two ago. New patch today or tomorrow.
Even though this is 1/3, the rest of the series should be fine to merge,
right Arnd?
cheers
On Sat, 2006-04-29 at 02:45 +0200, Arnd Bergmann wrote:
> Currently the hvc_rtas driver is painfully slow to use. Our "benchmark" is
> ls -R /etc, which spits out about 27866 characters. The theoretical maximum
> speed would be about 2.2 seconds, the current code takes ~50 seconds.
>
> The core of the problem is that sometimes when the tty layer asks us to push
> characters the firmware isn't able to handle some or all of them, and so
> returns an error. The current code sees this and just returns to the tty code
> with the buffer half sent.
>
> There's the khvcd thread which will eventually wake up and try to push more
> characters, that will usually work because the firmware's had time to push
> the characters out. But the thread only wakes up every 10 milliseconds, which
> isn't fast enough.
>
> There's already code in the hvc_console driver to make the khvcd thread do
> a "quick" loop, where it just calls yield() instead of sleeping. The only code
> that triggered that behaviour was recently removed though, which I don't
> quite understand.
>
> Still, if we set HVC_POLL_QUICK whenever the push hvc_push() doesn't push all
> characters (ie. RTAS blocks), we can get good performance out of the hvc_rtas
> backend. With this patch the "benchmark" takes ~2.8 seconds.
>
> Cc: Ryan Arnold <rsa@us.ibm.com>
> Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
> Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
>
> ---
>
> drivers/char/hvc_console.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> Index: linus-2.6/drivers/char/hvc_console.c
> ===================================================================
> --- linus-2.6.orig/drivers/char/hvc_console.c
> +++ linus-2.6/drivers/char/hvc_console.c
> @@ -570,7 +570,7 @@ static int hvc_poll(struct hvc_struct *h
> hvc_push(hp);
> /* Reschedule us if still some write pending */
> if (hp->n_outbuf > 0)
> - poll_mask |= HVC_POLL_WRITE;
> + poll_mask |= HVC_POLL_WRITE | HVC_POLL_QUICK;
>
> /* No tty attached, just skip */
> tty = hp->tty;
>
> --
--
Michael Ellerman
IBM OzLabs
wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)
We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 191 bytes --]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/3] powerpc: Make rtas console _much_ faster
2006-04-29 1:56 ` Michael Ellerman
@ 2006-04-29 8:00 ` Arnd Bergmann
2006-04-30 3:07 ` [PATCH] " Michael Ellerman
0 siblings, 1 reply; 7+ messages in thread
From: Arnd Bergmann @ 2006-04-29 8:00 UTC (permalink / raw)
To: michael; +Cc: linuxppc-dev, Paul Mackerras, cbe-oss-dev, linux-kernel
On Saturday 29 April 2006 03:56, Michael Ellerman wrote:
> I'll clean this one up a little before merging it as per Ryan's email of
> a week or two ago. New patch today or tomorrow.
Ok, I misremembered the discussion on that patch and it didn't occur
to me that a one-line patch needs cleanup ;-)
Thanks!
> Even though this is 1/3 the rest of the series should be fine to merge,
> right Arnd?
Yes.
Arnd <><
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH] powerpc: Make rtas console _much_ faster
2006-04-29 8:00 ` Arnd Bergmann
@ 2006-04-30 3:07 ` Michael Ellerman
0 siblings, 0 replies; 7+ messages in thread
From: Michael Ellerman @ 2006-04-30 3:07 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev, linux-kernel, Arnd Bergmann, cbe-oss-dev
Currently the hvc_rtas driver is painfully slow to use. Our "benchmark" is
ls -R /etc, which spits out about 27866 characters. The theoretical maximum
speed would be about 2.2 seconds, the current code takes ~50 seconds.
The core of the problem is that sometimes when the tty layer asks us to push
characters the firmware isn't able to handle some or all of them, and so
returns an error. The current code sees this and just returns to the tty code
with the buffer half sent.
The khvcd thread will eventually wake up and try to push more characters, which
will usually work because by then the firmware's had time to make room. But
the khvcd thread only wakes up every 10 milliseconds, which isn't fast enough.
So change the khvcd thread logic so that if there's an incomplete write we
yield() and then immediately try writing again. Doing so makes POLL_QUICK and
POLL_WRITE synonymous, so remove POLL_QUICK.
With this patch our "benchmark" takes ~2.8 seconds.
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
drivers/char/hvc_console.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: cell/drivers/char/hvc_console.c
===================================================================
--- cell.orig/drivers/char/hvc_console.c
+++ cell/drivers/char/hvc_console.c
@@ -553,7 +553,6 @@ static int hvc_chars_in_buffer(struct tt
#define HVC_POLL_READ 0x00000001
#define HVC_POLL_WRITE 0x00000002
-#define HVC_POLL_QUICK 0x00000004
static int hvc_poll(struct hvc_struct *hp)
{
@@ -568,6 +567,7 @@ static int hvc_poll(struct hvc_struct *h
/* Push pending writes */
if (hp->n_outbuf > 0)
hvc_push(hp);
+
/* Reschedule us if still some write pending */
if (hp->n_outbuf > 0)
poll_mask |= HVC_POLL_WRITE;
@@ -680,7 +680,7 @@ int khvcd(void *unused)
poll_mask |= HVC_POLL_READ;
if (hvc_kicked)
continue;
- if (poll_mask & HVC_POLL_QUICK) {
+ if (poll_mask & HVC_POLL_WRITE) {
yield();
continue;
}
^ permalink raw reply [flat|nested] 7+ messages in thread