* [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-23 14:59 [PATCH 0/2][RT] powerpc - fix bug in irq reverse mapping radix tree Sebastien Dugue
@ 2008-07-23 15:01 ` Sebastien Dugue
0 siblings, 0 replies; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-23 15:01 UTC (permalink / raw)
To: Linux RT Users
Cc: Tim Chavez, linux-kernel, Jean Pierre Dion, linuxppc-dev, paulus,
Gilles Carry
From: Sebastien Dugue <sebastien.dugue@bull.net>
Date: Tue, 22 Jul 2008 11:56:41 +0200
Subject: [PATCH][RT] powerpc - Make the irq reverse mapping radix tree lockless
The radix tree used by interrupt controllers for their irq reverse mapping
(currently only the XICS found on pSeries) has a complex locking scheme
dating back to before the advent of the concurrent radix tree on preempt-rt.
Take advantage of this and of the fact that the items of the tree are
pointers to elements of a static array (irq_map), which can never go away
under us, to simplify the locking.
Concurrency between readers and writers is handled by the intrinsic
properties of the concurrent radix tree. Concurrency between the tree
initialization, which is done asynchronously with reader and writer accesses, is
handled via an atomic variable (revmap_trees_allocated) set when the tree
has been initialized and checked before any reader or writer access, just
like we used to check for tree.gfp_mask != 0 before.
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/kernel/irq.c | 102 ++++++++++++----------------------------------
1 file changed, 27 insertions(+), 75 deletions(-)
Index: linux-2.6.25.8-rt7/arch/powerpc/kernel/irq.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/kernel/irq.c
+++ linux-2.6.25.8-rt7/arch/powerpc/kernel/irq.c
@@ -403,8 +403,7 @@ void do_softirq(void)
static LIST_HEAD(irq_hosts);
static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static DEFINE_PER_CPU(unsigned int, irq_radix_reader);
-static unsigned int irq_radix_writer;
+static atomic_t revmap_trees_allocated = ATOMIC_INIT(0);
struct irq_map_entry irq_map[NR_IRQS];
static unsigned int irq_virq_count = NR_IRQS;
static struct irq_host *irq_default_host;
@@ -547,57 +546,6 @@ void irq_set_virq_count(unsigned int cou
irq_virq_count = count;
}
-/* radix tree not lockless safe ! we use a brlock-type mecanism
- * for now, until we can use a lockless radix tree
- */
-static void irq_radix_wrlock(unsigned long *flags)
-{
- unsigned int cpu, ok;
-
- spin_lock_irqsave(&irq_big_lock, *flags);
- irq_radix_writer = 1;
- smp_mb();
- do {
- barrier();
- ok = 1;
- for_each_possible_cpu(cpu) {
- if (per_cpu(irq_radix_reader, cpu)) {
- ok = 0;
- break;
- }
- }
- if (!ok)
- cpu_relax();
- } while(!ok);
-}
-
-static void irq_radix_wrunlock(unsigned long flags)
-{
- smp_wmb();
- irq_radix_writer = 0;
- spin_unlock_irqrestore(&irq_big_lock, flags);
-}
-
-static void irq_radix_rdlock(unsigned long *flags)
-{
- local_irq_save(*flags);
- __get_cpu_var(irq_radix_reader) = 1;
- smp_mb();
- if (likely(irq_radix_writer == 0))
- return;
- __get_cpu_var(irq_radix_reader) = 0;
- smp_wmb();
- spin_lock(&irq_big_lock);
- __get_cpu_var(irq_radix_reader) = 1;
- spin_unlock(&irq_big_lock);
-}
-
-static void irq_radix_rdunlock(unsigned long flags)
-{
- __get_cpu_var(irq_radix_reader) = 0;
- local_irq_restore(flags);
-}
-
static int irq_setup_virq(struct irq_host *host, unsigned int virq,
irq_hw_number_t hwirq)
{
@@ -752,7 +700,6 @@ void irq_dispose_mapping(unsigned int vi
{
struct irq_host *host;
irq_hw_number_t hwirq;
- unsigned long flags;
if (virq == NO_IRQ)
return;
@@ -784,15 +731,20 @@ void irq_dispose_mapping(unsigned int vi
if (hwirq < host->revmap_data.linear.size)
host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
break;
- case IRQ_HOST_MAP_TREE:
+ case IRQ_HOST_MAP_TREE: {
+ DEFINE_RADIX_TREE_CONTEXT(ctx, &host->revmap_data.tree);
+
/* Check if radix tree allocated yet */
- if (host->revmap_data.tree.gfp_mask == 0)
+ if (atomic_read(&revmap_trees_allocated) == 0)
break;
- irq_radix_wrlock(&flags);
- radix_tree_delete(&host->revmap_data.tree, hwirq);
- irq_radix_wrunlock(flags);
+
+ radix_tree_lock(&ctx);
+ radix_tree_delete(ctx.tree, hwirq);
+ radix_tree_unlock(&ctx);
+
break;
}
+ }
/* Destroy map */
smp_mb();
@@ -845,22 +797,20 @@ unsigned int irq_radix_revmap(struct irq
struct radix_tree_root *tree;
struct irq_map_entry *ptr;
unsigned int virq;
- unsigned long flags;
WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
- /* Check if the radix tree exist yet. We test the value of
- * the gfp_mask for that. Sneaky but saves another int in the
- * structure. If not, we fallback to slow mode
- */
- tree = &host->revmap_data.tree;
- if (tree->gfp_mask == 0)
+ /* Check if the radix tree exist yet. */
+ if (atomic_read(&revmap_trees_allocated) == 0)
return irq_find_mapping(host, hwirq);
- /* Now try to resolve */
- irq_radix_rdlock(&flags);
+ /*
+ * Now try to resolve
+ * No rcu_read_lock(ing) needed, the ptr returned can't go under us
+ * as it's referencing an entry in the static irq_map table.
+ */
+ tree = &host->revmap_data.tree;
ptr = radix_tree_lookup(tree, hwirq);
- irq_radix_rdunlock(flags);
/* Found it, return */
if (ptr) {
@@ -871,9 +821,10 @@ unsigned int irq_radix_revmap(struct irq
/* If not there, try to insert it */
virq = irq_find_mapping(host, hwirq);
if (virq != NO_IRQ) {
- irq_radix_wrlock(&flags);
- radix_tree_insert(tree, hwirq, &irq_map[virq]);
- irq_radix_wrunlock(flags);
+ DEFINE_RADIX_TREE_CONTEXT(ctx, tree);
+ radix_tree_lock(&ctx);
+ radix_tree_insert(ctx.tree, hwirq, &irq_map[virq]);
+ radix_tree_unlock(&ctx);
}
return virq;
}
@@ -984,14 +935,15 @@ void irq_early_init(void)
static int irq_late_init(void)
{
struct irq_host *h;
- unsigned long flags;
- irq_radix_wrlock(&flags);
list_for_each_entry(h, &irq_hosts, link) {
if (h->revmap_type == IRQ_HOST_MAP_TREE)
INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC);
}
- irq_radix_wrunlock(flags);
+
+ /* Make sure the radix trees inits are visible before setting the flag */
+ smp_mb();
+ atomic_set(&revmap_trees_allocated, 1);
return 0;
}
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 0/2][RT] powerpc - fix bug in irq reverse mapping radix tree (Resend)
@ 2008-07-24 10:23 Sebastien Dugue
2008-07-24 10:48 ` [PATCH 1/2][RT] powerpc - XICS: move the call to irq_radix_revmap from xics_startup to xics_host_map Sebastien Dugue
2008-07-24 10:50 ` [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless Sebastien Dugue
0 siblings, 2 replies; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-24 10:23 UTC (permalink / raw)
To: Linux-rt
Cc: Tim Chavez, linux-kernel, Jean Pierre Dion, linux-ppc,
Paul Mackerras, Gilles Carry
(This is a resend, as vger dropped my previous attempt; sorry for the duplication)
Hi,
here are 2 patches for fixing the following bug occurring on IBM pSeries under
an RT kernel:
BUG: sleeping function called from invalid context swapper(1) at kernel/rtmutex.c:739
in_atomic():1 [00000002], irqs_disabled():1
Call Trace:
[c0000001e20f3340] [c000000000010370] .show_stack+0x70/0x1bc (unreliable)
[c0000001e20f33f0] [c000000000049380] .__might_sleep+0x11c/0x138
[c0000001e20f3470] [c0000000002a2f64] .__rt_spin_lock+0x3c/0x98
[c0000001e20f34f0] [c0000000000c3f20] .kmem_cache_alloc+0x68/0x184
[c0000001e20f3590] [c000000000193f3c] .radix_tree_node_alloc+0xf0/0x144
[c0000001e20f3630] [c000000000195190] .radix_tree_insert+0x18c/0x2fc
[c0000001e20f36f0] [c00000000000c710] .irq_radix_revmap+0x1a4/0x1e4
[c0000001e20f37b0] [c00000000003b3f0] .xics_startup+0x30/0x54
[c0000001e20f3840] [c00000000008b864] .setup_irq+0x26c/0x370
[c0000001e20f38f0] [c00000000008ba68] .request_irq+0x100/0x158
[c0000001e20f39a0] [c0000000001ee9c0] .hvc_open+0xb4/0x148
[c0000001e20f3a40] [c0000000001d72ec] .tty_open+0x200/0x368
[c0000001e20f3af0] [c0000000000ce928] .chrdev_open+0x1f4/0x25c
[c0000001e20f3ba0] [c0000000000c8bf0] .__dentry_open+0x188/0x2c8
[c0000001e20f3c50] [c0000000000c8dec] .do_filp_open+0x50/0x70
[c0000001e20f3d70] [c0000000000c8e8c] .do_sys_open+0x80/0x148
[c0000001e20f3e20] [c00000000000928c] .init_post+0x4c/0x100
[c0000001e20f3ea0] [c0000000003c0e0c] .kernel_init+0x428/0x478
[c0000001e20f3f90] [c000000000027448] .kernel_thread+0x4c/0x68
The root cause of this bug lies in the fact that the XICS interrupt controller
uses a radix tree for its reverse irq mapping and that we cannot allocate the tree
nodes (even with GFP_ATOMIC) with preemption disabled.
In fact, we have 2 nested preemption-disabled sections when we want to allocate
a new node:
- setup_irq() does a spin_lock_irqsave() before calling xics_startup() which
then calls irq_radix_revmap() to insert a new node in the tree
- irq_radix_revmap() also does a spin_lock_irqsave() (in irq_radix_wrlock())
before the radix_tree_insert()
The first patch moves the call to irq_radix_revmap() from xics_startup() out to
xics_host_map_direct() and xics_host_map_lpar() which are called with preemption
enabled.
The second patch is a little more involved in that it takes advantage of
the concurrent radix tree to simplify the locking requirements and allows
a new node to be allocated outside a preemption-disabled section.
I just hope I've correctly understood the concurrent radix tree semantics
and got the (absence of) locking right.
Sebastien.
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 1/2][RT] powerpc - XICS: move the call to irq_radix_revmap from xics_startup to xics_host_map
2008-07-24 10:23 [PATCH 0/2][RT] powerpc - fix bug in irq reverse mapping radix tree (Resend) Sebastien Dugue
@ 2008-07-24 10:48 ` Sebastien Dugue
2008-07-24 10:50 ` [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless Sebastien Dugue
1 sibling, 0 replies; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-24 10:48 UTC (permalink / raw)
To: Linux-rt
Cc: Tim Chavez, linux-kernel, Jean Pierre Dion, linux-ppc,
Paul Mackerras, Gilles Carry
From: Sebastien Dugue <sebastien.dugue@bull.net>
Date: Tue, 22 Jul 2008 13:05:24 +0200
Subject: [PATCH][RT] powerpc - XICS: move the call to irq_radix_revmap from xics_startup to xics_host_map
This patch moves the insertion of an irq into the reverse mapping radix tree
from xics_startup() into xics_host_map().
The reason for this change is that xics_startup() is called with preemption
disabled (which is not the case for xics_host_map()) which is a problem under a
preempt-rt kernel as we cannot even allocate GFP_ATOMIC memory for the radix tree
nodes.
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <michael@ellerman.id.au>
---
arch/powerpc/platforms/pseries/xics.c | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
Index: linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/xics.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/platforms/pseries/xics.c
+++ linux-2.6.25.8-rt7/arch/powerpc/platforms/pseries/xics.c
@@ -311,12 +311,6 @@ static void xics_mask_irq(unsigned int v
static unsigned int xics_startup(unsigned int virq)
{
- unsigned int irq;
-
- /* force a reverse mapping of the interrupt so it gets in the cache */
- irq = (unsigned int)irq_map[virq].hwirq;
- irq_radix_revmap(xics_host, irq);
-
/* unmask it */
xics_unmask_irq(virq);
return 0;
@@ -529,8 +523,14 @@ static int xics_host_match(struct irq_ho
static int xics_host_map_direct(struct irq_host *h, unsigned int virq,
irq_hw_number_t hw)
{
+ unsigned int irq;
+
pr_debug("xics: map_direct virq %d, hwirq 0x%lx\n", virq, hw);
+ /* force a reverse mapping of the interrupt so it gets in the cache */
+ irq = (unsigned int)irq_map[virq].hwirq;
+ irq_radix_revmap(xics_host, irq);
+
get_irq_desc(virq)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(virq, &xics_pic_direct, handle_fasteoi_irq);
return 0;
@@ -539,8 +539,14 @@ static int xics_host_map_direct(struct i
static int xics_host_map_lpar(struct irq_host *h, unsigned int virq,
irq_hw_number_t hw)
{
+ unsigned int irq;
+
pr_debug("xics: map_direct virq %d, hwirq 0x%lx\n", virq, hw);
+ /* force a reverse mapping of the interrupt so it gets in the cache */
+ irq = (unsigned int)irq_map[virq].hwirq;
+ irq_radix_revmap(xics_host, irq);
+
get_irq_desc(virq)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(virq, &xics_pic_lpar, handle_fasteoi_irq);
return 0;
^ permalink raw reply [flat|nested] 13+ messages in thread
* [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-24 10:23 [PATCH 0/2][RT] powerpc - fix bug in irq reverse mapping radix tree (Resend) Sebastien Dugue
2008-07-24 10:48 ` [PATCH 1/2][RT] powerpc - XICS: move the call to irq_radix_revmap from xics_startup to xics_host_map Sebastien Dugue
@ 2008-07-24 10:50 ` Sebastien Dugue
2008-07-24 11:11 ` Nick Piggin
1 sibling, 1 reply; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-24 10:50 UTC (permalink / raw)
To: Linux-rt
Cc: Tim Chavez, linux-kernel, Jean Pierre Dion, linux-ppc,
Paul Mackerras, Gilles Carry
From: Sebastien Dugue <sebastien.dugue@bull.net>
Date: Tue, 22 Jul 2008 11:56:41 +0200
Subject: [PATCH][RT] powerpc - Make the irq reverse mapping radix tree lockless
The radix tree used by interrupt controllers for their irq reverse mapping
(currently only the XICS found on pSeries) has a complex locking scheme
dating back to before the advent of the concurrent radix tree on preempt-rt.
Take advantage of this and of the fact that the items of the tree are
pointers to elements of a static array (irq_map), which can never go away
under us, to simplify the locking.
Concurrency between readers and writers is handled by the intrinsic
properties of the concurrent radix tree. Concurrency between the tree
initialization, which is done asynchronously with reader and writer accesses, is
handled via an atomic variable (revmap_trees_allocated) set when the tree
has been initialized and checked before any reader or writer access, just
like we used to check for tree.gfp_mask != 0 before.
Signed-off-by: Sebastien Dugue <sebastien.dugue@bull.net>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/kernel/irq.c | 102 ++++++++++++----------------------------------
1 file changed, 27 insertions(+), 75 deletions(-)
Index: linux-2.6.25.8-rt7/arch/powerpc/kernel/irq.c
===================================================================
--- linux-2.6.25.8-rt7.orig/arch/powerpc/kernel/irq.c
+++ linux-2.6.25.8-rt7/arch/powerpc/kernel/irq.c
@@ -403,8 +403,7 @@ void do_softirq(void)
static LIST_HEAD(irq_hosts);
static DEFINE_RAW_SPINLOCK(irq_big_lock);
-static DEFINE_PER_CPU(unsigned int, irq_radix_reader);
-static unsigned int irq_radix_writer;
+static atomic_t revmap_trees_allocated = ATOMIC_INIT(0);
struct irq_map_entry irq_map[NR_IRQS];
static unsigned int irq_virq_count = NR_IRQS;
static struct irq_host *irq_default_host;
@@ -547,57 +546,6 @@ void irq_set_virq_count(unsigned int cou
irq_virq_count = count;
}
-/* radix tree not lockless safe ! we use a brlock-type mecanism
- * for now, until we can use a lockless radix tree
- */
-static void irq_radix_wrlock(unsigned long *flags)
-{
- unsigned int cpu, ok;
-
- spin_lock_irqsave(&irq_big_lock, *flags);
- irq_radix_writer = 1;
- smp_mb();
- do {
- barrier();
- ok = 1;
- for_each_possible_cpu(cpu) {
- if (per_cpu(irq_radix_reader, cpu)) {
- ok = 0;
- break;
- }
- }
- if (!ok)
- cpu_relax();
- } while(!ok);
-}
-
-static void irq_radix_wrunlock(unsigned long flags)
-{
- smp_wmb();
- irq_radix_writer = 0;
- spin_unlock_irqrestore(&irq_big_lock, flags);
-}
-
-static void irq_radix_rdlock(unsigned long *flags)
-{
- local_irq_save(*flags);
- __get_cpu_var(irq_radix_reader) = 1;
- smp_mb();
- if (likely(irq_radix_writer == 0))
- return;
- __get_cpu_var(irq_radix_reader) = 0;
- smp_wmb();
- spin_lock(&irq_big_lock);
- __get_cpu_var(irq_radix_reader) = 1;
- spin_unlock(&irq_big_lock);
-}
-
-static void irq_radix_rdunlock(unsigned long flags)
-{
- __get_cpu_var(irq_radix_reader) = 0;
- local_irq_restore(flags);
-}
-
static int irq_setup_virq(struct irq_host *host, unsigned int virq,
irq_hw_number_t hwirq)
{
@@ -752,7 +700,6 @@ void irq_dispose_mapping(unsigned int vi
{
struct irq_host *host;
irq_hw_number_t hwirq;
- unsigned long flags;
if (virq == NO_IRQ)
return;
@@ -784,15 +731,20 @@ void irq_dispose_mapping(unsigned int vi
if (hwirq < host->revmap_data.linear.size)
host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
break;
- case IRQ_HOST_MAP_TREE:
+ case IRQ_HOST_MAP_TREE: {
+ DEFINE_RADIX_TREE_CONTEXT(ctx, &host->revmap_data.tree);
+
/* Check if radix tree allocated yet */
- if (host->revmap_data.tree.gfp_mask == 0)
+ if (atomic_read(&revmap_trees_allocated) == 0)
break;
- irq_radix_wrlock(&flags);
- radix_tree_delete(&host->revmap_data.tree, hwirq);
- irq_radix_wrunlock(flags);
+
+ radix_tree_lock(&ctx);
+ radix_tree_delete(ctx.tree, hwirq);
+ radix_tree_unlock(&ctx);
+
break;
}
+ }
/* Destroy map */
smp_mb();
@@ -845,22 +797,20 @@ unsigned int irq_radix_revmap(struct irq
struct radix_tree_root *tree;
struct irq_map_entry *ptr;
unsigned int virq;
- unsigned long flags;
WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
- /* Check if the radix tree exist yet. We test the value of
- * the gfp_mask for that. Sneaky but saves another int in the
- * structure. If not, we fallback to slow mode
- */
- tree = &host->revmap_data.tree;
- if (tree->gfp_mask == 0)
+ /* Check if the radix tree exist yet. */
+ if (atomic_read(&revmap_trees_allocated) == 0)
return irq_find_mapping(host, hwirq);
- /* Now try to resolve */
- irq_radix_rdlock(&flags);
+ /*
+ * Now try to resolve
+ * No rcu_read_lock(ing) needed, the ptr returned can't go under us
+ * as it's referencing an entry in the static irq_map table.
+ */
+ tree = &host->revmap_data.tree;
ptr = radix_tree_lookup(tree, hwirq);
- irq_radix_rdunlock(flags);
/* Found it, return */
if (ptr) {
@@ -871,9 +821,10 @@ unsigned int irq_radix_revmap(struct irq
/* If not there, try to insert it */
virq = irq_find_mapping(host, hwirq);
if (virq != NO_IRQ) {
- irq_radix_wrlock(&flags);
- radix_tree_insert(tree, hwirq, &irq_map[virq]);
- irq_radix_wrunlock(flags);
+ DEFINE_RADIX_TREE_CONTEXT(ctx, tree);
+ radix_tree_lock(&ctx);
+ radix_tree_insert(ctx.tree, hwirq, &irq_map[virq]);
+ radix_tree_unlock(&ctx);
}
return virq;
}
@@ -984,14 +935,15 @@ void irq_early_init(void)
static int irq_late_init(void)
{
struct irq_host *h;
- unsigned long flags;
- irq_radix_wrlock(&flags);
list_for_each_entry(h, &irq_hosts, link) {
if (h->revmap_type == IRQ_HOST_MAP_TREE)
INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC);
}
- irq_radix_wrunlock(flags);
+
+ /* Make sure the radix trees inits are visible before setting the flag */
+ smp_mb();
+ atomic_set(&revmap_trees_allocated, 1);
return 0;
}
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-24 10:50 ` [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless Sebastien Dugue
@ 2008-07-24 11:11 ` Nick Piggin
2008-07-24 12:18 ` Sebastien Dugue
2008-07-25 5:13 ` Benjamin Herrenschmidt
0 siblings, 2 replies; 13+ messages in thread
From: Nick Piggin @ 2008-07-24 11:11 UTC (permalink / raw)
To: Sebastien Dugue
Cc: Tim Chavez, Linux-rt, linux-kernel, Jean Pierre Dion, linux-ppc,
Paul Mackerras, Gilles Carry
On Thursday 24 July 2008 20:50, Sebastien Dugue wrote:
> From: Sebastien Dugue <sebastien.dugue@bull.net>
> Date: Tue, 22 Jul 2008 11:56:41 +0200
> Subject: [PATCH][RT] powerpc - Make the irq reverse mapping radix tree
> lockless
>
> The radix tree used by interrupt controllers for their irq reverse
> mapping (currently only the XICS found on pSeries) have a complex locking
> scheme dating back to before the advent of the concurrent radix tree on
> preempt-rt.
>
> Take advantage of this and of the fact that the items of the tree are
> pointers to a static array (irq_map) elements which can never go under us
> to simplify the locking.
>
> Concurrency between readers and writers are handled by the intrinsic
> properties of the concurrent radix tree. Concurrency between the tree
> initialization which is done asynchronously with readers and writers access
> is handled via an atomic variable (revmap_trees_allocated) set when the
> tree has been initialized and checked before any reader or writer access
> just like we used to check for tree.gfp_mask != 0 before.
Hmm, RCU radix tree is in mainline too for quite a while. I thought
Ben had already converted this code over ages ago...
Nothing against the -rt patch, but mainline should probably be updated
to use RCU as well?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-24 11:11 ` Nick Piggin
@ 2008-07-24 12:18 ` Sebastien Dugue
2008-07-25 7:49 ` Peter Zijlstra
2008-07-25 5:13 ` Benjamin Herrenschmidt
1 sibling, 1 reply; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-24 12:18 UTC (permalink / raw)
To: Nick Piggin
Cc: Tim Chavez, Linux-rt, linux-kernel, Jean Pierre Dion, linux-ppc,
Paul Mackerras, Gilles Carry
On Thu, 24 Jul 2008 21:11:34 +1000 Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> On Thursday 24 July 2008 20:50, Sebastien Dugue wrote:
> > From: Sebastien Dugue <sebastien.dugue@bull.net>
> > Date: Tue, 22 Jul 2008 11:56:41 +0200
> > Subject: [PATCH][RT] powerpc - Make the irq reverse mapping radix tree
> > lockless
> >
> > The radix tree used by interrupt controllers for their irq reverse
> > mapping (currently only the XICS found on pSeries) have a complex locking
> > scheme dating back to before the advent of the concurrent radix tree on
> > preempt-rt.
> >
> > Take advantage of this and of the fact that the items of the tree are
> > pointers to a static array (irq_map) elements which can never go under us
> > to simplify the locking.
> >
> > Concurrency between readers and writers are handled by the intrinsic
> > properties of the concurrent radix tree. Concurrency between the tree
> > initialization which is done asynchronously with readers and writers access
> > is handled via an atomic variable (revmap_trees_allocated) set when the
> > tree has been initialized and checked before any reader or writer access
> > just like we used to check for tree.gfp_mask != 0 before.
>
> Hmm, RCU radix tree is in mainline too for quite a while. I thought
> Ben had already converted this code over ages ago...
Mainline does not have the concurrent radix tree which this patch
is based on, but maybe it's overkill and the RCU radix tree is enough.
Not sure, will have to think about it a bit more.
>
> Nothing against the -rt patch, but mainline should probably be updated
> to use RCU as well?
>
If rcu radix tree is enough, then definitely yes.
Sebastien.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-24 11:11 ` Nick Piggin
2008-07-24 12:18 ` Sebastien Dugue
@ 2008-07-25 5:13 ` Benjamin Herrenschmidt
1 sibling, 0 replies; 13+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-25 5:13 UTC (permalink / raw)
To: Nick Piggin
Cc: Tim Chavez, Linux-rt, linux-kernel, Jean Pierre Dion, linux-ppc,
Sebastien Dugue, Paul Mackerras, Gilles Carry
> > Concurrency between readers and writers are handled by the intrinsic
> > properties of the concurrent radix tree. Concurrency between the tree
> > initialization which is done asynchronously with readers and writers access
> > is handled via an atomic variable (revmap_trees_allocated) set when the
> > tree has been initialized and checked before any reader or writer access
> > just like we used to check for tree.gfp_mask != 0 before.
>
> Hmm, RCU radix tree is in mainline too for quite a while. I thought
> Ben had already converted this code over ages ago...
>
> Nothing against the -rt patch, but mainline should probably be updated
> to use RCU as well?
No, I haven't updated that code yet, and yes, we should do it :-)
Cheers,
Ben.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-24 12:18 ` Sebastien Dugue
@ 2008-07-25 7:49 ` Peter Zijlstra
2008-07-25 8:27 ` Benjamin Herrenschmidt
2008-07-25 8:34 ` Sebastien Dugue
0 siblings, 2 replies; 13+ messages in thread
From: Peter Zijlstra @ 2008-07-25 7:49 UTC (permalink / raw)
To: Sebastien Dugue
Cc: Nick Piggin, Tim Chavez, Linux-rt, linux-kernel, Jean Pierre Dion,
linux-ppc, Paul Mackerras, Gilles Carry
On Thu, 2008-07-24 at 14:18 +0200, Sebastien Dugue wrote:
> On Thu, 24 Jul 2008 21:11:34 +1000 Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> > On Thursday 24 July 2008 20:50, Sebastien Dugue wrote:
> > > From: Sebastien Dugue <sebastien.dugue@bull.net>
> > > Date: Tue, 22 Jul 2008 11:56:41 +0200
> > > Subject: [PATCH][RT] powerpc - Make the irq reverse mapping radix tree
> > > lockless
> > >
> > > The radix tree used by interrupt controllers for their irq reverse
> > > mapping (currently only the XICS found on pSeries) have a complex locking
> > > scheme dating back to before the advent of the concurrent radix tree on
> > > preempt-rt.
> > >
> > > Take advantage of this and of the fact that the items of the tree are
> > > pointers to a static array (irq_map) elements which can never go under us
> > > to simplify the locking.
> > >
> > > Concurrency between readers and writers are handled by the intrinsic
> > > properties of the concurrent radix tree. Concurrency between the tree
> > > initialization which is done asynchronously with readers and writers access
> > > is handled via an atomic variable (revmap_trees_allocated) set when the
> > > tree has been initialized and checked before any reader or writer access
> > > just like we used to check for tree.gfp_mask != 0 before.
> >
> > Hmm, RCU radix tree is in mainline too for quite a while. I thought
> > Ben had already converted this code over ages ago...
>
> Mainline does not have the concurrent radix tree which this patch
> is based on, but maybe it's overkill and the RCU radix tree is enough.
> Not sure, will have to think about it a bit more.
Should be. The model of the concurrent radix tree can be mapped to
spinlock + rcu radix tree.
So instead of:
> + DEFINE_RADIX_TREE_CONTEXT(ctx, tree);
> + radix_tree_lock(&ctx);
> + radix_tree_insert(ctx.tree, hwirq, &irq_map[virq]);
> + radix_tree_unlock(&ctx);
you then write:
spin_lock(&host->revmap_data.tree_lock);
radix_tree_insert(&host->revmap_data.tree, hwirq, &irq_map[virq]);
spin_unlock(&host->revmap_data.tree_lock);
The only advantage of the concurrent radix tree over this model is that
it can potentially do multiple modification operations at the same time.
Still, cool that you used it ;-)
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-25 7:49 ` Peter Zijlstra
@ 2008-07-25 8:27 ` Benjamin Herrenschmidt
2008-07-25 8:36 ` Sebastien Dugue
2008-07-25 8:34 ` Sebastien Dugue
1 sibling, 1 reply; 13+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-25 8:27 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Nick Piggin, Tim Chavez, Linux-rt, linux-kernel, Jean Pierre Dion,
linux-ppc, Sebastien Dugue, Paul Mackerras, Gilles Carry
On Fri, 2008-07-25 at 09:49 +0200, Peter Zijlstra wrote:
>
>
> The only advantage of the concurrent radix tree over this model is that
> it can potentially do multiple modification operations at the same time.
Yup, we do not need that for the irq revmap... concurrent lookup is all we need.
Cheers,
Ben.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-25 7:49 ` Peter Zijlstra
2008-07-25 8:27 ` Benjamin Herrenschmidt
@ 2008-07-25 8:34 ` Sebastien Dugue
1 sibling, 0 replies; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-25 8:34 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Nick Piggin, Tim Chavez, Linux-rt, linux-kernel, Jean Pierre Dion,
linux-ppc, Paul Mackerras, Gilles Carry
Hi Peter,
On Fri, 25 Jul 2008 09:49:37 +0200 Peter Zijlstra <peterz@infradead.org> wrote:
> On Thu, 2008-07-24 at 14:18 +0200, Sebastien Dugue wrote:
> > On Thu, 24 Jul 2008 21:11:34 +1000 Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> >
> > > On Thursday 24 July 2008 20:50, Sebastien Dugue wrote:
> > > > From: Sebastien Dugue <sebastien.dugue@bull.net>
> > > > Date: Tue, 22 Jul 2008 11:56:41 +0200
> > > > Subject: [PATCH][RT] powerpc - Make the irq reverse mapping radix tree
> > > > lockless
> > > >
> > > > The radix tree used by interrupt controllers for their irq reverse
> > > > mapping (currently only the XICS found on pSeries) have a complex locking
> > > > scheme dating back to before the advent of the concurrent radix tree on
> > > > preempt-rt.
> > > >
> > > > Take advantage of this and of the fact that the items of the tree are
> > > > pointers to a static array (irq_map) elements which can never go under us
> > > > to simplify the locking.
> > > >
> > > > Concurrency between readers and writers are handled by the intrinsic
> > > > properties of the concurrent radix tree. Concurrency between the tree
> > > > initialization which is done asynchronously with readers and writers access
> > > > is handled via an atomic variable (revmap_trees_allocated) set when the
> > > > tree has been initialized and checked before any reader or writer access
> > > > just like we used to check for tree.gfp_mask != 0 before.
> > >
> > > Hmm, RCU radix tree is in mainline too for quite a while. I thought
> > > Ben had already converted this code over ages ago...
> >
> > Mainline does not have the concurrent radix tree which this patch
> > is based on, but maybe it's overkill and the RCU radix tree is enough.
> > Not sure, will have to think about it a bit more.
>
> Should be. The model of the concurrent radix tree can be mapped to
> spinlock + rcu radix tree.
>
> So instead of:
>
> > + DEFINE_RADIX_TREE_CONTEXT(ctx, tree);
> > + radix_tree_lock(&ctx);
> > + radix_tree_insert(ctx.tree, hwirq, &irq_map[virq]);
> > + radix_tree_unlock(&ctx);
>
>
> you then write:
>
> spin_lock(&host->revmap_data.tree_lock);
> radix_tree_insert(&host->revmap_data.tree, hwirq, &irq_map[virq]);
> spin_unlock(&host->revmap_data.tree_lock);
>
Cool, that will indeed make it much easier to have something applicable
to mainline which works with preempt-rt.
>
> The only advantage of the concurrent radix tree over this model is that
> it can potentially do multiple modification operations at the same time.
Well in theory that can happen if a module is loaded which creates a mapping
while another one is unloaded at the same time. The time window is pretty narrow,
but still present nonetheless. That's why I chose to use the concurrent version.
>
> Still, cool that you used it ;-)
Yep, looked like what was needed until I realized it was not available in
mainline. Nice work though and good paper for explaining it all.
Sebastien.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-25 8:27 ` Benjamin Herrenschmidt
@ 2008-07-25 8:36 ` Sebastien Dugue
2008-07-25 8:40 ` Benjamin Herrenschmidt
0 siblings, 1 reply; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-25 8:36 UTC (permalink / raw)
To: benh
Cc: Nick Piggin, Tim Chavez, Linux-rt, Peter Zijlstra, linux-kernel,
Jean Pierre Dion, linux-ppc, Paul Mackerras, Gilles Carry
On Fri, 25 Jul 2008 18:27:20 +1000 Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> On Fri, 2008-07-25 at 09:49 +0200, Peter Zijlstra wrote:
> >
> >
> > The only advantage of the concurrent radix tree over this model is that
> > it can potentially do multiple modification operations at the same time.
>
> Yup, we do not need that for the irq revmap... concurrent lookup is all we need.
>
Shouldn't we care about concurrent insertion and deletion in the tree? I agree
that concern might be a bit artificial but in theory that can happen.
Sebastien.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-25 8:36 ` Sebastien Dugue
@ 2008-07-25 8:40 ` Benjamin Herrenschmidt
2008-07-25 8:47 ` Sebastien Dugue
0 siblings, 1 reply; 13+ messages in thread
From: Benjamin Herrenschmidt @ 2008-07-25 8:40 UTC (permalink / raw)
To: Sebastien Dugue
Cc: Nick Piggin, Tim Chavez, Linux-rt, Peter Zijlstra, linux-kernel,
Jean Pierre Dion, linux-ppc, Paul Mackerras, Gilles Carry
On Fri, 2008-07-25 at 10:36 +0200, Sebastien Dugue wrote:
> On Fri, 25 Jul 2008 18:27:20 +1000 Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
>
> > On Fri, 2008-07-25 at 09:49 +0200, Peter Zijlstra wrote:
> > >
> > >
> > > The only advantage of the concurrent radix tree over this model is that
> > > it can potentially do multiple modification operations at the same time.
> >
> > Yup, we do not need that for the irq revmap... concurrent lookup is all we need.
> >
>
> Shouldn't we care about concurrent insertion and deletion in the tree? I agree
> that concern might be a bit artificial but in theory that can happen.
Yes, we just need to protect it with a big hammer, like a spinlock, it's
not a performance critical code path.
Ben.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless
2008-07-25 8:40 ` Benjamin Herrenschmidt
@ 2008-07-25 8:47 ` Sebastien Dugue
0 siblings, 0 replies; 13+ messages in thread
From: Sebastien Dugue @ 2008-07-25 8:47 UTC (permalink / raw)
To: benh
Cc: Nick Piggin, Tim Chavez, Linux-rt, Peter Zijlstra, linux-kernel,
Jean Pierre Dion, linux-ppc, Paul Mackerras, Gilles Carry
On Fri, 25 Jul 2008 18:40:21 +1000 Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> On Fri, 2008-07-25 at 10:36 +0200, Sebastien Dugue wrote:
> > On Fri, 25 Jul 2008 18:27:20 +1000 Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> >
> > > On Fri, 2008-07-25 at 09:49 +0200, Peter Zijlstra wrote:
> > > >
> > > >
> > > > The only advantage of the concurrent radix tree over this model is that
> > > > it can potentially do multiple modification operations at the same time.
> > >
> > > Yup, we do not need that for the irq revmap... concurrent lookup is all we need.
> > >
> >
> > Shouldn't we care about concurrent insertion and deletion in the tree? I agree
> > that concern might be a bit artificial but in theory that can happen.
>
> Yes, we just need to protect it with a big hammer, like a spinlock, it's
> not a performance critical code path.
Agreed. Will look into this in the next few days.
Thanks,
Sebastien.
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2008-07-25 8:47 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-24 10:23 [PATCH 0/2][RT] powerpc - fix bug in irq reverse mapping radix tree (Resend) Sebastien Dugue
2008-07-24 10:48 ` [PATCH 1/2][RT] powerpc - XICS: move the call to irq_radix_revmap from xics_startup to xics_host_map Sebastien Dugue
2008-07-24 10:50 ` [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless Sebastien Dugue
2008-07-24 11:11 ` Nick Piggin
2008-07-24 12:18 ` Sebastien Dugue
2008-07-25 7:49 ` Peter Zijlstra
2008-07-25 8:27 ` Benjamin Herrenschmidt
2008-07-25 8:36 ` Sebastien Dugue
2008-07-25 8:40 ` Benjamin Herrenschmidt
2008-07-25 8:47 ` Sebastien Dugue
2008-07-25 8:34 ` Sebastien Dugue
2008-07-25 5:13 ` Benjamin Herrenschmidt
-- strict thread matches above, loose matches on Subject: below --
2008-07-23 14:59 [PATCH 0/2][RT] powerpc - fix bug in irq reverse mapping radix tree Sebastien Dugue
2008-07-23 15:01 ` [PATCH 2/2][RT] powerpc - Make the irq reverse mapping radix tree lockless Sebastien Dugue
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).