* [PATCH 5/5] irq: move irq_desc according to smp_affinity v5
@ 2008-12-06 3:00 Yinghai Lu
2008-12-08 13:42 ` Ingo Molnar
0 siblings, 1 reply; 6+ messages in thread
From: Yinghai Lu @ 2008-12-06 3:00 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel, Yinghai Lu
impact: new feature - move irq_desc with sparseirq
if CONFIG_MOVE_IRQ_DESC is set,
make irq_desc follow the irq affinity (aka irq_desc moving etc),
by calling move_irq_desc() in irq_complete_move().
legacy irq_descs are not moved, because they are allocated from a static array
v3: call irq_to_desc() again after calling ack/eoi instead of passing desc
for logical apic mode, we need to add move_desc_in_progress_in_same_domain, otherwise the irq_desc will not get moved. ==> it could also need two steps to get the irq_desc moved.
for example: 0xff is the old affinity; we need to set it to 0xf first, and then set it to 0xf0.
[ or do we need to change the domain definition to cpus on the same node ? ]
LBSuse:~ # cat /proc/irq/22/smp_affinity
00000000,00000000,00000000,000000ff
LBSuse:~ # echo f > /proc/irq/22/smp_affinity
LBSuse:~ # cat /proc/irq/22/smp_affinity
00000000,00000000,00000000,0000000f
LBSuse:~ # tail /var/log/messages
...
Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started.
Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present
LBSuse:~ # echo f0 > /proc/irq/22/smp_affinity
LBSuse:~ # tail /var/log/messages
Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started.
Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present
Oct 27 12:36:46 LBSuse kernel: move irq_desc for 22 aka 0x16 to cpu 7 node 1
Oct 27 12:36:46 LBSuse kernel: alloc kstat_irqs on cpu 7 node 1
Oct 27 12:36:46 LBSuse kernel: alloc irq_cfg on cpu 7 node 1
Oct 27 12:36:46 LBSuse kernel: alloc irq_2_pin on cpu 7 node 1
so we assume the user space program should first update /proc/irq/XX/smp_affinity to 03 or 0f on boot,
or should we change irq_default_affinity ?
physical apic mode is much simpler:
on a 4 socket, 16 core system
irq_desc is moving..
when
# echo 10 > /proc/irq/134483967/smp_affinity
# echo 100 > /proc/irq/134483967/smp_affinity
# echo 1000 > /proc/irq/134483967/smp_affinity
got
Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1
Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1
Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1
Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2
Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2
Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2
Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3
Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3
Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/Kconfig | 9 ++
arch/x86/kernel/io_apic.c | 143 +++++++++++++++++++++++++++++++++++++++++++++-
kernel/irq/chip.c | 30 +++++++++
kernel/irq/handle.c | 124 +++++++++++++++++++++++++++++++++++++++
4 files changed, 301 insertions(+), 5 deletions(-)
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -253,6 +253,15 @@ config SPARSE_IRQ
If you don't know what to do here, say Y.
+config MOVE_IRQ_DESC
+ bool "Move irq desc when changing irq smp_affinity"
+ depends on SPARSE_IRQ && SMP
+ default y
+ help
+ This enables moving the irq_desc to the cpu/node that will handle the irq.
+
+ If you don't know what to do here, say Y.
+
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
Index: linux-2.6/arch/x86/kernel/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6/arch/x86/kernel/io_apic.c
@@ -141,6 +141,9 @@ struct irq_cfg {
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
+#ifdef CONFIG_MOVE_IRQ_DESC
+ u8 move_desc_in_progress_in_same_domain : 1;
+#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -223,6 +226,122 @@ void arch_init_chip_data(struct irq_desc
}
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+
+/*
+ * Build a private copy of old_cfg's irq_2_pin list for cfg, with every
+ * entry obtained from get_one_free_irq_2_pin(cpu).
+ *
+ * @old_cfg: irq_cfg whose pin list is duplicated
+ * @cfg:     irq_cfg that receives the duplicate
+ * @cpu:     passed to get_one_free_irq_2_pin() for each new entry
+ *
+ * If any entry cannot be allocated, the partial copy is freed and cfg is
+ * left pointing at old_cfg's list.  free_irq_2_pin() does not free a list
+ * that both cfgs share, so the moved irq keeps its pins instead of ending
+ * up with an empty list while the original one is released.
+ */
+static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg,
+				 int cpu)
+{
+	struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+	/* share the old list until a complete copy exists */
+	cfg->irq_2_pin = old_cfg->irq_2_pin;
+	old_entry = old_cfg->irq_2_pin;
+	if (!old_entry)
+		return;
+
+	entry = get_one_free_irq_2_pin(cpu);
+	if (!entry)
+		return;
+
+	entry->apic = old_entry->apic;
+	entry->pin = old_entry->pin;
+	head = entry;
+	tail = entry;
+	old_entry = old_entry->next;
+
+	while (old_entry) {
+		entry = get_one_free_irq_2_pin(cpu);
+		if (!entry) {
+			/*
+			 * Terminate the partial copy explicitly so the walk
+			 * below does not depend on how entries are initialized.
+			 */
+			tail->next = NULL;
+			entry = head;
+			while (entry) {
+				head = entry->next;
+				kfree(entry);
+				entry = head;
+			}
+			/* still use the old one */
+			return;
+		}
+		entry->apic = old_entry->apic;
+		entry->pin = old_entry->pin;
+		tail->next = entry;
+		tail = entry;
+		old_entry = old_entry->next;
+	}
+
+	/* the copy is complete: switch cfg over to it */
+	tail->next = NULL;
+	cfg->irq_2_pin = head;
+}
+
+/*
+ * Release every entry of old_cfg's pin list and clear the list head,
+ * unless cfg points at the very same list (then nothing is freed).
+ */
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *cur = old_cfg->irq_2_pin;
+
+	/* list is shared with cfg: leave it alone */
+	if (cur == cfg->irq_2_pin)
+		return;
+
+	while (cur) {
+		struct irq_pin_list *victim = cur;
+
+		cur = cur->next;
+		kfree(victim);
+	}
+
+	old_cfg->irq_2_pin = NULL;
+}
+
+/*
+ * Give desc its own irq_cfg, obtained through get_one_free_irq_cfg(cpu)
+ * and filled from old_desc's chip_data.  When no irq_cfg can be obtained,
+ * desc->chip_data is not touched.
+ */
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+				 struct irq_desc *desc, int cpu)
+{
+	struct irq_cfg *src;
+	struct irq_cfg *dst = get_one_free_irq_cfg(cpu);
+
+	if (dst == NULL)
+		return;
+
+	desc->chip_data = dst;
+	src = old_desc->chip_data;
+
+	/* flat copy first, then rebuild the pin list for the new cfg */
+	memcpy(dst, src, sizeof(*dst));
+	init_copy_irq_2_pin(src, dst, cpu);
+}
+
+/* Release an irq_cfg with kfree(); the pin list it points to is not touched here. */
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+ kfree(old_cfg);
+}
+
+/*
+ * Drop old_desc's irq_cfg (pin list first, then the cfg itself) and clear
+ * old_desc->chip_data.  Nothing happens when old_desc has no chip_data or
+ * when both descriptors refer to the same irq_cfg.
+ */
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	struct irq_cfg *stale = old_desc->chip_data;
+	struct irq_cfg *live = desc->chip_data;
+
+	/* absent, or still in use by desc */
+	if (!stale || stale == live)
+		return;
+
+	free_irq_2_pin(stale, live);
+	free_irq_cfg(stale);
+	old_desc->chip_data = NULL;
+}
+
+/*
+ * Flag a pending irq_desc move for the case where no vector move is in
+ * progress: if the descriptor's current affinity shares no cpu with mask,
+ * set move_desc_in_progress_in_same_domain in its irq_cfg.
+ */
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+	struct irq_cfg *cfg = desc->chip_data;
+
+	/* a vector move is already pending */
+	if (cfg->move_in_progress)
+		return;
+
+	/* current affinity and mask overlap: nothing to flag */
+	if (cpus_intersects(desc->affinity, mask))
+		return;
+
+	/* the domain is not changed, but the affinity is */
+	cfg->move_desc_in_progress_in_same_domain = 1;
+}
+#endif
+
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
{
@@ -231,9 +350,11 @@ static struct irq_cfg *irq_cfg(unsigned
#endif
+#ifndef CONFIG_MOVE_IRQ_DESC
+/* No-op variant, compiled only when CONFIG_MOVE_IRQ_DESC is not defined. */
static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
}
+#endif
struct io_apic {
unsigned int index;
@@ -2346,14 +2467,34 @@ static void irq_complete_move(struct irq
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress))
+ if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_MOVE_IRQ_DESC
+ if (likely(!cfg->move_desc_in_progress_in_same_domain))
+ return;
+
+ /* domain is not change, but affinity is changed */
+ me = smp_processor_id();
+ if (cpu_isset(me, desc->affinity)) {
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+ cfg->move_desc_in_progress_in_same_domain = 0;
+ }
+#endif
return;
+ }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
cpumask_t cleanup_mask;
+#ifdef CONFIG_MOVE_IRQ_DESC
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+#endif
+
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -90,6 +90,32 @@ static void init_kstat_irqs(struct irq_d
desc->kstat_irqs = (unsigned int *)ptr;
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+/*
+ * Set up desc->kstat_irqs through init_kstat_irqs() and carry over the nr
+ * counters from old_desc.  The copy is skipped when both descriptors end
+ * up with the same kstat_irqs pointer.
+ */
+static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc,
+				 int cpu, int nr)
+{
+	unsigned long len;
+
+	init_kstat_irqs(desc, cpu, nr);
+
+	/* same buffer on both sides: nothing to copy */
+	if (desc->kstat_irqs == old_desc->kstat_irqs)
+		return;
+
+	/* one unsigned int counter per slot, nr slots */
+	len = nr * sizeof(unsigned int);
+	memcpy(desc->kstat_irqs, old_desc->kstat_irqs, len);
+}
+
+/*
+ * Free old_desc's kstat_irqs array and clear the pointer, but only when
+ * desc is not using that same array.
+ */
+static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+	if (old_desc->kstat_irqs != desc->kstat_irqs) {
+		kfree(old_desc->kstat_irqs);
+		old_desc->kstat_irqs = NULL;
+	}
+}
+#endif
+
void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
{
}
@@ -110,6 +136,23 @@ static void init_one_irq_desc(int irq, s
arch_init_chip_data(desc, cpu);
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+/*
+ * Initialize desc as a copy of old_desc that belongs to cpu.
+ *
+ * The whole structure is copied first, so every field (including the
+ * kstat_irqs and chip_data pointers and the lock) starts out identical to
+ * old_desc; the helpers below then replace kstat_irqs and chip_data.
+ * The irq argument is not referenced in this function.
+ */
+static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu)
+{
+ memcpy(desc, old_desc, sizeof(struct irq_desc));
+ desc->cpu = cpu;
+ /* the lock was copied by the memcpy above; set its lockdep class */
+ lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+ arch_init_copy_chip_data(old_desc, desc, cpu);
+}
+
+/*
+ * Release the per-descriptor data of old_desc (kstat_irqs, then the arch
+ * chip data).  desc is passed along so the helpers can compare pointers
+ * with it.  The irq_desc structure itself is not freed here.
+ */
+static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+ free_kstat_irqs(old_desc, desc);
+ arch_free_chip_data(old_desc, desc);
+}
+#endif
/*
* Protect the sparse_irqs:
*/
@@ -203,6 +246,73 @@ out_unlock:
return desc;
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+/*
+ * Replace old_desc with a copy allocated on the node of cpu.
+ *
+ * Runs under sparse_irq_lock.  Returns:
+ *  - the descriptor currently stored in irq_desc_ptrs[] if it is already
+ *    a different one than old_desc,
+ *  - old_desc if no new descriptor could be allocated,
+ *  - otherwise the new descriptor, after it has been stored in
+ *    irq_desc_ptrs[] and old_desc has been freed.
+ *
+ * NOTE(review): old_desc is kfree()d on the success path, so callers must
+ * continue with the returned pointer only.  The irq flow handlers appear
+ * to hold old_desc->lock across this call - confirm that nothing else can
+ * still reference old_desc at that point.
+ */
+static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
+ int cpu)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+ unsigned long flags;
+ int node;
+
+ irq = old_desc->irq;
+
+ spin_lock_irqsave(&sparse_irq_lock, flags);
+
+ /* We have to check it to avoid races with another CPU */
+ desc = irq_desc_ptrs[irq];
+
+ /* the table already holds a different descriptor: return that one */
+ if (desc && old_desc != desc)
+ goto out_unlock;
+
+ node = cpu_to_node(cpu);
+ desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+ /*
+ * NOTE(review): this message is printed before the allocation result
+ * is checked, and irq is unsigned int but formatted with %d.
+ */
+ printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
+ irq, cpu, node);
+ if (!desc) {
+ printk(KERN_ERR "can not get new irq_desc for moving\n");
+ /* still use old one */
+ desc = old_desc;
+ goto out_unlock;
+ }
+ init_copy_one_irq_desc(irq, old_desc, desc, cpu);
+
+ /* publish the new descriptor before the old one is released */
+ irq_desc_ptrs[irq] = desc;
+
+ /* free the old one */
+ free_one_irq_desc(old_desc, desc);
+ kfree(old_desc);
+
+out_unlock:
+ spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+ return desc;
+}
+
+/*
+ * Make desc follow cpu and return the descriptor to use from now on.
+ *
+ * Descriptors of legacy irqs (irq < NR_IRQS_LEGACY) are never moved and
+ * are returned unchanged.  If cpu is on the same node as the descriptor's
+ * current cpu, only desc->cpu is updated; otherwise the work is handed to
+ * __real_move_irq_desc() and its result is returned.
+ */
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+{
+	int prev_cpu;
+
+	/* legacy descriptors are static: do not move them */
+	if (desc->irq < NR_IRQS_LEGACY)
+		return desc;
+
+	prev_cpu = desc->cpu;
+	printk(KERN_DEBUG "try to move irq_desc from cpu %d to %d\n", prev_cpu, cpu);
+
+	if (prev_cpu == cpu)
+		return desc;
+
+	/* same node: just record the new cpu */
+	if (cpu_to_node(prev_cpu) == cpu_to_node(cpu)) {
+		desc->cpu = cpu;
+		return desc;
+	}
+
+	return __real_move_irq_desc(desc, cpu);
+}
+#endif
+
#else
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
@@ -337,8 +447,13 @@ unsigned int __do_IRQ(unsigned int irq)
/*
* No locking required for CPU-local interrupts:
*/
- if (desc->chip->ack)
+ if (desc->chip->ack) {
desc->chip->ack(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
@@ -349,8 +464,13 @@ unsigned int __do_IRQ(unsigned int irq)
}
spin_lock(&desc->lock);
- if (desc->chip->ack)
+ if (desc->chip->ack) {
desc->chip->ack(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
Index: linux-2.6/kernel/irq/chip.c
===================================================================
--- linux-2.6.orig/kernel/irq/chip.c
+++ linux-2.6/kernel/irq/chip.c
@@ -354,6 +354,10 @@ handle_level_irq(unsigned int irq, struc
spin_lock(&desc->lock);
mask_ack_irq(desc, irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
@@ -431,6 +435,10 @@ handle_fasteoi_irq(unsigned int irq, str
desc->status &= ~IRQ_INPROGRESS;
out:
desc->chip->eoi(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
spin_unlock(&desc->lock);
}
@@ -467,12 +475,20 @@ handle_edge_irq(unsigned int irq, struct
!desc->action)) {
desc->status |= (IRQ_PENDING | IRQ_MASKED);
mask_ack_irq(desc, irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
goto out_unlock;
}
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
desc->chip->ack(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
/* Mark the IRQ currently in progress.*/
desc->status |= IRQ_INPROGRESS;
@@ -533,8 +549,13 @@ handle_percpu_irq(unsigned int irq, stru
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- if (desc->chip->eoi)
+ if (desc->chip->eoi) {
desc->chip->eoi(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
}
void
@@ -569,8 +590,13 @@ __set_irq_handler(unsigned int irq, irq_
/* Uninstall? */
if (handle == handle_bad_irq) {
- if (desc->chip != &no_irq_chip)
+ if (desc->chip != &no_irq_chip) {
mask_ack_irq(desc, irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
desc->status |= IRQ_DISABLED;
desc->depth = 1;
}
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 2008-12-06 3:00 [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 Yinghai Lu @ 2008-12-08 13:42 ` Ingo Molnar 2008-12-08 19:18 ` Yinghai Lu 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu 0 siblings, 2 replies; 6+ messages in thread From: Ingo Molnar @ 2008-12-08 13:42 UTC (permalink / raw) To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel * Yinghai Lu <yinghai@kernel.org> wrote: > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > > spin_unlock(&desc->lock); > } > @@ -467,12 +475,20 @@ handle_edge_irq(unsigned int irq, struct > !desc->action)) { > desc->status |= (IRQ_PENDING | IRQ_MASKED); > mask_ack_irq(desc, irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > goto out_unlock; > } > kstat_incr_irqs_this_cpu(irq, desc); > > /* Start handling the irq */ > desc->chip->ack(irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > > /* Mark the IRQ currently in progress.*/ > desc->status |= IRQ_INPROGRESS; > @@ -533,8 +549,13 @@ handle_percpu_irq(unsigned int irq, stru > if (!noirqdebug) > note_interrupt(irq, desc, action_ret); > > - if (desc->chip->eoi) > + if (desc->chip->eoi) { > desc->chip->eoi(irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > + } > } > > void > @@ -569,8 +590,13 @@ __set_irq_handler(unsigned int irq, irq_ > > /* Uninstall? 
*/ > if (handle == handle_bad_irq) { > - if (desc->chip != &no_irq_chip) > + if (desc->chip != &no_irq_chip) { > mask_ack_irq(desc, irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif this patch adds a ton of #ifdefs to important .c files, which could all have been avoided by introducing a new method: desc = irq_remap_to_desc(irq, desc); which would do something like: static struct irq_desc * irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) { #ifdef CONFIG_MOVE_IRQ_DESC return irq_to_desc(irq); #else return desc; #endif } right? Ingo ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 2008-12-08 13:42 ` Ingo Molnar @ 2008-12-08 19:18 ` Yinghai Lu 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu 1 sibling, 0 replies; 6+ messages in thread From: Yinghai Lu @ 2008-12-08 19:18 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel On Mon, Dec 8, 2008 at 5:42 AM, Ingo Molnar <mingo@elte.hu> wrote: > >> mask_ack_irq(desc, irq); >> +#ifdef CONFIG_MOVE_IRQ_DESC >> + /* get new one */ >> + desc = irq_to_desc(irq); >> +#endif > > this patch adds a ton of #ifdefs to important .c files, which could all > have been avoided by introducing a new method: > > desc = irq_remap_to_desc(irq, desc); > > which would do something like: > > static struct irq_desc * > irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) > { > #ifdef CONFIG_MOVE_IRQ_DESC > return irq_to_desc(irq); > #else > return desc; > #endif > } > > right? yes. will work on it. YH ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] irq: move irq_desc according to smp_affinity v6 2008-12-08 13:42 ` Ingo Molnar 2008-12-08 19:18 ` Yinghai Lu @ 2008-12-08 22:07 ` Yinghai Lu 2008-12-09 3:41 ` Ingo Molnar 1 sibling, 1 reply; 6+ messages in thread From: Yinghai Lu @ 2008-12-08 22:07 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel impact: new feature move irq_desc with sparseirq if CONFIG_MOVE_IRQ_DESC is set make irq_desc to go with affinity aka irq_desc moving etc call move_irq_desc in irq_complete_move() legacy irq_desc is not moved, because they are allocated via static array v3: add calling to irq_to_desc after calling ack/eoi instead of passing desc v6: use irq_remap_to_desc to avoid some #ifdef according to Ingo for logical apic mode, need to add move_desc_in_progress_in_same_domain. otherwise it will not get moved. ==> also could need two phase to get irq_desc moved. for example: 0xff is old affinity, and need to set 0xf, and then set to 0xf0. [ or we need to change domain definition to cpus on the same node ? ] LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,000000ff LBSuse:~ # echo f > /proc/irq/22/smp_affinity LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,0000000f LBSuse:~ # tail /var/log/messages ... Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present LBSuse:~ # echo f0 > /proc/irq/22/smp_affinity LBSuse:~ # tail /var/log/messages Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. 
Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present Oct 27 12:36:46 LBSuse kernel: move irq_desc for 22 aka 0x16 to cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc kstat_irqs on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_cfg on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_2_pin on cpu 7 node 1 so assume the user space program should update /proc/irq/XX/smp_affinity to 03 or 0f at first on boot or we change irq_default_affinity ? for physical apic is much simple on 4 sockets 16 cores system irq_desc is moving.. when # echo 10 > /proc/irq/134483967/smp_affinity # echo 100 > /proc/irq/134483967/smp_affinity # echo 1000 > /proc/irq/134483967/smp_affinity got Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1 Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2 Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3 Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 9 ++ arch/x86/kernel/io_apic.c | 143 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/irq.h | 10 +++ kernel/irq/chip.c | 12 +++ kernel/irq/handle.c | 119 +++++++++++++++++++++++++++++++++++++- 5 files changed, 288 insertions(+), 5 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -253,6 +253,15 @@ config SPARSE_IRQ If you don't know what to do here, say Y. 
+config MOVE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && SMP + default y + help + This enables moving irq_desc to the cpu/node on which the irq will be handled. + + If you don't know what to do here, say Y. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Index: linux-2.6/arch/x86/kernel/io_apic.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/io_apic.c +++ linux-2.6/arch/x86/kernel/io_apic.c @@ -141,6 +141,9 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; +#ifdef CONFIG_MOVE_IRQ_DESC + u8 move_desc_in_progress_in_same_domain : 1; +#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -223,6 +226,122 @@ void arch_init_chip_data(struct irq_desc } } +#ifdef CONFIG_MOVE_IRQ_DESC + +static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, + int cpu) +{ + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } + + tail->next = NULL; + cfg->irq_2_pin = head; +} + +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; + + entry = old_cfg->irq_2_pin; + + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } +
old_cfg->irq_2_pin = NULL; +} + +void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; + + cfg = get_one_free_irq_cfg(cpu); + + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); +} + +static void free_irq_cfg(struct irq_cfg *old_cfg) +{ + kfree(old_cfg); +} + +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +{ + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } +} + +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_in_progress_in_same_domain = 1; + } +} +#endif + #else static struct irq_cfg *irq_cfg(unsigned int irq) { @@ -231,9 +350,11 @@ static struct irq_cfg *irq_cfg(unsigned #endif +#ifndef CONFIG_MOVE_IRQ_DESC static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } +#endif struct io_apic { unsigned int index; @@ -2346,14 +2467,34 @@ static void irq_complete_move(struct irq struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { +#ifdef CONFIG_MOVE_IRQ_DESC + if (likely(!cfg->move_desc_in_progress_in_same_domain)) + return; + + /* domain is not change, but affinity is changed */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_in_progress_in_same_domain = 0; + } +#endif return; + } vector = 
~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; +#ifdef CONFIG_MOVE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; +#endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); Index: linux-2.6/kernel/irq/handle.c =================================================================== --- linux-2.6.orig/kernel/irq/handle.c +++ linux-2.6/kernel/irq/handle.c @@ -90,6 +90,32 @@ static void init_kstat_irqs(struct irq_d desc->kstat_irqs = (unsigned int *)ptr; } +#ifdef CONFIG_MOVE_IRQ_DESC +static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, + int cpu, int nr) +{ + unsigned long bytes; + + init_kstat_irqs(desc, cpu, nr); + + if (desc->kstat_irqs != old_desc->kstat_irqs) { + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); + } +} + +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +{ + if (old_desc->kstat_irqs == desc->kstat_irqs) + return; + + kfree(old_desc->kstat_irqs); + old_desc->kstat_irqs = NULL; +} +#endif + void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) { } @@ -110,6 +136,23 @@ static void init_one_irq_desc(int irq, s arch_init_chip_data(desc, cpu); } +#ifdef CONFIG_MOVE_IRQ_DESC +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + memcpy(desc, old_desc, sizeof(struct irq_desc)); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + arch_init_copy_chip_data(old_desc, desc, cpu); +} + +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +{ + 
free_kstat_irqs(old_desc, desc); + arch_free_chip_data(old_desc, desc); +} +#endif /* * Protect the sparse_irqs: */ @@ -203,6 +246,73 @@ out_unlock: return desc; } +#ifdef CONFIG_MOVE_IRQ_DESC +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, + int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long flags; + int node; + + irq = old_desc->irq; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + + if (desc && old_desc != desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not get new irq_desc for moving\n"); + /* still use old one */ + desc = old_desc; + goto out_unlock; + } + init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + + /* free the old one */ + free_one_irq_desc(old_desc, desc); + kfree(old_desc); + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +{ + int old_cpu; + int node, old_node; + + /* legacy irq_descs are statically allocated, do not move them */ + if (desc->irq < NR_IRQS_LEGACY) + return desc; + + old_cpu = desc->cpu; + printk(KERN_DEBUG "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); + if (old_cpu != cpu) { + node = cpu_to_node(cpu); + old_node = cpu_to_node(old_cpu); + if (old_node != node) + desc = __real_move_irq_desc(desc, cpu); + else + desc->cpu = cpu; + } + + return desc; +} +#endif + #else struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { @@ -337,8 +447,11 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + /* get new one */ + desc = irq_remap_to_desc(irq, desc); + } if (likely(!(desc->status &
IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -349,8 +462,10 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested Index: linux-2.6/kernel/irq/chip.c =================================================================== --- linux-2.6.orig/kernel/irq/chip.c +++ linux-2.6/kernel/irq/chip.c @@ -354,6 +354,7 @@ handle_level_irq(unsigned int irq, struc spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -431,6 +432,7 @@ handle_fasteoi_irq(unsigned int irq, str desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -467,12 +469,14 @@ handle_edge_irq(unsigned int irq, struct !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -533,8 +537,10 @@ handle_percpu_irq(unsigned int irq, stru if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@ -569,8 +575,10 @@ __set_irq_handler(unsigned int irq, irq_ /* Uninstall? 
*/ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } Index: linux-2.6/include/linux/irq.h =================================================================== --- linux-2.6.orig/include/linux/irq.h +++ linux-2.6/include/linux/irq.h @@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(st #endif +static inline struct irq_desc * +irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_MOVE_IRQ_DESC + return irq_to_desc(irq); +#else + return desc; +#endif +} + /* * Migration helpers for obsolete names, they will go away: */ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] irq: move irq_desc according to smp_affinity v6 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu @ 2008-12-09 3:41 ` Ingo Molnar 2008-12-11 8:15 ` [PATCH] irq: move irq_desc according to smp_affinity v7 Yinghai Lu 0 siblings, 1 reply; 6+ messages in thread From: Ingo Molnar @ 2008-12-09 3:41 UTC (permalink / raw) To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel * Yinghai Lu <yinghai@kernel.org> wrote: > for physical apic is much simple > on 4 sockets 16 cores system > irq_desc is moving.. > when > # echo 10 > /proc/irq/134483967/smp_affinity > # echo 100 > /proc/irq/134483967/smp_affinity > # echo 1000 > /proc/irq/134483967/smp_affinity > got > Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1 > Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1 > Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1 > Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2 > Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2 > Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2 > Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3 > Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3 > Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3 > > Signed-off-by: Yinghai Lu <yinghai@kernel.org> Neat feature! i'm wondering, have you tried to characterise the cost savings of moving the irq desc? It will certainly save three heavy cross-NUMA cachemisses on x86 per rare irq source. A way to attempt to measure this would be to write some quick debug hack that prints the cycle count of one specific IRQ source, in do_IRQ(), from the entry of do_IRQ() to the exit of do_IRQ(), using rdtscl(). Pick an IRQ that you can trigger arbitrarily, and printk the cycle cost at the end of do_IRQ(). 
[if irq == your_debug_irq - otherwise you can get a lot of printks and not too good measurements]. plus perhaps add some quick hack that makes the irq_desc/chip_data/kstat_irqs migration dependent on a sysctl, such as 'panic_timeout' (tunable via 'echo 1 > /proc/sys/kernel/panic'). Then you could try to trigger your debug IRQ and the cycle cost printk in two modes: echo 0 > /proc/sys/kernel/panic [ migrate the IRQ to another domain and trigger the IRQ - wait for the cycle printout. Both cache-cold and cache-hot numbers are interesting. ] echo 1 > /proc/sys/kernel/panic [ re-migrate the debug IRQ via /proc/irq/*/smp_affinity to make sure it's NUMA-local, then trigger the debug IRQ and record cache-cold and cache-hot cycle counts. ] it's hard to measure this reliably, as on x86 the numa factor is usually pretty low, so the local versus remote cachemiss cost is hard to separate. A few comments about the patch too: > +config MOVE_IRQ_DESC > + bool "Move irq desc when changing irq smp_affinity" > + depends on SPARSE_IRQ && SMP > + default y new feature - should be default-no. > + help > + This enables moving irq_desc to cpu/node that irq will use handled. > + > + If you don't know what to do here, say Y. Later on i think we should just select this in the NUMA case, instead of complicating the user's selection. It's OK to have it configurable now - should it cause problems. > + > config X86_FIND_SMP_CONFIG > def_bool y > depends on X86_MPPARSE || X86_VOYAGER > Index: linux-2.6/arch/x86/kernel/io_apic.c > =================================================================== > --- linux-2.6.orig/arch/x86/kernel/io_apic.c > +++ linux-2.6/arch/x86/kernel/io_apic.c > @@ -141,6 +141,9 @@ struct irq_cfg { > unsigned move_cleanup_count; > u8 vector; > u8 move_in_progress : 1; > +#ifdef CONFIG_MOVE_IRQ_DESC > + u8 move_desc_in_progress_in_same_domain : 1; > +#endif way too long field name - please rename to move_desc_pending or so. 
> @@ -223,6 +226,122 @@ void arch_init_chip_data(struct irq_desc > } > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > + > +static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, > + int cpu) > +{ small style nit, it's a tiny bit tidier to break the line the following way: static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) [ as this way we have all the parameters on a single line, and the return type stands out on a separate line. ] > + struct irq_pin_list *old_entry, *head, *tail, *entry; > + > + cfg->irq_2_pin = NULL; > + old_entry = old_cfg->irq_2_pin; > + if (!old_entry) > + return; > + > + entry = get_one_free_irq_2_pin(cpu); > + if (!entry) > + return; > + > + entry->apic = old_entry->apic; > + entry->pin = old_entry->pin; > + head = entry; > + tail = entry; > + old_entry = old_entry->next; for mass-initialization please try to structure it a bit: > + entry->apic = old_entry->apic; > + entry->pin = old_entry->pin; > + head = entry; > + tail = entry; > + > + old_entry = old_entry->next; it's much easier to validate such constructs. For example, once vertically aligned, i immediately saw an oddity in it - why is 'old_entry' initialized twice? 
> + > + while (old_entry) { > + entry = get_one_free_irq_2_pin(cpu); > + if (!entry) { > + entry = head; > + while (entry) { > + head = entry->next; > + kfree(entry); > + entry = head; > + } > + /* still use the old one */ > + return; > + } same here: > + entry->apic = old_entry->apic; > + entry->pin = old_entry->pin; > + tail->next = entry; > + tail = entry; > + old_entry = old_entry->next; > + } > + > + tail->next = NULL; > + cfg->irq_2_pin = head; > +} > + > +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) > +{ > + struct irq_pin_list *entry, *next; > + > + if (old_cfg->irq_2_pin == cfg->irq_2_pin) > + return; > + > + entry = old_cfg->irq_2_pin; > + > + while (entry) { > + next = entry->next; > + kfree(entry); > + entry = next; > + } > + old_cfg->irq_2_pin = NULL; > +} > + > +void arch_init_copy_chip_data(struct irq_desc *old_desc, > + struct irq_desc *desc, int cpu) > +{ > + struct irq_cfg *cfg; > + struct irq_cfg *old_cfg; > + > + cfg = get_one_free_irq_cfg(cpu); > + > + if (!cfg) > + return; > + > + desc->chip_data = cfg; > + > + old_cfg = old_desc->chip_data; > + > + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); > + > + init_copy_irq_2_pin(old_cfg, cfg, cpu); > +} > + > +static void free_irq_cfg(struct irq_cfg *old_cfg) > +{ > + kfree(old_cfg); > +} > + > +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) > +{ > + struct irq_cfg *old_cfg, *cfg; > + > + old_cfg = old_desc->chip_data; > + cfg = desc->chip_data; > + > + if (old_cfg == cfg) > + return; > + > + if (old_cfg) { > + free_irq_2_pin(old_cfg, cfg); > + free_irq_cfg(old_cfg); > + old_desc->chip_data = NULL; > + } > +} > + > +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) > +{ > + struct irq_cfg *cfg = desc->chip_data; > + > + if (!cfg->move_in_progress) { > + /* it means that domain is not changed */ > + if (!cpus_intersects(desc->affinity, mask)) > + cfg->move_desc_in_progress_in_same_domain = 1; > + } > +} > +#endif > + > #else 
> static struct irq_cfg *irq_cfg(unsigned int irq) > { > @@ -231,9 +350,11 @@ static struct irq_cfg *irq_cfg(unsigned > > #endif > > +#ifndef CONFIG_MOVE_IRQ_DESC > static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) > { > } > +#endif > > struct io_apic { > unsigned int index; > @@ -2346,14 +2467,34 @@ static void irq_complete_move(struct irq > struct irq_cfg *cfg = desc->chip_data; > unsigned vector, me; > > - if (likely(!cfg->move_in_progress)) > + if (likely(!cfg->move_in_progress)) { > +#ifdef CONFIG_MOVE_IRQ_DESC > + if (likely(!cfg->move_desc_in_progress_in_same_domain)) > + return; > + > + /* domain is not change, but affinity is changed */ > + me = smp_processor_id(); > + if (cpu_isset(me, desc->affinity)) { > + *descp = desc = move_irq_desc(desc, me); > + /* get the new one */ > + cfg = desc->chip_data; > + cfg->move_desc_in_progress_in_same_domain = 0; > + } > +#endif > return; > + } > > vector = ~get_irq_regs()->orig_ax; > me = smp_processor_id(); > if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { > cpumask_t cleanup_mask; > > +#ifdef CONFIG_MOVE_IRQ_DESC > + *descp = desc = move_irq_desc(desc, me); > + /* get the new one */ > + cfg = desc->chip_data; > +#endif > + > cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); > cfg->move_cleanup_count = cpus_weight(cleanup_mask); > send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); > Index: linux-2.6/kernel/irq/handle.c > =================================================================== > --- linux-2.6.orig/kernel/irq/handle.c > +++ linux-2.6/kernel/irq/handle.c > @@ -90,6 +90,32 @@ static void init_kstat_irqs(struct irq_d > desc->kstat_irqs = (unsigned int *)ptr; > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > +static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, > + int cpu, int nr) > +{ > + unsigned long bytes; > + > + init_kstat_irqs(desc, cpu, nr); > + > + if (desc->kstat_irqs != old_desc->kstat_irqs) { > + /* Compute how many bytes we 
need per irq and allocate them */ > + bytes = nr * sizeof(unsigned int); > + > + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); > + } > +} > + > +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) > +{ > + if (old_desc->kstat_irqs == desc->kstat_irqs) > + return; > + > + kfree(old_desc->kstat_irqs); > + old_desc->kstat_irqs = NULL; > +} > +#endif > + > void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) > { > } > @@ -110,6 +136,23 @@ static void init_one_irq_desc(int irq, s > arch_init_chip_data(desc, cpu); > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, > + struct irq_desc *desc, int cpu) > +{ > + memcpy(desc, old_desc, sizeof(struct irq_desc)); > + desc->cpu = cpu; > + lockdep_set_class(&desc->lock, &irq_desc_lock_class); > + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); > + arch_init_copy_chip_data(old_desc, desc, cpu); > +} > + > +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) > +{ > + free_kstat_irqs(old_desc, desc); > + arch_free_chip_data(old_desc, desc); > +} > +#endif > /* > * Protect the sparse_irqs: > */ > @@ -203,6 +246,73 @@ out_unlock: > return desc; > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, > + int cpu) > +{ > + struct irq_desc *desc; > + unsigned int irq; > + unsigned long flags; > + int node; > + > + irq = old_desc->irq; > + > + spin_lock_irqsave(&sparse_irq_lock, flags); > + > + /* We have to check it to avoid races with another CPU */ > + desc = irq_desc_ptrs[irq]; > + > + if (desc && old_desc != desc) > + goto out_unlock; > + > + node = cpu_to_node(cpu); > + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); > + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", > + irq, cpu, node); > + if (!desc) { > + printk(KERN_ERR "can not get new irq_desc for moving\n"); > + /* still use old one */ > + desc = 
old_desc; > + goto out_unlock; > + } > + init_copy_one_irq_desc(irq, old_desc, desc, cpu); > + > + irq_desc_ptrs[irq] = desc; > + > + /* free the old one */ > + free_one_irq_desc(old_desc, desc); > + kfree(old_desc); > + > +out_unlock: > + spin_unlock_irqrestore(&sparse_irq_lock, flags); > + > + return desc; > +} > + > +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) > +{ > + int old_cpu; > + int node, old_node; > + > + /* those all static, do move them */ > + if (desc->irq < NR_IRQS_LEGACY) > + return desc; > + > + old_cpu = desc->cpu; > + printk(KERN_DEBUG "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); > + if (old_cpu != cpu) { > + node = cpu_to_node(cpu); > + old_node = cpu_to_node(old_cpu); > + if (old_node != node) > + desc = __real_move_irq_desc(desc, cpu); > + else > + desc->cpu = cpu; > + } > + > + return desc; > +} > +#endif Still a bit too much of #ifdeffery for my taste in kernel/irq/*.c, we tend to have higher maintenance costs in files that have a lot of #ifdefs. Wouldnt it look neater if you introduced a new kernel/irq/numa_migrate.c function that would provide these methods, with the prototypes being #ifdef-ed to inlines in the !CONFIG_MOVE_IRQ_DESC case in kernel/irq/internals.h? i'd also suggest to rename the config option to the more descriptive: CONFIG_NUMA_MIGRATE_IRQ_DESC name. > /* > * No locking required for CPU-local interrupts: > */ > - if (desc->chip->ack) > + if (desc->chip->ack) { > desc->chip->ack(irq); > + /* get new one */ > + desc = irq_remap_to_desc(irq, desc); > + } thanks for fixing this - it looks much nicer now! Ingo ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] irq: move irq_desc according to smp_affinity v7 2008-12-09 3:41 ` Ingo Molnar @ 2008-12-11 8:15 ` Yinghai Lu 0 siblings, 0 replies; 6+ messages in thread From: Yinghai Lu @ 2008-12-11 8:15 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel impact: new feature move irq_desc with sparseirq if CONFIG_NUMA_MIGRATE_IRQ_DESC is set make irq_desc to go with affinity aka irq_desc moving etc call move_irq_desc in irq_complete_move() legacy irq_desc is not moved, because they are allocated via static array v3: add calling to irq_to_desc after calling ack/eoi instead of passing desc v6: use irq_remap_to_desc to avoid some #ifdef according to Ingo for logical apic mode, need to add move_desc_in_progress_in_same_domain. otherwise it will not get moved. ==> also could need two phase to get irq_desc moved. for example: 0xff is old affinity, and need to set 0xf, and then set to 0xf0. [ or we need to change domain definition to cpus on the same node ? ] LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,000000ff LBSuse:~ # echo f > /proc/irq/22/smp_affinity LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,0000000f LBSuse:~ # tail /var/log/messages ... Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present LBSuse:~ # echo f0 > /proc/irq/22/smp_affinity LBSuse:~ # tail /var/log/messages Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. 
Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present Oct 27 12:36:46 LBSuse kernel: move irq_desc for 22 aka 0x16 to cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc kstat_irqs on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_cfg on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_2_pin on cpu 7 node 1 so assume the user space program should update /proc/irq/XX/smp_affinity to 03 or 0f at first on boot or we change irq_default_affinity ? for physical apic is much simple on 4 sockets 16 cores system irq_desc is moving.. when # echo 10 > /proc/irq/134483967/smp_affinity # echo 100 > /proc/irq/134483967/smp_affinity # echo 1000 > /proc/irq/134483967/smp_affinity got Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1 Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2 Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3 Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 9 ++ arch/x86/kernel/io_apic.c | 142 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/irq.h | 10 +++ kernel/irq/Makefile | 1 kernel/irq/chip.c | 12 +++ kernel/irq/handle.c | 15 +++- kernel/irq/internals.h | 5 + kernel/irq/numa_migrate.c | 125 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 311 insertions(+), 8 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -253,6 +253,15 @@ config SPARSE_IRQ If you don't know what to do here, say Y. 
+config NUMA_MIGRATE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && SMP + default n + help + This enables moving irq_desc to cpu/node that irq will use handled. + + If you don't know what to do here, say N. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Index: linux-2.6/arch/x86/kernel/io_apic.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/io_apic.c +++ linux-2.6/arch/x86/kernel/io_apic.c @@ -141,6 +141,9 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + u8 move_desc_pending : 1; +#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc } } +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + +static void +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +{ + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } + + tail->next = NULL; + cfg->irq_2_pin = head; +} + +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; + + entry = old_cfg->irq_2_pin; + + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + 
old_cfg->irq_2_pin = NULL; +} + +void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; + + cfg = get_one_free_irq_cfg(cpu); + + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); +} + +static void free_irq_cfg(struct irq_cfg *old_cfg) +{ + kfree(old_cfg); +} + +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +{ + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } +} + +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } +} +#endif + #else static struct irq_cfg *irq_cfg(unsigned int irq) { @@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned #endif +#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } +#endif struct io_apic { unsigned int index; @@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + if (likely(!cfg->move_desc_pending)) + return; + + /* domain is not change, but affinity is changed */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_pending = 0; + } +#endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if 
((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; +#endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); Index: linux-2.6/kernel/irq/handle.c =================================================================== --- linux-2.6.orig/kernel/irq/handle.c +++ linux-2.6/kernel/irq/handle.c @@ -23,7 +23,7 @@ /* * lockdep: we want to handle all irq_desc locks as a single lock-class: */ -static struct lock_class_key irq_desc_lock_class; +struct lock_class_key irq_desc_lock_class; /** * handle_bad_irq - handle spurious and unhandled irqs @@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = { #endif }; -static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) { unsigned long bytes; char *ptr; @@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, s /* * Protect the sparse_irqs: */ -static DEFINE_SPINLOCK(sparse_irq_lock); +DEFINE_SPINLOCK(sparse_irq_lock); struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; @@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + /* get new one */ + desc = irq_remap_to_desc(irq, desc); + } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested Index: linux-2.6/kernel/irq/chip.c 
=================================================================== --- linux-2.6.orig/kernel/irq/chip.c +++ linux-2.6/kernel/irq/chip.c @@ -354,6 +354,7 @@ handle_level_irq(unsigned int irq, struc spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -431,6 +432,7 @@ handle_fasteoi_irq(unsigned int irq, str desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -467,12 +469,14 @@ handle_edge_irq(unsigned int irq, struct !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -533,8 +537,10 @@ handle_percpu_irq(unsigned int irq, stru if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@ -569,8 +575,10 @@ __set_irq_handler(unsigned int irq, irq_ /* Uninstall? 
*/ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } Index: linux-2.6/include/linux/irq.h =================================================================== --- linux-2.6.orig/include/linux/irq.h +++ linux-2.6/include/linux/irq.h @@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(st #endif +static inline struct irq_desc * +irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + return irq_to_desc(irq); +#else + return desc; +#endif +} + /* * Migration helpers for obsolete names, they will go away: */ Index: linux-2.6/kernel/irq/numa_migrate.c =================================================================== --- /dev/null +++ linux-2.6/kernel/irq/numa_migrate.c @@ -0,0 +1,127 @@ +/* + * linux/kernel/irq/handle.c + * + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the core interrupt handling code. 
+ * + * Detailed information is available in Documentation/DocBook/genericirq + * + */ + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> + +#include "internals.h" + +static void init_copy_kstat_irqs(struct irq_desc *old_desc, + struct irq_desc *desc, + int cpu, int nr) +{ + unsigned long bytes; + + init_kstat_irqs(desc, cpu, nr); + + if (desc->kstat_irqs != old_desc->kstat_irqs) { + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); + } +} + +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +{ + if (old_desc->kstat_irqs == desc->kstat_irqs) + return; + + kfree(old_desc->kstat_irqs); + old_desc->kstat_irqs = NULL; +} + +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + memcpy(desc, old_desc, sizeof(struct irq_desc)); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + arch_init_copy_chip_data(old_desc, desc, cpu); +} + +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +{ + free_kstat_irqs(old_desc, desc); + arch_free_chip_data(old_desc, desc); +} + +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, + int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long flags; + int node; + + irq = old_desc->irq; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + + if (desc && old_desc != desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not get new irq_desc for moving\n"); + /* 
still use old one */ + desc = old_desc; + goto out_unlock; + } + init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + + /* free the old one */ + free_one_irq_desc(old_desc, desc); + kfree(old_desc); + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +{ + int old_cpu; + int node, old_node; + + /* those all static, do move them */ + if (desc->irq < NR_IRQS_LEGACY) + return desc; + + old_cpu = desc->cpu; + printk(KERN_DEBUG + "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); + if (old_cpu != cpu) { + node = cpu_to_node(cpu); + old_node = cpu_to_node(old_cpu); + if (old_node != node) + desc = __real_move_irq_desc(desc, cpu); + else + desc->cpu = cpu; + } + + return desc; +} + Index: linux-2.6/kernel/irq/Makefile =================================================================== --- linux-2.6.orig/kernel/irq/Makefile +++ linux-2.6/kernel/irq/Makefile @@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o re obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o +obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o Index: linux-2.6/kernel/irq/internals.h =================================================================== --- linux-2.6.orig/kernel/irq/internals.h +++ linux-2.6/kernel/irq/internals.h @@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); +extern struct lock_class_key irq_desc_lock_class; +extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern spinlock_t sparse_irq_lock; +extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; + #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void register_handler_proc(unsigned int irq, struct irqaction *action); ^ permalink raw reply [flat|nested] 6+ 
messages in thread
end of thread, other threads:[~2008-12-11 8:16 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-12-06 3:00 [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 Yinghai Lu 2008-12-08 13:42 ` Ingo Molnar 2008-12-08 19:18 ` Yinghai Lu 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu 2008-12-09 3:41 ` Ingo Molnar 2008-12-11 8:15 ` [PATCH] irq: move irq_desc according to smp_affinity v7 Yinghai Lu
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox