* [PATCH 5/5] irq: move irq_desc according to smp_affinity v5
@ 2008-12-06 3:00 Yinghai Lu
2008-12-08 13:42 ` Ingo Molnar
0 siblings, 1 reply; 6+ messages in thread
From: Yinghai Lu @ 2008-12-06 3:00 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Andrew Morton
Cc: linux-kernel, Yinghai Lu
impact: new feature move irq_desc with sparseirq
if CONFIG_MOVE_IRQ_DESC is set
make irq_desc to go with affinity aka irq_desc moving etc
call move_irq_desc in irq_complete_move()
legacy irq_desc is not moved, because they are allocated via static array
v3: add calling to irq_to_desc after calling ack/eoi instead of passing desc
for logical apic mode, need to add move_desc_in_progress_in_same_domain. otherwise it will not get moved. ==> also could need two phase to get irq_desc moved.
for example: 0xff is old affinity, and need to set 0xf, and then set to 0xf0.
[ or we need to change domain definition to cpus on the same node ? ]
LBSuse:~ # cat /proc/irq/22/smp_affinity
00000000,00000000,00000000,000000ff
LBSuse:~ # echo f > /proc/irq/22/smp_affinity
LBSuse:~ # cat /proc/irq/22/smp_affinity
00000000,00000000,00000000,0000000f
LBSuse:~ # tail /var/log/messages
...
Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started.
Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present
LBSuse:~ # echo f0 > /proc/irq/22/smp_affinity
LBSuse:~ # tail /var/log/messages
Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started.
Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present
Oct 27 12:36:46 LBSuse kernel: move irq_desc for 22 aka 0x16 to cpu 7 node 1
Oct 27 12:36:46 LBSuse kernel: alloc kstat_irqs on cpu 7 node 1
Oct 27 12:36:46 LBSuse kernel: alloc irq_cfg on cpu 7 node 1
Oct 27 12:36:46 LBSuse kernel: alloc irq_2_pin on cpu 7 node 1
so assume the user space program should update /proc/irq/XX/smp_affinity to 03 or 0f at first on boot
or we change irq_default_affinity ?
for physical apic is much simple
on 4 sockets 16 cores system
irq_desc is moving..
when
# echo 10 > /proc/irq/134483967/smp_affinity
# echo 100 > /proc/irq/134483967/smp_affinity
# echo 1000 > /proc/irq/134483967/smp_affinity
got
Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1
Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1
Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1
Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2
Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2
Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2
Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3
Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3
Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
---
arch/x86/Kconfig | 9 ++
arch/x86/kernel/io_apic.c | 143 +++++++++++++++++++++++++++++++++++++++++++++-
kernel/irq/chip.c | 30 +++++++++
kernel/irq/handle.c | 124 +++++++++++++++++++++++++++++++++++++++
4 files changed, 301 insertions(+), 5 deletions(-)
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -253,6 +253,15 @@ config SPARSE_IRQ
If you don't know what to do here, say Y.
+config MOVE_IRQ_DESC
+ bool "Move irq desc when changing irq smp_affinity"
+ depends on SPARSE_IRQ && SMP
+ default y
+ help
+ This enables moving irq_desc to cpu/node that irq will use handled.
+
+ If you don't know what to do here, say Y.
+
config X86_FIND_SMP_CONFIG
def_bool y
depends on X86_MPPARSE || X86_VOYAGER
Index: linux-2.6/arch/x86/kernel/io_apic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/io_apic.c
+++ linux-2.6/arch/x86/kernel/io_apic.c
@@ -141,6 +141,9 @@ struct irq_cfg {
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
+#ifdef CONFIG_MOVE_IRQ_DESC
+ u8 move_desc_in_progress_in_same_domain : 1;
+#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -223,6 +226,122 @@ void arch_init_chip_data(struct irq_desc
}
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+
+static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg,
+ int cpu)
+{
+ struct irq_pin_list *old_entry, *head, *tail, *entry;
+
+ cfg->irq_2_pin = NULL;
+ old_entry = old_cfg->irq_2_pin;
+ if (!old_entry)
+ return;
+
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry)
+ return;
+
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ head = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+
+ while (old_entry) {
+ entry = get_one_free_irq_2_pin(cpu);
+ if (!entry) {
+ entry = head;
+ while (entry) {
+ head = entry->next;
+ kfree(entry);
+ entry = head;
+ }
+ /* still use the old one */
+ return;
+ }
+ entry->apic = old_entry->apic;
+ entry->pin = old_entry->pin;
+ tail->next = entry;
+ tail = entry;
+ old_entry = old_entry->next;
+ }
+
+ tail->next = NULL;
+ cfg->irq_2_pin = head;
+}
+
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry, *next;
+
+ if (old_cfg->irq_2_pin == cfg->irq_2_pin)
+ return;
+
+ entry = old_cfg->irq_2_pin;
+
+ while (entry) {
+ next = entry->next;
+ kfree(entry);
+ entry = next;
+ }
+ old_cfg->irq_2_pin = NULL;
+}
+
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu)
+{
+ struct irq_cfg *cfg;
+ struct irq_cfg *old_cfg;
+
+ cfg = get_one_free_irq_cfg(cpu);
+
+ if (!cfg)
+ return;
+
+ desc->chip_data = cfg;
+
+ old_cfg = old_desc->chip_data;
+
+ memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+
+ init_copy_irq_2_pin(old_cfg, cfg, cpu);
+}
+
+static void free_irq_cfg(struct irq_cfg *old_cfg)
+{
+ kfree(old_cfg);
+}
+
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+ struct irq_cfg *old_cfg, *cfg;
+
+ old_cfg = old_desc->chip_data;
+ cfg = desc->chip_data;
+
+ if (old_cfg == cfg)
+ return;
+
+ if (old_cfg) {
+ free_irq_2_pin(old_cfg, cfg);
+ free_irq_cfg(old_cfg);
+ old_desc->chip_data = NULL;
+ }
+}
+
+static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
+{
+ struct irq_cfg *cfg = desc->chip_data;
+
+ if (!cfg->move_in_progress) {
+ /* it means that domain is not changed */
+ if (!cpus_intersects(desc->affinity, mask))
+ cfg->move_desc_in_progress_in_same_domain = 1;
+ }
+}
+#endif
+
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
{
@@ -231,9 +350,11 @@ static struct irq_cfg *irq_cfg(unsigned
#endif
+#ifndef CONFIG_MOVE_IRQ_DESC
static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask)
{
}
+#endif
struct io_apic {
unsigned int index;
@@ -2346,14 +2467,34 @@ static void irq_complete_move(struct irq
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress))
+ if (likely(!cfg->move_in_progress)) {
+#ifdef CONFIG_MOVE_IRQ_DESC
+ if (likely(!cfg->move_desc_in_progress_in_same_domain))
+ return;
+
+ /* domain is not change, but affinity is changed */
+ me = smp_processor_id();
+ if (cpu_isset(me, desc->affinity)) {
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+ cfg->move_desc_in_progress_in_same_domain = 0;
+ }
+#endif
return;
+ }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
cpumask_t cleanup_mask;
+#ifdef CONFIG_MOVE_IRQ_DESC
+ *descp = desc = move_irq_desc(desc, me);
+ /* get the new one */
+ cfg = desc->chip_data;
+#endif
+
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
cfg->move_cleanup_count = cpus_weight(cleanup_mask);
send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
Index: linux-2.6/kernel/irq/handle.c
===================================================================
--- linux-2.6.orig/kernel/irq/handle.c
+++ linux-2.6/kernel/irq/handle.c
@@ -90,6 +90,32 @@ static void init_kstat_irqs(struct irq_d
desc->kstat_irqs = (unsigned int *)ptr;
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc,
+ int cpu, int nr)
+{
+ unsigned long bytes;
+
+ init_kstat_irqs(desc, cpu, nr);
+
+ if (desc->kstat_irqs != old_desc->kstat_irqs) {
+ /* Compute how many bytes we need per irq and allocate them */
+ bytes = nr * sizeof(unsigned int);
+
+ memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes);
+ }
+}
+
+static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+ if (old_desc->kstat_irqs == desc->kstat_irqs)
+ return;
+
+ kfree(old_desc->kstat_irqs);
+ old_desc->kstat_irqs = NULL;
+}
+#endif
+
void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
{
}
@@ -110,6 +136,23 @@ static void init_one_irq_desc(int irq, s
arch_init_chip_data(desc, cpu);
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
+ struct irq_desc *desc, int cpu)
+{
+ memcpy(desc, old_desc, sizeof(struct irq_desc));
+ desc->cpu = cpu;
+ lockdep_set_class(&desc->lock, &irq_desc_lock_class);
+ init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+ arch_init_copy_chip_data(old_desc, desc, cpu);
+}
+
+static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
+{
+ free_kstat_irqs(old_desc, desc);
+ arch_free_chip_data(old_desc, desc);
+}
+#endif
/*
* Protect the sparse_irqs:
*/
@@ -203,6 +246,73 @@ out_unlock:
return desc;
}
+#ifdef CONFIG_MOVE_IRQ_DESC
+static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
+ int cpu)
+{
+ struct irq_desc *desc;
+ unsigned int irq;
+ unsigned long flags;
+ int node;
+
+ irq = old_desc->irq;
+
+ spin_lock_irqsave(&sparse_irq_lock, flags);
+
+ /* We have to check it to avoid races with another CPU */
+ desc = irq_desc_ptrs[irq];
+
+ if (desc && old_desc != desc)
+ goto out_unlock;
+
+ node = cpu_to_node(cpu);
+ desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+ printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
+ irq, cpu, node);
+ if (!desc) {
+ printk(KERN_ERR "can not get new irq_desc for moving\n");
+ /* still use old one */
+ desc = old_desc;
+ goto out_unlock;
+ }
+ init_copy_one_irq_desc(irq, old_desc, desc, cpu);
+
+ irq_desc_ptrs[irq] = desc;
+
+ /* free the old one */
+ free_one_irq_desc(old_desc, desc);
+ kfree(old_desc);
+
+out_unlock:
+ spin_unlock_irqrestore(&sparse_irq_lock, flags);
+
+ return desc;
+}
+
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+{
+ int old_cpu;
+ int node, old_node;
+
+ /* those all static, do move them */
+ if (desc->irq < NR_IRQS_LEGACY)
+ return desc;
+
+ old_cpu = desc->cpu;
+ printk(KERN_DEBUG "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
+ if (old_cpu != cpu) {
+ node = cpu_to_node(cpu);
+ old_node = cpu_to_node(old_cpu);
+ if (old_node != node)
+ desc = __real_move_irq_desc(desc, cpu);
+ else
+ desc->cpu = cpu;
+ }
+
+ return desc;
+}
+#endif
+
#else
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
@@ -337,8 +447,13 @@ unsigned int __do_IRQ(unsigned int irq)
/*
* No locking required for CPU-local interrupts:
*/
- if (desc->chip->ack)
+ if (desc->chip->ack) {
desc->chip->ack(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
@@ -349,8 +464,13 @@ unsigned int __do_IRQ(unsigned int irq)
}
spin_lock(&desc->lock);
- if (desc->chip->ack)
+ if (desc->chip->ack) {
desc->chip->ack(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
Index: linux-2.6/kernel/irq/chip.c
===================================================================
--- linux-2.6.orig/kernel/irq/chip.c
+++ linux-2.6/kernel/irq/chip.c
@@ -354,6 +354,10 @@ handle_level_irq(unsigned int irq, struc
spin_lock(&desc->lock);
mask_ack_irq(desc, irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
@@ -431,6 +435,10 @@ handle_fasteoi_irq(unsigned int irq, str
desc->status &= ~IRQ_INPROGRESS;
out:
desc->chip->eoi(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
spin_unlock(&desc->lock);
}
@@ -467,12 +475,20 @@ handle_edge_irq(unsigned int irq, struct
!desc->action)) {
desc->status |= (IRQ_PENDING | IRQ_MASKED);
mask_ack_irq(desc, irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
goto out_unlock;
}
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
desc->chip->ack(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
/* Mark the IRQ currently in progress.*/
desc->status |= IRQ_INPROGRESS;
@@ -533,8 +549,13 @@ handle_percpu_irq(unsigned int irq, stru
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- if (desc->chip->eoi)
+ if (desc->chip->eoi) {
desc->chip->eoi(irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
}
void
@@ -569,8 +590,13 @@ __set_irq_handler(unsigned int irq, irq_
/* Uninstall? */
if (handle == handle_bad_irq) {
- if (desc->chip != &no_irq_chip)
+ if (desc->chip != &no_irq_chip) {
mask_ack_irq(desc, irq);
+#ifdef CONFIG_MOVE_IRQ_DESC
+ /* get new one */
+ desc = irq_to_desc(irq);
+#endif
+ }
desc->status |= IRQ_DISABLED;
desc->depth = 1;
}
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 2008-12-06 3:00 [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 Yinghai Lu @ 2008-12-08 13:42 ` Ingo Molnar 2008-12-08 19:18 ` Yinghai Lu 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu 0 siblings, 2 replies; 6+ messages in thread From: Ingo Molnar @ 2008-12-08 13:42 UTC (permalink / raw) To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel * Yinghai Lu <yinghai@kernel.org> wrote: > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > > spin_unlock(&desc->lock); > } > @@ -467,12 +475,20 @@ handle_edge_irq(unsigned int irq, struct > !desc->action)) { > desc->status |= (IRQ_PENDING | IRQ_MASKED); > mask_ack_irq(desc, irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > goto out_unlock; > } > kstat_incr_irqs_this_cpu(irq, desc); > > /* Start handling the irq */ > desc->chip->ack(irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > > /* Mark the IRQ currently in progress.*/ > desc->status |= IRQ_INPROGRESS; > @@ -533,8 +549,13 @@ handle_percpu_irq(unsigned int irq, stru > if (!noirqdebug) > note_interrupt(irq, desc, action_ret); > > - if (desc->chip->eoi) > + if (desc->chip->eoi) { > desc->chip->eoi(irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif > + } > } > > void > @@ -569,8 +590,13 @@ __set_irq_handler(unsigned int irq, irq_ > > /* Uninstall? */ > if (handle == handle_bad_irq) { > - if (desc->chip != &no_irq_chip) > + if (desc->chip != &no_irq_chip) { > mask_ack_irq(desc, irq); > +#ifdef CONFIG_MOVE_IRQ_DESC > + /* get new one */ > + desc = irq_to_desc(irq); > +#endif this patch adds a ton of #ifdefs to important .c files, which could all have been avoided by introducing a new method: desc = irq_remap_to_desc(irq, desc); which would do something like: static struct irq_desc * irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) { #ifdef CONFIG_MOVE_IRQ_DESC return irq_to_desc(irq); #else return desc; #endif } right? Ingo ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 2008-12-08 13:42 ` Ingo Molnar @ 2008-12-08 19:18 ` Yinghai Lu 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu 1 sibling, 0 replies; 6+ messages in thread From: Yinghai Lu @ 2008-12-08 19:18 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel On Mon, Dec 8, 2008 at 5:42 AM, Ingo Molnar <mingo@elte.hu> wrote: > >> mask_ack_irq(desc, irq); >> +#ifdef CONFIG_MOVE_IRQ_DESC >> + /* get new one */ >> + desc = irq_to_desc(irq); >> +#endif > > this patch adds a ton of #ifdefs to important .c files, which could all > have been avoided by introducing a new method: > > desc = irq_remap_to_desc(irq, desc); > > which would do something like: > > static struct irq_desc * > irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) > { > #ifdef CONFIG_MOVE_IRQ_DESC > return irq_to_desc(irq); > #else > return desc; > #endif > } > > right? yes. will work on it. YH ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] irq: move irq_desc according to smp_affinity v6 2008-12-08 13:42 ` Ingo Molnar 2008-12-08 19:18 ` Yinghai Lu @ 2008-12-08 22:07 ` Yinghai Lu 2008-12-09 3:41 ` Ingo Molnar 1 sibling, 1 reply; 6+ messages in thread From: Yinghai Lu @ 2008-12-08 22:07 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel impact: new feature move irq_desc with sparseirq if CONFIG_MOVE_IRQ_DESC is set make irq_desc to go with affinity aka irq_desc moving etc call move_irq_desc in irq_complete_move() legacy irq_desc is not moved, because they are allocated via static array v3: add calling to irq_to_desc after calling ack/eoi instead of passing desc v6: use irq_remap_to_desc to avoid some #ifdef according to Ingo for logical apic mode, need to add move_desc_in_progress_in_same_domain. otherwise it will not get moved. ==> also could need two phase to get irq_desc moved. for example: 0xff is old affinity, and need to set 0xf, and then set to 0xf0. [ or we need to change domain definition to cpus on the same node ? ] LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,000000ff LBSuse:~ # echo f > /proc/irq/22/smp_affinity LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,0000000f LBSuse:~ # tail /var/log/messages ... Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present LBSuse:~ # echo f0 > /proc/irq/22/smp_affinity LBSuse:~ # tail /var/log/messages Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present Oct 27 12:36:46 LBSuse kernel: move irq_desc for 22 aka 0x16 to cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc kstat_irqs on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_cfg on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_2_pin on cpu 7 node 1 so assume the user space program should update /proc/irq/XX/smp_affinity to 03 or 0f at first on boot or we change irq_default_affinity ? for physical apic is much simple on 4 sockets 16 cores system irq_desc is moving.. when # echo 10 > /proc/irq/134483967/smp_affinity # echo 100 > /proc/irq/134483967/smp_affinity # echo 1000 > /proc/irq/134483967/smp_affinity got Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1 Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2 Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3 Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 9 ++ arch/x86/kernel/io_apic.c | 143 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/irq.h | 10 +++ kernel/irq/chip.c | 12 +++ kernel/irq/handle.c | 119 +++++++++++++++++++++++++++++++++++++- 5 files changed, 288 insertions(+), 5 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -253,6 +253,15 @@ config SPARSE_IRQ If you don't know what to do here, say Y. +config MOVE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && SMP + default y + help + This enables moving irq_desc to cpu/node that irq will use handled. + + If you don't know what to do here, say Y. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Index: linux-2.6/arch/x86/kernel/io_apic.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/io_apic.c +++ linux-2.6/arch/x86/kernel/io_apic.c @@ -141,6 +141,9 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; +#ifdef CONFIG_MOVE_IRQ_DESC + u8 move_desc_in_progress_in_same_domain : 1; +#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -223,6 +226,122 @@ void arch_init_chip_data(struct irq_desc } } +#ifdef CONFIG_MOVE_IRQ_DESC + +static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, + int cpu) +{ + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } + + tail->next = NULL; + cfg->irq_2_pin = head; +} + +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; + + entry = old_cfg->irq_2_pin; + + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + old_cfg->irq_2_pin = NULL; +} + +void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; + + cfg = get_one_free_irq_cfg(cpu); + + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); +} + +static void free_irq_cfg(struct irq_cfg *old_cfg) +{ + kfree(old_cfg); +} + +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +{ + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } +} + +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_in_progress_in_same_domain = 1; + } +} +#endif + #else static struct irq_cfg *irq_cfg(unsigned int irq) { @@ -231,9 +350,11 @@ static struct irq_cfg *irq_cfg(unsigned #endif +#ifndef CONFIG_MOVE_IRQ_DESC static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } +#endif struct io_apic { unsigned int index; @@ -2346,14 +2467,34 @@ static void irq_complete_move(struct irq struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { +#ifdef CONFIG_MOVE_IRQ_DESC + if (likely(!cfg->move_desc_in_progress_in_same_domain)) + return; + + /* domain is not change, but affinity is changed */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_in_progress_in_same_domain = 0; + } +#endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; +#ifdef CONFIG_MOVE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; +#endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); Index: linux-2.6/kernel/irq/handle.c =================================================================== --- linux-2.6.orig/kernel/irq/handle.c +++ linux-2.6/kernel/irq/handle.c @@ -90,6 +90,32 @@ static void init_kstat_irqs(struct irq_d desc->kstat_irqs = (unsigned int *)ptr; } +#ifdef CONFIG_MOVE_IRQ_DESC +static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, + int cpu, int nr) +{ + unsigned long bytes; + + init_kstat_irqs(desc, cpu, nr); + + if (desc->kstat_irqs != old_desc->kstat_irqs) { + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); + } +} + +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +{ + if (old_desc->kstat_irqs == desc->kstat_irqs) + return; + + kfree(old_desc->kstat_irqs); + old_desc->kstat_irqs = NULL; +} +#endif + void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) { } @@ -110,6 +136,23 @@ static void init_one_irq_desc(int irq, s arch_init_chip_data(desc, cpu); } +#ifdef CONFIG_MOVE_IRQ_DESC +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + memcpy(desc, old_desc, sizeof(struct irq_desc)); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + arch_init_copy_chip_data(old_desc, desc, cpu); +} + +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +{ + free_kstat_irqs(old_desc, desc); + arch_free_chip_data(old_desc, desc); +} +#endif /* * Protect the sparse_irqs: */ @@ -203,6 +246,73 @@ out_unlock: return desc; } +#ifdef CONFIG_MOVE_IRQ_DESC +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, + int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long flags; + int node; + + irq = old_desc->irq; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + + if (desc && old_desc != desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not get new irq_desc for moving\n"); + /* still use old one */ + desc = old_desc; + goto out_unlock; + } + init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + + /* free the old one */ + free_one_irq_desc(old_desc, desc); + kfree(old_desc); + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +{ + int old_cpu; + int node, old_node; + + /* those all static, do move them */ + if (desc->irq < NR_IRQS_LEGACY) + return desc; + + old_cpu = desc->cpu; + printk(KERN_DEBUG "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); + if (old_cpu != cpu) { + node = cpu_to_node(cpu); + old_node = cpu_to_node(old_cpu); + if (old_node != node) + desc = __real_move_irq_desc(desc, cpu); + else + desc->cpu = cpu; + } + + return desc; +} +#endif + #else struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { @@ -337,8 +447,11 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + /* get new one */ + desc = irq_remap_to_desc(irq, desc); + } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -349,8 +462,10 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested Index: linux-2.6/kernel/irq/chip.c =================================================================== --- linux-2.6.orig/kernel/irq/chip.c +++ linux-2.6/kernel/irq/chip.c @@ -354,6 +354,7 @@ handle_level_irq(unsigned int irq, struc spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -431,6 +432,7 @@ handle_fasteoi_irq(unsigned int irq, str desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -467,12 +469,14 @@ handle_edge_irq(unsigned int irq, struct !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -533,8 +537,10 @@ handle_percpu_irq(unsigned int irq, stru if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@ -569,8 +575,10 @@ __set_irq_handler(unsigned int irq, irq_ /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } Index: linux-2.6/include/linux/irq.h =================================================================== --- linux-2.6.orig/include/linux/irq.h +++ linux-2.6/include/linux/irq.h @@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(st #endif +static inline struct irq_desc * +irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_MOVE_IRQ_DESC + return irq_to_desc(irq); +#else + return desc; +#endif +} + /* * Migration helpers for obsolete names, they will go away: */ ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] irq: move irq_desc according to smp_affinity v6 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu @ 2008-12-09 3:41 ` Ingo Molnar 2008-12-11 8:15 ` [PATCH] irq: move irq_desc according to smp_affinity v7 Yinghai Lu 0 siblings, 1 reply; 6+ messages in thread From: Ingo Molnar @ 2008-12-09 3:41 UTC (permalink / raw) To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel * Yinghai Lu <yinghai@kernel.org> wrote: > for physical apic is much simple > on 4 sockets 16 cores system > irq_desc is moving.. > when > # echo 10 > /proc/irq/134483967/smp_affinity > # echo 100 > /proc/irq/134483967/smp_affinity > # echo 1000 > /proc/irq/134483967/smp_affinity > got > Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1 > Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1 > Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1 > Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2 > Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2 > Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2 > Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3 > Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3 > Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3 > > Signed-off-by: Yinghai Lu <yinghai@kernel.org> Neat feature! i'm wondering, have you tried to characterise the cost savings of moving the irq desc? It will certainly save three heavy cross-NUMA cachemisses on x86 per rare irq source. A way to attempt to measure this would be to write some quick debug hack that prints the cycle count of one specific IRQ source, in do_IRQ(), from the entry of do_IRQ() to the exit of do_IRQ(), using rdtscl(). Pick an IRQ that you can trigger arbitrarily, and printk the cycle cost at the end of do_IRQ(). [if irq == your_debug_irq - otherwise you can get a lot of printks and not too good measurements]. plus perhaps add some quick hack that makes the irq_desc/chip_data/kstat_irqs migration dependent on a sysctl, such as 'panic_timeout' (tunable via 'echo 1 > /proc/sys/kernel/panic'). Then you could try to trigger your debug IRQ and the cycle cost printk in two modes: echo 0 > /proc/sys/kernel/panic [ migrate the IRQ to another domain and trigger the IRQ - wait for the cycle printout. Both cache-cold and cache-hot numbers are interesting. ] echo 1 > /proc/sys/kernel/panic [ re-migrate the debug IRQ via /proc/irq/*/smp_affinity to make sure it's NUMA-local, then trigger the debug IRQ and record cache-cold and cache-hot cycle counts. ] it's hard to measure this reliably, as on x86 the numa factor is usually pretty low, so the local versus remote cachemiss cost is hard to separate. A few comments about the patch too: > +config MOVE_IRQ_DESC > + bool "Move irq desc when changing irq smp_affinity" > + depends on SPARSE_IRQ && SMP > + default y new feature - should be default-no. > + help > + This enables moving irq_desc to cpu/node that irq will use handled. > + > + If you don't know what to do here, say Y. Later on i think we should just select this in the NUMA case, instead of complicating the user's selection. It's OK to have it configurable now - should it cause problems. > + > config X86_FIND_SMP_CONFIG > def_bool y > depends on X86_MPPARSE || X86_VOYAGER > Index: linux-2.6/arch/x86/kernel/io_apic.c > =================================================================== > --- linux-2.6.orig/arch/x86/kernel/io_apic.c > +++ linux-2.6/arch/x86/kernel/io_apic.c > @@ -141,6 +141,9 @@ struct irq_cfg { > unsigned move_cleanup_count; > u8 vector; > u8 move_in_progress : 1; > +#ifdef CONFIG_MOVE_IRQ_DESC > + u8 move_desc_in_progress_in_same_domain : 1; > +#endif way too long field name - please rename to move_desc_pending or so. > @@ -223,6 +226,122 @@ void arch_init_chip_data(struct irq_desc > } > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > + > +static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, > + int cpu) > +{ small style nit, it's a tiny bit tidier to break the line the following way: static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) [ as this way we have all the parameters on a single line, and the return type stands out on a separate line. ] > + struct irq_pin_list *old_entry, *head, *tail, *entry; > + > + cfg->irq_2_pin = NULL; > + old_entry = old_cfg->irq_2_pin; > + if (!old_entry) > + return; > + > + entry = get_one_free_irq_2_pin(cpu); > + if (!entry) > + return; > + > + entry->apic = old_entry->apic; > + entry->pin = old_entry->pin; > + head = entry; > + tail = entry; > + old_entry = old_entry->next; for mass-initialization please try to structure it a bit: > + entry->apic = old_entry->apic; > + entry->pin = old_entry->pin; > + head = entry; > + tail = entry; > + > + old_entry = old_entry->next; it's much easier to validate such constructs. For example, once vertically aligned, i immediately saw an oddity in it - why is 'old_entry' initialized twice? > + > + while (old_entry) { > + entry = get_one_free_irq_2_pin(cpu); > + if (!entry) { > + entry = head; > + while (entry) { > + head = entry->next; > + kfree(entry); > + entry = head; > + } > + /* still use the old one */ > + return; > + } same here: > + entry->apic = old_entry->apic; > + entry->pin = old_entry->pin; > + tail->next = entry; > + tail = entry; > + old_entry = old_entry->next; > + } > + > + tail->next = NULL; > + cfg->irq_2_pin = head; > +} > + > +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) > +{ > + struct irq_pin_list *entry, *next; > + > + if (old_cfg->irq_2_pin == cfg->irq_2_pin) > + return; > + > + entry = old_cfg->irq_2_pin; > + > + while (entry) { > + next = entry->next; > + kfree(entry); > + entry = next; > + } > + old_cfg->irq_2_pin = NULL; > +} > + > +void arch_init_copy_chip_data(struct irq_desc *old_desc, > + struct irq_desc *desc, int cpu) > +{ > + struct irq_cfg *cfg; > + struct irq_cfg *old_cfg; > + > + cfg = get_one_free_irq_cfg(cpu); > + > + if (!cfg) > + return; > + > + desc->chip_data = cfg; > + > + old_cfg = old_desc->chip_data; > + > + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); > + > + init_copy_irq_2_pin(old_cfg, cfg, cpu); > +} > + > +static void free_irq_cfg(struct irq_cfg *old_cfg) > +{ > + kfree(old_cfg); > +} > + > +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) > +{ > + struct irq_cfg *old_cfg, *cfg; > + > + old_cfg = old_desc->chip_data; > + cfg = desc->chip_data; > + > + if (old_cfg == cfg) > + return; > + > + if (old_cfg) { > + free_irq_2_pin(old_cfg, cfg); > + free_irq_cfg(old_cfg); > + old_desc->chip_data = NULL; > + } > +} > + > +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) > +{ > + struct irq_cfg *cfg = desc->chip_data; > + > + if (!cfg->move_in_progress) { > + /* it means that domain is not changed */ > + if (!cpus_intersects(desc->affinity, mask)) > + cfg->move_desc_in_progress_in_same_domain = 1; > + } > +} > +#endif > + > #else > static struct irq_cfg *irq_cfg(unsigned int irq) > { > @@ -231,9 +350,11 @@ static struct irq_cfg *irq_cfg(unsigned > > #endif > > +#ifndef CONFIG_MOVE_IRQ_DESC > static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) > { > } > +#endif > > struct io_apic { > unsigned int index; > @@ -2346,14 +2467,34 @@ static void irq_complete_move(struct irq > struct irq_cfg *cfg = desc->chip_data; > unsigned vector, me; > > - if (likely(!cfg->move_in_progress)) > + if (likely(!cfg->move_in_progress)) { > +#ifdef CONFIG_MOVE_IRQ_DESC > + if (likely(!cfg->move_desc_in_progress_in_same_domain)) > + return; > + > + /* domain is not change, but affinity is changed */ > + me = smp_processor_id(); > + if (cpu_isset(me, desc->affinity)) { > + *descp = desc = move_irq_desc(desc, me); > + /* get the new one */ > + cfg = desc->chip_data; > + cfg->move_desc_in_progress_in_same_domain = 0; > + } > +#endif > return; > + } > > vector = ~get_irq_regs()->orig_ax; > me = smp_processor_id(); > if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { > cpumask_t cleanup_mask; > > +#ifdef CONFIG_MOVE_IRQ_DESC > + *descp = desc = move_irq_desc(desc, me); > + /* get the new one */ > + cfg = desc->chip_data; > +#endif > + > cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); > cfg->move_cleanup_count = cpus_weight(cleanup_mask); > send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); > Index: linux-2.6/kernel/irq/handle.c > =================================================================== > --- linux-2.6.orig/kernel/irq/handle.c > +++ linux-2.6/kernel/irq/handle.c > @@ -90,6 +90,32 @@ static void init_kstat_irqs(struct irq_d > desc->kstat_irqs = (unsigned int *)ptr; > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > +static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, > + int cpu, int nr) > +{ > + unsigned long bytes; > + > + init_kstat_irqs(desc, cpu, nr); > + > + if (desc->kstat_irqs != old_desc->kstat_irqs) { > + /* Compute how many bytes we need per irq and allocate them */ > + bytes = nr * sizeof(unsigned int); > + > + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); > + } > +} > + > +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) > +{ > + if (old_desc->kstat_irqs == desc->kstat_irqs) > + return; > + > + kfree(old_desc->kstat_irqs); > + old_desc->kstat_irqs = NULL; > +} > +#endif > + > void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) > { > } > @@ -110,6 +136,23 @@ static void init_one_irq_desc(int irq, s > arch_init_chip_data(desc, cpu); > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, > + struct irq_desc *desc, int cpu) > +{ > + memcpy(desc, old_desc, sizeof(struct irq_desc)); > + desc->cpu = cpu; > + lockdep_set_class(&desc->lock, &irq_desc_lock_class); > + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); > + arch_init_copy_chip_data(old_desc, desc, cpu); > +} > + > +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) > +{ > + free_kstat_irqs(old_desc, desc); > + arch_free_chip_data(old_desc, desc); > +} > +#endif > /* > * Protect the sparse_irqs: > */ > @@ -203,6 +246,73 @@ out_unlock: > return desc; > } > > +#ifdef CONFIG_MOVE_IRQ_DESC > +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, > + int cpu) > +{ > + struct irq_desc *desc; > + unsigned int irq; > + unsigned long flags; > + int node; > + > + irq = old_desc->irq; > + > + spin_lock_irqsave(&sparse_irq_lock, flags); > + > + /* We have to check it to avoid races with another CPU */ > + desc = irq_desc_ptrs[irq]; > + > + if (desc && old_desc != desc) > + goto out_unlock; > + > + node = cpu_to_node(cpu); > + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); > + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", > + irq, cpu, node); > + if (!desc) { > + printk(KERN_ERR "can not get new irq_desc for moving\n"); > + /* still use old one */ > + desc = old_desc; > + goto out_unlock; > + } > + init_copy_one_irq_desc(irq, old_desc, desc, cpu); > + > + irq_desc_ptrs[irq] = desc; > + > + /* free the old one */ > + free_one_irq_desc(old_desc, desc); > + kfree(old_desc); > + > +out_unlock: > + spin_unlock_irqrestore(&sparse_irq_lock, flags); > + > + return desc; > +} > + > +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) > +{ > + int old_cpu; > + int node, old_node; > + > + /* those all static, do move them */ > + if (desc->irq < NR_IRQS_LEGACY) > + return desc; > + > + old_cpu = desc->cpu; > + printk(KERN_DEBUG "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); > + if (old_cpu != cpu) { > + node = cpu_to_node(cpu); > + old_node = cpu_to_node(old_cpu); > + if (old_node != node) > + desc = __real_move_irq_desc(desc, cpu); > + else > + desc->cpu = cpu; > + } > + > + return desc; > +} > +#endif Still a bit too much of #ifdeffery for my taste in kernel/irq/*.c, we tend to have higher maintenance costs in files that have a lot of #ifdefs. Wouldnt it look neater if you introduced a new kernel/irq/numa_migrate.c function that would provide these methods, with the prototypes being #ifdef-ed to inlines in the !CONFIG_MOVE_IRQ_DESC case in kernel/irq/internals.h? i'd also suggest to rename the config option to the more descriptive: CONFIG_NUMA_MIGRATE_IRQ_DESC name. > /* > * No locking required for CPU-local interrupts: > */ > - if (desc->chip->ack) > + if (desc->chip->ack) { > desc->chip->ack(irq); > + /* get new one */ > + desc = irq_remap_to_desc(irq, desc); > + } thanks for fixing this - it looks much nicer now! Ingo ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] irq: move irq_desc according to smp_affinity v7 2008-12-09 3:41 ` Ingo Molnar @ 2008-12-11 8:15 ` Yinghai Lu 0 siblings, 0 replies; 6+ messages in thread From: Yinghai Lu @ 2008-12-11 8:15 UTC (permalink / raw) To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Andrew Morton, linux-kernel impact: new feature move irq_desc with sparseirq if CONFIG_NUMA_MIGRATE_IRQ_DESC is set make irq_desc to go with affinity aka irq_desc moving etc call move_irq_desc in irq_complete_move() legacy irq_desc is not moved, because they are allocated via static array v3: add calling to irq_to_desc after calling ack/eoi instead of passing desc v6: use irq_remap_to_desc to avoid some #ifdef according to Ingo for logical apic mode, need to add move_desc_in_progress_in_same_domain. otherwise it will not get moved. ==> also could need two phase to get irq_desc moved. for example: 0xff is old affinity, and need to set 0xf, and then set to 0xf0. [ or we need to change domain definition to cpus on the same node ? ] LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,000000ff LBSuse:~ # echo f > /proc/irq/22/smp_affinity LBSuse:~ # cat /proc/irq/22/smp_affinity 00000000,00000000,00000000,0000000f LBSuse:~ # tail /var/log/messages ... Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present LBSuse:~ # echo f0 > /proc/irq/22/smp_affinity LBSuse:~ # tail /var/log/messages Oct 27 12:35:34 LBSuse kernel: klogd 1.4.1, log source = /proc/kmsg started. Oct 27 12:35:34 LBSuse kernel: eth0: no IPv6 routers present Oct 27 12:36:46 LBSuse kernel: move irq_desc for 22 aka 0x16 to cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc kstat_irqs on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_cfg on cpu 7 node 1 Oct 27 12:36:46 LBSuse kernel: alloc irq_2_pin on cpu 7 node 1 so assume the user space program should update /proc/irq/XX/smp_affinity to 03 or 0f at first on boot or we change irq_default_affinity ? for physical apic is much simple on 4 sockets 16 cores system irq_desc is moving.. when # echo 10 > /proc/irq/134483967/smp_affinity # echo 100 > /proc/irq/134483967/smp_affinity # echo 1000 > /proc/irq/134483967/smp_affinity got Nov 9 21:39:51 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc kstat_irqs on cpu 4 node 1 Nov 9 21:39:51 LBSuse kernel: alloc irq_cfg on cpu 4 node 1 Nov 9 21:40:05 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc kstat_irqs on cpu 8 node 2 Nov 9 21:40:05 LBSuse kernel: alloc irq_cfg on cpu 8 node 2 Nov 9 21:40:18 LBSuse kernel: move irq_desc for 134483967 aka 0x8040fff to cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc kstat_irqs on cpu 12 node 3 Nov 9 21:40:18 LBSuse kernel: alloc irq_cfg on cpu 12 node 3 Signed-off-by: Yinghai Lu <yinghai@kernel.org> --- arch/x86/Kconfig | 9 ++ arch/x86/kernel/io_apic.c | 142 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/irq.h | 10 +++ kernel/irq/Makefile | 1 kernel/irq/chip.c | 12 +++ kernel/irq/handle.c | 15 +++- kernel/irq/internals.h | 5 + kernel/irq/numa_migrate.c | 125 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 311 insertions(+), 8 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -253,6 +253,15 @@ config SPARSE_IRQ If you don't know what to do here, say Y. +config NUMA_MIGRATE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && SMP + default n + help + This enables moving irq_desc to cpu/node that irq will use handled. + + If you don't know what to do here, say N. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER Index: linux-2.6/arch/x86/kernel/io_apic.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/io_apic.c +++ linux-2.6/arch/x86/kernel/io_apic.c @@ -141,6 +141,9 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + u8 move_desc_pending : 1; +#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc } } +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + +static void +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +{ + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } + + tail->next = NULL; + cfg->irq_2_pin = head; +} + +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; + + entry = old_cfg->irq_2_pin; + + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + old_cfg->irq_2_pin = NULL; +} + +void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; + + cfg = get_one_free_irq_cfg(cpu); + + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); +} + +static void free_irq_cfg(struct irq_cfg *old_cfg) +{ + kfree(old_cfg); +} + +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +{ + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } +} + +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } +} +#endif + #else static struct irq_cfg *irq_cfg(unsigned int irq) { @@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned #endif +#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } +#endif struct io_apic { unsigned int index; @@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + if (likely(!cfg->move_desc_pending)) + return; + + /* domain is not change, but affinity is changed */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_pending = 0; + } +#endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; +#endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); Index: linux-2.6/kernel/irq/handle.c =================================================================== --- linux-2.6.orig/kernel/irq/handle.c +++ linux-2.6/kernel/irq/handle.c @@ -23,7 +23,7 @@ /* * lockdep: we want to handle all irq_desc locks as a single lock-class: */ -static struct lock_class_key irq_desc_lock_class; +struct lock_class_key irq_desc_lock_class; /** * handle_bad_irq - handle spurious and unhandled irqs @@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = { #endif }; -static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) { unsigned long bytes; char *ptr; @@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, s /* * Protect the sparse_irqs: */ -static DEFINE_SPINLOCK(sparse_irq_lock); +DEFINE_SPINLOCK(sparse_irq_lock); struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; @@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + /* get new one */ + desc = irq_remap_to_desc(irq, desc); + } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested Index: linux-2.6/kernel/irq/chip.c =================================================================== --- linux-2.6.orig/kernel/irq/chip.c +++ linux-2.6/kernel/irq/chip.c @@ -354,6 +354,7 @@ handle_level_irq(unsigned int irq, struc spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -431,6 +432,7 @@ handle_fasteoi_irq(unsigned int irq, str desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -467,12 +469,14 @@ handle_edge_irq(unsigned int irq, struct !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -533,8 +537,10 @@ handle_percpu_irq(unsigned int irq, stru if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@ -569,8 +575,10 @@ __set_irq_handler(unsigned int irq, irq_ /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } Index: linux-2.6/include/linux/irq.h =================================================================== --- linux-2.6.orig/include/linux/irq.h +++ linux-2.6/include/linux/irq.h @@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(st #endif +static inline struct irq_desc * +irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + return irq_to_desc(irq); +#else + return desc; +#endif +} + /* * Migration helpers for obsolete names, they will go away: */ Index: linux-2.6/kernel/irq/numa_migrate.c =================================================================== --- /dev/null +++ linux-2.6/kernel/irq/numa_migrate.c @@ -0,0 +1,127 @@ +/* + * linux/kernel/irq/handle.c + * + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the core interrupt handling code. + * + * Detailed information is available in Documentation/DocBook/genericirq + * + */ + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> + +#include "internals.h" + +static void init_copy_kstat_irqs(struct irq_desc *old_desc, + struct irq_desc *desc, + int cpu, int nr) +{ + unsigned long bytes; + + init_kstat_irqs(desc, cpu, nr); + + if (desc->kstat_irqs != old_desc->kstat_irqs) { + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); + } +} + +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +{ + if (old_desc->kstat_irqs == desc->kstat_irqs) + return; + + kfree(old_desc->kstat_irqs); + old_desc->kstat_irqs = NULL; +} + +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + memcpy(desc, old_desc, sizeof(struct irq_desc)); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + arch_init_copy_chip_data(old_desc, desc, cpu); +} + +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +{ + free_kstat_irqs(old_desc, desc); + arch_free_chip_data(old_desc, desc); +} + +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, + int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long flags; + int node; + + irq = old_desc->irq; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + + if (desc && old_desc != desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not get new irq_desc for moving\n"); + /* still use old one */ + desc = old_desc; + goto out_unlock; + } + init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + + /* free the old one */ + free_one_irq_desc(old_desc, desc); + kfree(old_desc); + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +{ + int old_cpu; + int node, old_node; + + /* those all static, do move them */ + if (desc->irq < NR_IRQS_LEGACY) + return desc; + + old_cpu = desc->cpu; + printk(KERN_DEBUG + "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); + if (old_cpu != cpu) { + node = cpu_to_node(cpu); + old_node = cpu_to_node(old_cpu); + if (old_node != node) + desc = __real_move_irq_desc(desc, cpu); + else + desc->cpu = cpu; + } + + return desc; +} + Index: linux-2.6/kernel/irq/Makefile =================================================================== --- linux-2.6.orig/kernel/irq/Makefile +++ linux-2.6/kernel/irq/Makefile @@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o re obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o +obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o Index: linux-2.6/kernel/irq/internals.h =================================================================== --- linux-2.6.orig/kernel/irq/internals.h +++ linux-2.6/kernel/irq/internals.h @@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); +extern struct lock_class_key irq_desc_lock_class; +extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern spinlock_t sparse_irq_lock; +extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; + #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void register_handler_proc(unsigned int irq, struct irqaction *action); ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2008-12-11 8:16 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-12-06 3:00 [PATCH 5/5] irq: move irq_desc according to smp_affinity v5 Yinghai Lu 2008-12-08 13:42 ` Ingo Molnar 2008-12-08 19:18 ` Yinghai Lu 2008-12-08 22:07 ` [PATCH] irq: move irq_desc according to smp_affinity v6 Yinghai Lu 2008-12-09 3:41 ` Ingo Molnar 2008-12-11 8:15 ` [PATCH] irq: move irq_desc according to smp_affinity v7 Yinghai Lu
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.