[PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts
@ 2009-09-30 16:02 Dimitri Sivanich
  2009-09-30 16:10 ` Robin Holt
                   ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Dimitri Sivanich @ 2009-09-30 16:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel

This patch fixes handling of uv hub irq affinity.  IRQs with ALL or
NODE affinity can be routed to cpus other than their originally assigned
cpu.  Those with CPU affinity cannot be rerouted.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

More specific handling for NODE affinity will be added once the patch for
limiting irq affinity is in place.

 arch/x86/include/asm/uv/uv_irq.h |   15 +++-
 arch/x86/kernel/apic/io_apic.c   |   49 ++++++++++++-
 arch/x86/kernel/uv_irq.c         |  128 ++++++++++++++++++++++++++++++++---
 drivers/misc/sgi-xp/xpc_uv.c     |    5 -
 4 files changed, 180 insertions(+), 17 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-09-29 21:23:53.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c	2009-09-30 08:01:10.000000000 -0500
@@ -3748,9 +3748,10 @@ int arch_setup_ht_irq(unsigned int irq, 
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset)
+		       unsigned long mmr_offset, int restrict)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3766,6 +3767,11 @@ int arch_enable_uv_irq(char *irq_name, u
 	if (err != 0)
 		return err;
 
+	if (restrict == UV_AFFINITY_CPU)
+		desc->status |= IRQ_NO_BALANCING;
+	else
+		desc->status |= IRQ_MOVE_PCNTXT;
+
 	spin_lock_irqsave(&vector_lock, flags);
 	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
 				      irq_name);
@@ -3794,11 +3800,10 @@ int arch_enable_uv_irq(char *irq_name, u
  * Disable the specified MMR located on the specified blade so that MSIs are
  * longer allowed to be sent.
  */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
 {
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
-	int mmr_pnode;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
 
@@ -3806,9 +3811,45 @@ void arch_disable_uv_irq(int mmr_blade, 
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
 	entry->mask = 1;
 
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 }
+
+int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+	unsigned int dest;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long mmr_offset;
+	unsigned mmr_pnode;
+
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
+		return -1;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode = apic->irq_dest_mode;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = dest;
+
+	/* Get previously stored MMR and pnode of hub sourcing interrupts */
+	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+		return -1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return 0;
+}
 #endif /* CONFIG_X86_64 */
 
 int __init io_apic_get_redir_entries (int ioapic)
Index: linux/drivers/misc/sgi-xp/xpc_uv.c
===================================================================
--- linux.orig/drivers/misc/sgi-xp/xpc_uv.c	2009-09-29 21:23:53.000000000 -0500
+++ linux/drivers/misc/sgi-xp/xpc_uv.c	2009-09-29 21:23:55.000000000 -0500
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_
 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
 
 #if defined CONFIG_X86_64
-	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+			UV_AFFINITY_CPU);
 	if (mq->irq < 0) {
 		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
 			-mq->irq);
@@ -136,7 +137,7 @@ static void
 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
 {
 #if defined CONFIG_X86_64
-	uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+	uv_teardown_irq(mq->irq);
 
 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
 	int mmr_pnode;
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h	2009-09-29 21:23:53.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h	2009-09-29 21:23:55.000000000 -0500
@@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry {
 		dest		: 32;
 };
 
+enum {
+	UV_AFFINITY_ALL,
+	UV_AFFINITY_NODE,
+	UV_AFFINITY_CPU
+};
+
 extern struct irq_chip uv_irq_chip;
 
-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern int
+arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
 extern void arch_disable_uv_irq(int, unsigned long);
+extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);
 
 #endif /* _ASM_X86_UV_UV_IRQ_H */
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c	2009-09-29 21:23:53.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c	2009-09-30 07:56:44.000000000 -0500
@@ -9,10 +9,22 @@
  */
 
 #include <linux/module.h>
+#include <linux/rbtree.h>
 #include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+	struct rb_node list;
+	unsigned long offset;
+	int pnode;
+	int irq;
+};
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;
 
 static void uv_noop(unsigned int irq)
 {
@@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = {
 	.unmask		= uv_noop,
 	.eoi		= uv_ack_apic,
 	.end		= uv_noop,
+	.set_affinity	= uv_set_irq_affinity,
 };
 
 /*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+	struct rb_node **link = &uv_irq_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct uv_irq_2_mmr_pnode *n;
+	struct uv_irq_2_mmr_pnode *e;
+	unsigned long irqflags;
+
+	n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+				uv_blade_to_memory_nid(blade));
+	if (!n)
+		return -ENOMEM;
+
+	n->irq = irq;
+	n->offset = offset;
+	n->pnode = uv_blade_to_pnode(blade);
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	/* Find the right place in the rbtree: */
+	while (*link) {
+		parent = *link;
+		e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+		if (unlikely(irq == e->irq)) {
+			/* irq entry exists */
+			e->pnode = uv_blade_to_pnode(blade);
+			e->offset = offset;
+			spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+			kfree(n);
+			return 0;
+		}
+
+		if (irq < e->irq)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Insert the node into the rbtree. */
+	rb_link_node(&n->list, parent, link);
+	rb_insert_color(&n->list, &uv_irq_root);
+
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+		if (e->irq == irq) {
+			*offset = e->offset;
+			*pnode = e->pnode;
+			spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+			return 0;
+		}
+
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	return -1;
+}
+
+/*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.
  */
 int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
-		 unsigned long mmr_offset)
+		 unsigned long mmr_offset, int restrict)
 {
-	int irq;
-	int ret;
+	int irq, ret;
+
+	irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
 
-	irq = create_irq();
 	if (irq <= 0)
 		return -EBUSY;
 
-	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
-	if (ret != irq)
+	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+		restrict);
+	if (ret == irq)
+		uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+	else
 		destroy_irq(irq);
 
 	return ret;
@@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
  *
  * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
  */
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
 {
-	arch_disable_uv_irq(mmr_blade, mmr_offset);
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+		if (e->irq == irq) {
+			arch_disable_uv_irq(e->pnode, e->offset);
+			rb_erase(n, &uv_irq_root);
+			kfree(e);
+			break;
+		}
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
 	destroy_irq(irq);
 }
 EXPORT_SYMBOL_GPL(uv_teardown_irq);

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts
  2009-09-30 16:02 [PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts Dimitri Sivanich
@ 2009-09-30 16:10 ` Robin Holt
  2009-10-12 19:34 ` Ingo Molnar
  2009-10-14  8:17 ` [tip:x86/apic] x86: SGI UV: Fix irq affinity for hub based interrupts tip-bot for Dimitri Sivanich
  2 siblings, 0 replies; 22+ messages in thread
From: Robin Holt @ 2009-09-30 16:10 UTC (permalink / raw)
  To: Dimitri Sivanich; +Cc: Ingo Molnar, linux-kernel

On Wed, Sep 30, 2009 at 11:02:59AM -0500, Dimitri Sivanich wrote:
> This patch fixes handling of uv hub irq affinity.  IRQs with ALL or
> NODE affinity can be routed to cpus other than their originally assigned
> cpu.  Those with CPU affinity cannot be rerouted.
> 
> Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

Acked-by: Robin Holt <holt@sgi.com>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts
  2009-09-30 16:02 [PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts Dimitri Sivanich
  2009-09-30 16:10 ` Robin Holt
@ 2009-10-12 19:34 ` Ingo Molnar
  2009-10-13 20:32   ` [PATCH] x86: Move SGI UV functionality out of generic IO-APIC code Dimitri Sivanich
       [not found]   ` <20091012193704.GA8708@sgi.com>
  2009-10-14  8:17 ` [tip:x86/apic] x86: SGI UV: Fix irq affinity for hub based interrupts tip-bot for Dimitri Sivanich
  2 siblings, 2 replies; 22+ messages in thread
From: Ingo Molnar @ 2009-10-12 19:34 UTC (permalink / raw)
  To: Dimitri Sivanich, Thomas Gleixner, H. Peter Anvin; +Cc: linux-kernel


* Dimitri Sivanich <sivanich@sgi.com> wrote:

> This patch fixes handling of uv hub irq affinity.  IRQs with ALL or
> NODE affinity can be routed to cpus other than their originally assigned
> cpu.  Those with CPU affinity cannot be rerouted.
> 
> Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
> 
> ---
> 
> More specific handling for NODE affinity will be added once the patch for
> limiting irq affinity is in place.
> 
>  arch/x86/include/asm/uv/uv_irq.h |   15 +++-
>  arch/x86/kernel/apic/io_apic.c   |   49 ++++++++++++-

Could we please move all these UV specific functions out of the generic 
IO-APIC code? You can do it as a followup patch to this one, but i'd 
like to see the end result first before applying these - the interfacing 
is rather messy right now.

	Ingo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] x86: Move SGI UV functionality out of generic IO-APIC code
  2009-10-12 19:34 ` Ingo Molnar
@ 2009-10-13 20:32   ` Dimitri Sivanich
  2009-10-14  8:18     ` [tip:x86/apic] x86, apic: " tip-bot for Dimitri Sivanich
       [not found]   ` <20091012193704.GA8708@sgi.com>
  1 sibling, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-13 20:32 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, linux-kernel

Move UV specific functionality out of the generic IO-APIC code.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

 arch/x86/include/asm/hw_irq.h    |   19 ++++
 arch/x86/include/asm/uv/uv_irq.h |    7 -
 arch/x86/kernel/apic/io_apic.c   |  140 +----------------------------------
 arch/x86/kernel/uv_irq.c         |  109 +++++++++++++++++++++++++++
 4 files changed, 133 insertions(+), 142 deletions(-)

Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h	2009-10-13 13:53:37.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h	2009-10-13 13:54:25.000000000 -0500
@@ -85,6 +85,25 @@ static inline void set_io_apic_irq_attr(
 	irq_attr->polarity   = polarity;
 }
 
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
+struct irq_cfg {
+	struct irq_pin_list *irq_2_pin;
+	cpumask_var_t domain;
+	cpumask_var_t old_domain;
+	unsigned move_cleanup_count;
+	u8 vector;
+	u8 move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg(unsigned int);
+extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void send_cleanup_vector(struct irq_cfg *);
+extern unsigned int set_desc_affinity(struct irq_desc *,
+					const struct cpumask *);
 extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
 					struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h	2009-10-13 13:53:47.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h	2009-10-13 13:56:11.000000000 -0500
@@ -31,13 +31,6 @@ enum {
 	UV_AFFINITY_CPU
 };
 
-extern struct irq_chip uv_irq_chip;
-
-extern int
-arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
-extern void arch_disable_uv_irq(int, unsigned long);
-extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
-
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);
Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-10-13 13:53:47.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c	2009-10-13 15:19:04.000000000 -0500
@@ -60,8 +60,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
 #include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
 
 #include <asm/apic.h>
 
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free
 	return pin;
 }
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
-	struct irq_pin_list *irq_2_pin;
-	cpumask_var_t domain;
-	cpumask_var_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	struct irq_cfg *cfg = NULL;
 	struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc
 /* end for move_irq_desc */
 
 #else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
@@ -1237,8 +1221,7 @@ next:
 	return err;
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -2228,7 +2211,7 @@ static int ioapic_retrigger_irq(unsigned
  */
 
 #ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
 {
 	cpumask_var_t cleanup_mask;
 
@@ -2272,15 +2255,12 @@ static void __target_IO_APIC_irq(unsigne
 	}
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
 /*
  * Either sets desc->affinity to a valid value, and returns
  * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
  * leaves desc->affinity untouched.
  */
-static unsigned int
+unsigned int
 set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
@@ -3741,116 +3721,6 @@ int arch_setup_ht_irq(unsigned int irq, 
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset, int restrict)
-{
-	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
-	int mmr_pnode;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long flags;
-	int err;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	cfg = irq_cfg(irq);
-
-	err = assign_irq_vector(irq, cfg, eligible_cpu);
-	if (err != 0)
-		return err;
-
-	if (restrict == UV_AFFINITY_CPU)
-		desc->status |= IRQ_NO_BALANCING;
-	else
-		desc->status |= IRQ_MOVE_PCNTXT;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-				      irq_name);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->vector		= cfg->vector;
-	entry->delivery_mode	= apic->irq_delivery_mode;
-	entry->dest_mode	= apic->irq_dest_mode;
-	entry->polarity		= 0;
-	entry->trigger		= 0;
-	entry->mask		= 0;
-	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
-
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
-{
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->mask = 1;
-
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-
-int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg = desc->chip_data;
-	unsigned int dest;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long mmr_offset;
-	unsigned mmr_pnode;
-
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
-		return -1;
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = dest;
-
-	/* Get previously stored MMR and pnode of hub sourcing interrupts */
-	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
-		return -1;
-
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return 0;
-}
-#endif /* CONFIG_X86_64 */
-
 int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c	2009-10-13 13:53:47.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c	2009-10-13 13:55:49.000000000 -0500
@@ -25,6 +25,7 @@ struct uv_irq_2_mmr_pnode{
 };
 static spinlock_t uv_irq_lock;
 static struct rb_root uv_irq_root;
+static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
 static void uv_noop(unsigned int irq)
 {
@@ -132,6 +133,114 @@ int uv_irq_2_mmr_info(int irq, unsigned 
 }
 
 /*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+		       unsigned long mmr_offset, int restrict)
+{
+	const struct cpumask *eligible_cpu = cpumask_of(cpu);
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg;
+	int mmr_pnode;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+			sizeof(unsigned long));
+
+	cfg = irq_cfg(irq);
+
+	err = assign_irq_vector(irq, cfg, eligible_cpu);
+	if (err != 0)
+		return err;
+
+	if (restrict == UV_AFFINITY_CPU)
+		desc->status |= IRQ_NO_BALANCING;
+	else
+		desc->status |= IRQ_MOVE_PCNTXT;
+
+	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+				      irq_name);
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+			sizeof(unsigned long));
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	entry->mask = 1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+	unsigned int dest;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long mmr_offset;
+	unsigned mmr_pnode;
+
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
+		return -1;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode = apic->irq_dest_mode;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = dest;
+
+	/* Get previously stored MMR and pnode of hub sourcing interrupts */
+	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+		return -1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return 0;
+}
+
+/*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [tip:x86/apic] x86: SGI UV: Fix irq affinity for hub based interrupts
  2009-09-30 16:02 [PATCH] x86: SGI UV: Fix irq affinity for hub based interrupts Dimitri Sivanich
  2009-09-30 16:10 ` Robin Holt
  2009-10-12 19:34 ` Ingo Molnar
@ 2009-10-14  8:17 ` tip-bot for Dimitri Sivanich
  2 siblings, 0 replies; 22+ messages in thread
From: tip-bot for Dimitri Sivanich @ 2009-10-14  8:17 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, tglx, sivanich, mingo

Commit-ID:  6c2c502910247d2820cb630e7b28fb6bdecdbf45
Gitweb:     http://git.kernel.org/tip/6c2c502910247d2820cb630e7b28fb6bdecdbf45
Author:     Dimitri Sivanich <sivanich@sgi.com>
AuthorDate: Wed, 30 Sep 2009 11:02:59 -0500
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 14 Oct 2009 09:17:01 +0200

x86: SGI UV: Fix irq affinity for hub based interrupts

This patch fixes handling of uv hub irq affinity.  IRQs with ALL or
NODE affinity can be routed to cpus other than their originally
assigned cpu.  Those with CPU affinity cannot be rerouted.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
LKML-Reference: <20090930160259.GA7822@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uv/uv_irq.h |   15 ++++-
 arch/x86/kernel/apic/io_apic.c   |   49 +++++++++++++-
 arch/x86/kernel/uv_irq.c         |  128 +++++++++++++++++++++++++++++++++++---
 drivers/misc/sgi-xp/xpc_uv.c     |    5 +-
 4 files changed, 180 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index 9613c8c..5397e12 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry {
 		dest		: 32;
 };
 
+enum {
+	UV_AFFINITY_ALL,
+	UV_AFFINITY_NODE,
+	UV_AFFINITY_CPU
+};
+
 extern struct irq_chip uv_irq_chip;
 
-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern int
+arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
 extern void arch_disable_uv_irq(int, unsigned long);
+extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);
 
 #endif /* _ASM_X86_UV_UV_IRQ_H */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8c718c9..bb52e7f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset)
+		       unsigned long mmr_offset, int restrict)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
+	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
 	int mmr_pnode;
 	unsigned long mmr_value;
@@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	if (err != 0)
 		return err;
 
+	if (restrict == UV_AFFINITY_CPU)
+		desc->status |= IRQ_NO_BALANCING;
+	else
+		desc->status |= IRQ_MOVE_PCNTXT;
+
 	spin_lock_irqsave(&vector_lock, flags);
 	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
 				      irq_name);
@@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
  * Disable the specified MMR located on the specified blade so that MSIs are
  * longer allowed to be sent.
  */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
 {
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
-	int mmr_pnode;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
 
@@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
 	entry->mask = 1;
 
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 }
+
+int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+	unsigned int dest;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long mmr_offset;
+	unsigned mmr_pnode;
+
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
+		return -1;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+	entry->vector = cfg->vector;
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode = apic->irq_dest_mode;
+	entry->polarity = 0;
+	entry->trigger = 0;
+	entry->mask = 0;
+	entry->dest = dest;
+
+	/* Get previously stored MMR and pnode of hub sourcing interrupts */
+	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+		return -1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return 0;
+}
 #endif /* CONFIG_X86_64 */
 
 int __init io_apic_get_redir_entries (int ioapic)
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index aeef529..9a83775 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -9,10 +9,22 @@
  */
 
 #include <linux/module.h>
+#include <linux/rbtree.h>
 #include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/* MMR offset and pnode of hub sourcing interrupts for a given irq */
+struct uv_irq_2_mmr_pnode{
+	struct rb_node list;
+	unsigned long offset;
+	int pnode;
+	int irq;
+};
+static spinlock_t uv_irq_lock;
+static struct rb_root uv_irq_root;
 
 static void uv_noop(unsigned int irq)
 {
@@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = {
 	.unmask		= uv_noop,
 	.eoi		= uv_ack_apic,
 	.end		= uv_noop,
+	.set_affinity	= uv_set_irq_affinity,
 };
 
 /*
+ * Add offset and pnode information of the hub sourcing interrupts to the
+ * rb tree for a specific irq.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+	struct rb_node **link = &uv_irq_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct uv_irq_2_mmr_pnode *n;
+	struct uv_irq_2_mmr_pnode *e;
+	unsigned long irqflags;
+
+	n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+				uv_blade_to_memory_nid(blade));
+	if (!n)
+		return -ENOMEM;
+
+	n->irq = irq;
+	n->offset = offset;
+	n->pnode = uv_blade_to_pnode(blade);
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	/* Find the right place in the rbtree: */
+	while (*link) {
+		parent = *link;
+		e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list);
+
+		if (unlikely(irq == e->irq)) {
+			/* irq entry exists */
+			e->pnode = uv_blade_to_pnode(blade);
+			e->offset = offset;
+			spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+			kfree(n);
+			return 0;
+		}
+
+		if (irq < e->irq)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+
+	/* Insert the node into the rbtree. */
+	rb_link_node(&n->list, parent, link);
+	rb_insert_color(&n->list, &uv_irq_root);
+
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	return 0;
+}
+
+/* Retrieve offset and pnode information from the rb tree for a specific irq */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+
+		if (e->irq == irq) {
+			*offset = e->offset;
+			*pnode = e->pnode;
+			spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+			return 0;
+		}
+
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+	return -1;
+}
+
+/*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.
  */
 int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
-		 unsigned long mmr_offset)
+		 unsigned long mmr_offset, int restrict)
 {
-	int irq;
-	int ret;
+	int irq, ret;
+
+	irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
 
-	irq = create_irq();
 	if (irq <= 0)
 		return -EBUSY;
 
-	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
-	if (ret != irq)
+	ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+		restrict);
+	if (ret == irq)
+		uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+	else
 		destroy_irq(irq);
 
 	return ret;
@@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
  *
  * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
  */
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
 {
-	arch_disable_uv_irq(mmr_blade, mmr_offset);
+	struct uv_irq_2_mmr_pnode *e;
+	struct rb_node *n;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&uv_irq_lock, irqflags);
+	n = uv_irq_root.rb_node;
+	while (n) {
+		e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+		if (e->irq == irq) {
+			arch_disable_uv_irq(e->pnode, e->offset);
+			rb_erase(n, &uv_irq_root);
+			kfree(e);
+			break;
+		}
+		if (irq < e->irq)
+			n = n->rb_left;
+		else
+			n = n->rb_right;
+	}
+	spin_unlock_irqrestore(&uv_irq_lock, irqflags);
 	destroy_irq(irq);
 }
 EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index c76677a..b5bbe59 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
 	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
 
 #if defined CONFIG_X86_64
-	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+			UV_AFFINITY_CPU);
 	if (mq->irq < 0) {
 		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
 			-mq->irq);
@@ -136,7 +137,7 @@ static void
 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
 {
 #if defined CONFIG_X86_64
-	uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+	uv_teardown_irq(mq->irq);
 
 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
 	int mmr_pnode;

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [tip:x86/apic] x86, apic: Move SGI UV functionality out of generic IO-APIC code
  2009-10-13 20:32   ` [PATCH] x86: Move SGI UV functionality out of generic IO-APIC code Dimitri Sivanich
@ 2009-10-14  8:18     ` tip-bot for Dimitri Sivanich
  0 siblings, 0 replies; 22+ messages in thread
From: tip-bot for Dimitri Sivanich @ 2009-10-14  8:18 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, tglx, sivanich, mingo

Commit-ID:  9338ad6ffb70eca97f335d93c54943828c8b209e
Gitweb:     http://git.kernel.org/tip/9338ad6ffb70eca97f335d93c54943828c8b209e
Author:     Dimitri Sivanich <sivanich@sgi.com>
AuthorDate: Tue, 13 Oct 2009 15:32:36 -0500
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 14 Oct 2009 09:17:09 +0200

x86, apic: Move SGI UV functionality out of generic IO-APIC code

Move UV specific functionality out of the generic IO-APIC code.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
LKML-Reference: <20091013203236.GD20543@sgi.com>
[ Cleaned up the code some more in their new places. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/hw_irq.h    |   29 ++++++--
 arch/x86/include/asm/uv/uv_irq.h |    7 --
 arch/x86/kernel/apic/io_apic.c   |  140 ++------------------------------------
 arch/x86/kernel/uv_irq.c         |  123 +++++++++++++++++++++++++++++++--
 4 files changed, 145 insertions(+), 154 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index ba180d9..56f0877 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -79,14 +79,31 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
 					int ioapic, int ioapic_pin,
 					int trigger, int polarity)
 {
-	irq_attr->ioapic     = ioapic;
-	irq_attr->ioapic_pin = ioapic_pin;
-	irq_attr->trigger    = trigger;
-	irq_attr->polarity   = polarity;
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
 }
 
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
-					struct io_apic_irq_attr *irq_attr);
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
+struct irq_cfg {
+	struct irq_pin_list	*irq_2_pin;
+	cpumask_var_t		domain;
+	cpumask_var_t		old_domain;
+	unsigned		move_cleanup_count;
+	u8			vector;
+	u8			move_in_progress : 1;
+};
+
+extern struct irq_cfg *irq_cfg(unsigned int);
+extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void send_cleanup_vector(struct irq_cfg *);
+extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index 5397e12..d6b17c7 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,13 +31,6 @@ enum {
 	UV_AFFINITY_CPU
 };
 
-extern struct irq_chip uv_irq_chip;
-
-extern int
-arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
-extern void arch_disable_uv_irq(int, unsigned long);
-extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
-
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index bb52e7f..ce16b65 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -60,8 +60,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
 #include <asm/hw_irq.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_irq.h>
 
 #include <asm/apic.h>
 
@@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 	return pin;
 }
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
-	struct irq_pin_list *irq_2_pin;
-	cpumask_var_t domain;
-	cpumask_var_t old_domain;
-	unsigned move_cleanup_count;
-	u8 vector;
-	u8 move_in_progress : 1;
-};
-
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
 #ifdef CONFIG_SPARSE_IRQ
 static struct irq_cfg irq_cfgx[] = {
@@ -209,7 +193,7 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	struct irq_cfg *cfg = NULL;
 	struct irq_desc *desc;
@@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
 /* end for move_irq_desc */
 
 #else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
 {
 	return irq < nr_irqs ? irq_cfgx + irq : NULL;
 }
@@ -1237,8 +1221,7 @@ next:
 	return err;
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
 {
 	int err;
 	unsigned long flags;
@@ -2245,7 +2228,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
 {
 	cpumask_var_t cleanup_mask;
 
@@ -2289,15 +2272,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
 	}
 }
 
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
 /*
  * Either sets desc->affinity to a valid value, and returns
  * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
  * leaves desc->affinity untouched.
  */
-static unsigned int
+unsigned int
 set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
@@ -3725,116 +3705,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-		       unsigned long mmr_offset, int restrict)
-{
-	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg;
-	int mmr_pnode;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long flags;
-	int err;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	cfg = irq_cfg(irq);
-
-	err = assign_irq_vector(irq, cfg, eligible_cpu);
-	if (err != 0)
-		return err;
-
-	if (restrict == UV_AFFINITY_CPU)
-		desc->status |= IRQ_NO_BALANCING;
-	else
-		desc->status |= IRQ_MOVE_PCNTXT;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
-				      irq_name);
-	spin_unlock_irqrestore(&vector_lock, flags);
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->vector		= cfg->vector;
-	entry->delivery_mode	= apic->irq_delivery_mode;
-	entry->dest_mode	= apic->irq_dest_mode;
-	entry->polarity		= 0;
-	entry->trigger		= 0;
-	entry->mask		= 0;
-	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
-
-	mmr_pnode = uv_blade_to_pnode(mmr_blade);
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
-{
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-
-	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	entry->mask = 1;
-
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-
-int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	struct irq_cfg *cfg = desc->chip_data;
-	unsigned int dest;
-	unsigned long mmr_value;
-	struct uv_IO_APIC_route_entry *entry;
-	unsigned long mmr_offset;
-	unsigned mmr_pnode;
-
-	dest = set_desc_affinity(desc, mask);
-	if (dest == BAD_APICID)
-		return -1;
-
-	mmr_value = 0;
-	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = dest;
-
-	/* Get previously stored MMR and pnode of hub sourcing interrupts */
-	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
-		return -1;
-
-	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
-	if (cfg->move_in_progress)
-		send_cleanup_vector(cfg);
-
-	return 0;
-}
-#endif /* CONFIG_X86_64 */
-
 int __init io_apic_get_redir_entries (int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 9a83775..61d805d 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -18,13 +18,16 @@
 
 /* MMR offset and pnode of hub sourcing interrupts for a given irq */
 struct uv_irq_2_mmr_pnode{
-	struct rb_node list;
-	unsigned long offset;
-	int pnode;
-	int irq;
+	struct rb_node		list;
+	unsigned long		offset;
+	int			pnode;
+	int			irq;
 };
-static spinlock_t uv_irq_lock;
-static struct rb_root uv_irq_root;
+
+static spinlock_t		uv_irq_lock;
+static struct rb_root		uv_irq_root;
+
+static int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
 static void uv_noop(unsigned int irq)
 {
@@ -132,6 +135,114 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
 }
 
 /*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+static int
+arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+		       unsigned long mmr_offset, int restrict)
+{
+	const struct cpumask *eligible_cpu = cpumask_of(cpu);
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg;
+	int mmr_pnode;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+			sizeof(unsigned long));
+
+	cfg = irq_cfg(irq);
+
+	err = assign_irq_vector(irq, cfg, eligible_cpu);
+	if (err != 0)
+		return err;
+
+	if (restrict == UV_AFFINITY_CPU)
+		desc->status |= IRQ_NO_BALANCING;
+	else
+		desc->status |= IRQ_MOVE_PCNTXT;
+
+	set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+				      irq_name);
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
+
+	mmr_pnode = uv_blade_to_pnode(mmr_blade);
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * no longer allowed to be sent.
+ */
+static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
+{
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
+			sizeof(unsigned long));
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+	entry->mask = 1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+
+static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	struct irq_cfg *cfg = desc->chip_data;
+	unsigned int dest;
+	unsigned long mmr_value;
+	struct uv_IO_APIC_route_entry *entry;
+	unsigned long mmr_offset;
+	unsigned mmr_pnode;
+
+	dest = set_desc_affinity(desc, mask);
+	if (dest == BAD_APICID)
+		return -1;
+
+	mmr_value = 0;
+	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= dest;
+
+	/* Get previously stored MMR and pnode of hub sourcing interrupts */
+	if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+		return -1;
+
+	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
+	return 0;
+}
+
+/*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2] x86/apic: limit irq affinity
       [not found]         ` <20091014122653.GA15048@elte.hu>
@ 2009-10-15  1:13           ` Dimitri Sivanich
  2009-10-15  5:30             ` Yinghai Lu
  0 siblings, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-15  1:13 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Yinghai Lu

This patch allows for hard restrictions to irq affinity via a new cpumask and
device node value in the irq_cfg structure.

The mask forces IRQ affinity to remain within the specified cpu domain.
On some UV systems, this domain will be limited to the nodes accessible
to the given node.  Currently other X86 systems will have all bits in
the cpumask set, so non-UV systems will remain unaffected at this time.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

Removed UV specific code from generic IO APIC code.

 arch/x86/Kconfig                   |    1 
 arch/x86/include/asm/hw_irq.h      |    3 
 arch/x86/include/asm/uv/uv_irq.h   |    1 
 arch/x86/include/asm/uv/uv_mmrs.h  |   25 +++++
 arch/x86/kernel/apic/io_apic.c     |  144 ++++++++++++++++++++++++++-------
 arch/x86/kernel/apic/x2apic_uv_x.c |    2 
 arch/x86/kernel/uv_irq.c           |   77 +++++++++++++++++
 7 files changed, 225 insertions(+), 28 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c	2009-10-14 15:04:23.000000000 -0500
@@ -168,6 +168,19 @@ void __init io_apic_disable_legacy(void)
 	nr_irqs_gsi = 0;
 }
 
+void (*set_irq_cfg_allowed)(cpumask_var_t, int) = NULL;
+/*
+ * Setup IRQ affinity restriction.
+ */
+static void set_irq_cfg_cpus_allowed(struct irq_cfg *irq_cfg)
+{
+	if (set_irq_cfg_allowed)
+		set_irq_cfg_allowed(irq_cfg->allowed, irq_cfg->node);
+	else
+		/* Default to allow anything */
+		cpumask_setall(irq_cfg->allowed);
+}
+
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
@@ -183,8 +196,11 @@ int __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
+		cfg->node = node;
 		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+		zalloc_cpumask_var_node(&cfg[i].allowed, GFP_NOWAIT, node);
+		set_irq_cfg_cpus_allowed(&cfg[i]);
 		if (i < nr_legacy_irqs)
 			cpumask_setall(cfg[i].domain);
 	}
@@ -213,12 +229,19 @@ static struct irq_cfg *get_one_free_irq_
 	if (cfg) {
 		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
-			cfg = NULL;
-		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
+			return NULL;
+		}
+		if (!zalloc_cpumask_var_node(&cfg->old_domain,
 							  GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
-			cfg = NULL;
+			return NULL;
+		}
+		if (!zalloc_cpumask_var_node(&cfg->allowed, GFP_ATOMIC, node)) {
+			free_cpumask_var(cfg->old_domain);
+			free_cpumask_var(cfg->domain);
+			kfree(cfg);
+			return NULL;
 		}
 	}
 
@@ -231,12 +254,14 @@ int arch_init_chip_data(struct irq_desc 
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(node);
+		cfg = desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
 		}
 	}
+	cfg->node = node;
+	set_irq_cfg_cpus_allowed(cfg);
 
 	return 0;
 }
@@ -318,6 +343,10 @@ void arch_init_copy_chip_data(struct irq
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
+	cfg->node = node;
+
+	set_irq_cfg_cpus_allowed(cfg);
+
 	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
@@ -1428,16 +1457,23 @@ static void setup_IO_APIC_irq(int apic_i
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
+	cpumask_var_t tmp_mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = desc->chip_data;
 
-	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1487,7 @@ static void setup_IO_APIC_irq(int apic_i
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic_id].apicid, pin);
 		__clear_irq_vector(irq, cfg);
-		return;
+		goto error;
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1495,9 @@ static void setup_IO_APIC_irq(int apic_i
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
+error:
+	free_cpumask_var(tmp_mask);
+	return;
 }
 
 static struct {
@@ -2265,18 +2304,32 @@ set_desc_affinity(struct irq_desc *desc,
 {
 	struct irq_cfg *cfg;
 	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
+	cpumask_var_t tmp_mask;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return BAD_APICID;
 
-	cpumask_copy(desc->affinity, mask);
+	if (!cpumask_and(tmp_mask, mask, cfg->allowed))
+		goto error;
+
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	cpumask_copy(desc->affinity, tmp_mask);
+
+	free_cpumask_var(tmp_mask);
 
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+	free_cpumask_var(tmp_mask);
+	return BAD_APICID;
 }
 
 static int
@@ -2332,22 +2385,32 @@ migrate_ioapic_irq_desc(struct irq_desc 
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
+	cpumask_var_t tmp_mask;
 	unsigned int dest;
 	unsigned int irq;
 	int ret = -1;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return ret;
 
-	irq = desc->irq;
+	if (!cpumask_and(tmp_mask, mask, cfg->allowed))
+		goto error;
+
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
 	if (get_irte(irq, &irte))
-		return ret;
+		goto error;
 
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return ret;
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	ret = 0;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -2360,9 +2423,10 @@ migrate_ioapic_irq_desc(struct irq_desc 
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	cpumask_copy(desc->affinity, mask);
-
-	return 0;
+	cpumask_copy(desc->affinity, tmp_mask);
+error:
+	free_cpumask_var(tmp_mask);
+	return ret;
 }
 
 /*
@@ -3146,6 +3210,8 @@ unsigned int create_irq_nr(unsigned int 
 
 	if (irq > 0) {
 		dynamic_irq_init(irq);
+		cfg_new->node = node;
+		set_irq_cfg_cpus_allowed(cfg_new);
 		/* restore it, in case dynamic_irq_init clear it */
 		if (desc_new)
 			desc_new->chip_data = cfg_new;
@@ -3197,16 +3263,25 @@ static int msi_compose_msg(struct pci_de
 	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
+	cpumask_var_t tmp_mask;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (err)
-		return err;
+		goto error;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	if (irq_remapped(irq)) {
 		struct irte irte;
@@ -3264,6 +3339,8 @@ static int msi_compose_msg(struct pci_de
 				MSI_DATA_DELIVERY_LOWPRI) |
 			MSI_DATA_VECTOR(cfg->vector);
 	}
+error:
+	free_cpumask_var(tmp_mask);
 	return err;
 }
 
@@ -3681,19 +3758,28 @@ static struct irq_chip ht_irq_chip = {
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
+	cpumask_var_t tmp_mask;
 	int err;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed)) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-						    apic->target_cpus());
+		dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3717,6 +3803,8 @@ int arch_setup_ht_irq(unsigned int irq, 
 
 		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
+error:
+	free_cpumask_var(tmp_mask);
 	return err;
 }
 #endif /* CONFIG_HT_IRQ */
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h	2009-10-14 12:44:57.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h	2009-10-14 12:48:50.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
 };
 
 /* ========================================================================= */
+/*                     UVH_LB_SOCKET_DESTINATION_TABLE                       */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+    unsigned long	v;
+    struct uvh_lb_socket_destination_table_s {
+	unsigned long	rsvd_0  :  1;  /*    */
+	unsigned long	node_id : 14;  /* RW */
+	unsigned long	chip_id :  1;  /* RW */
+	unsigned long	parity  :  1;  /* RW */
+	unsigned long	rsvd_17_63: 47;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
 /*                          UVH_LOCAL_INT0_CONFIG                            */
 /* ========================================================================= */
 #define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig	2009-10-14 12:44:57.000000000 -0500
+++ linux/arch/x86/Kconfig	2009-10-14 15:04:23.000000000 -0500
@@ -365,6 +365,7 @@ config X86_UV
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
 	depends on X86_X2APIC
+	depends on NUMA_IRQ_DESC
 	---help---
 	  This option is needed in order to support SGI Ultraviolet systems.
 	  If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c	2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c	2009-10-14 14:27:10.000000000 -0500
@@ -241,6 +241,83 @@ static int uv_set_irq_affinity(unsigned 
 }
 
 /*
+ * Setup the cpumask for IRQ restriction for a given UV node.
+ */
+static void uv_set_irq_cfg_cpus_allowed(struct cpumask *mask, int node)
+{
+#ifdef CONFIG_SPARSE_IRQ
+	unsigned long pnode_tbl[UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH];
+	unsigned long *pa, *pa_end;
+	int cpu, i;
+
+	/* Assume nodes accessible from node 0 */
+	if (node < 0)
+		node = 0;
+
+	pa = uv_global_mmr64_address(uv_node_to_pnode(node),
+			UVH_LB_SOCKET_DESTINATION_TABLE);
+
+	for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++, i++)
+		pnode_tbl[i] = UV_NASID_TO_PNODE(*pa &
+			UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+	cpumask_clear(mask);
+
+	pa = pnode_tbl;
+	pa_end = pa + UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH;
+
+	/* Select the cpus on nodes accessible from our hub */
+	for_each_possible_cpu(cpu) {
+		int p = uv_cpu_to_pnode(cpu);
+
+		if (p < *pa) {
+			while (p < *pa) {
+				pa--;
+				if (pa < pnode_tbl) {
+					pa++;
+					break;
+				}
+			}
+			if (*pa == p)
+				cpumask_set_cpu(cpu, mask);
+			continue;
+		}
+
+		while (*pa < p) {
+			pa++;
+			if (pa == pa_end) {
+				pa--;
+				break;
+			}
+		}
+
+		if (*pa == p)
+			cpumask_set_cpu(cpu, mask);
+	}
+#else
+	cpumask_setall(mask);
+#endif
+}
+
+/*
+ * Set up IRQ affinity restriction for IRQs set up prior to the availability
+ * of UV topology information.
+ */
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+	struct irq_cfg *cfg;
+	int i;
+
+	set_irq_cfg_allowed = uv_set_irq_cfg_cpus_allowed;
+	/* Set allowed mask now that topology information is known */
+	for (i = 0; i < NR_IRQS; i++) {
+		cfg = irq_cfg(i);
+		if (cfg)
+			uv_set_irq_cfg_cpus_allowed(cfg->allowed, cfg->node);
+	}
+}
+
+/*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2009-10-14 12:44:57.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2009-10-14 15:04:23.000000000 -0500
@@ -23,6 +23,7 @@
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/uv/bios.h>
@@ -659,5 +660,6 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
+	arch_init_uv_cfg_cpus_allowed();
 	proc_mkdir("sgi_uv", NULL);
 }
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h	2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h	2009-10-14 12:48:50.000000000 -0500
@@ -94,11 +94,14 @@ struct irq_cfg {
 	struct irq_pin_list *irq_2_pin;
 	cpumask_var_t domain;
 	cpumask_var_t old_domain;
+	cpumask_var_t allowed;
+	int node;
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
 };
 
+extern void (*set_irq_cfg_allowed)(cpumask_var_t, int);
 extern struct irq_cfg *irq_cfg(unsigned int);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
 extern void send_cleanup_vector(struct irq_cfg *);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h	2009-10-14 12:48:50.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h	2009-10-14 12:48:50.000000000 -0500
@@ -31,6 +31,7 @@ enum {
 	UV_AFFINITY_CPU
 };
 
+extern void arch_init_uv_cfg_cpus_allowed(void);
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] x86/apic: limit irq affinity
  2009-10-15  1:13           ` [PATCH v2] x86/apic: limit irq affinity Dimitri Sivanich
@ 2009-10-15  5:30             ` Yinghai Lu
  2009-10-15 13:50               ` Dimitri Sivanich
  0 siblings, 1 reply; 22+ messages in thread
From: Yinghai Lu @ 2009-10-15  5:30 UTC (permalink / raw)
  To: Dimitri Sivanich
  Cc: Ingo Molnar, linux-kernel, H. Peter Anvin, Thomas Gleixner

Dimitri Sivanich wrote:
> This patch allows for hard restrictions to irq affinity via a new cpumask and
> device node value in the irq_cfg structure.
> 
> The mask forces IRQ affinity to remain within the specified cpu domain.
> On some UV systems, this domain will be limited to the nodes accessible
> to the given node.  Currently other X86 systems will have all bits in
> the cpumask set, so non-UV systems will remain unaffected at this time.
> 
> Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
> 
> ---
> 
> Removed UV specific code from generic IO APIC code.
> 
>  arch/x86/Kconfig                   |    1 
>  arch/x86/include/asm/hw_irq.h      |    3 
>  arch/x86/include/asm/uv/uv_irq.h   |    1 
>  arch/x86/include/asm/uv/uv_mmrs.h  |   25 +++++
>  arch/x86/kernel/apic/io_apic.c     |  144 ++++++++++++++++++++++++++-------
>  arch/x86/kernel/apic/x2apic_uv_x.c |    2 
>  arch/x86/kernel/uv_irq.c           |   77 +++++++++++++++++
>  7 files changed, 225 insertions(+), 28 deletions(-)
> 
> Index: linux/arch/x86/kernel/apic/io_apic.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-10-14 12:48:50.000000000 -0500
> +++ linux/arch/x86/kernel/apic/io_apic.c	2009-10-14 15:04:23.000000000 -0500
> @@ -168,6 +168,19 @@ void __init io_apic_disable_legacy(void)
>  	nr_irqs_gsi = 0;
>  }
>  
> +void (*set_irq_cfg_allowed)(cpumask_var_t, int) = NULL;
> +/*
> + * Setup IRQ affinity restriction.
> + */
> +static void set_irq_cfg_cpus_allowed(struct irq_cfg *irq_cfg)
> +{
> +	if (set_irq_cfg_allowed)
> +		set_irq_cfg_allowed(irq_cfg->allowed, irq_cfg->node);
> +	else
> +		/* Default to allow anything */
> +		cpumask_setall(irq_cfg->allowed);
> +}
> +
>  int __init arch_early_irq_init(void)
>  {
>  	struct irq_cfg *cfg;
> @@ -183,8 +196,11 @@ int __init arch_early_irq_init(void)
>  	for (i = 0; i < count; i++) {
>  		desc = irq_to_desc(i);
>  		desc->chip_data = &cfg[i];
> +		cfg->node = node;
>  		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
>  		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
> +		zalloc_cpumask_var_node(&cfg[i].allowed, GFP_NOWAIT, node);
> +		set_irq_cfg_cpus_allowed(&cfg[i]);
>  		if (i < nr_legacy_irqs)
>  			cpumask_setall(cfg[i].domain);
>  	}
> @@ -213,12 +229,19 @@ static struct irq_cfg *get_one_free_irq_
>  	if (cfg) {
>  		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
>  			kfree(cfg);
> -			cfg = NULL;
> -		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
> +			return NULL;
> +		}
> +		if (!zalloc_cpumask_var_node(&cfg->old_domain,
>  							  GFP_ATOMIC, node)) {
>  			free_cpumask_var(cfg->domain);
>  			kfree(cfg);
> -			cfg = NULL;
> +			return NULL;
> +		}
> +		if (!zalloc_cpumask_var_node(&cfg->allowed, GFP_ATOMIC, node)) {
> +			free_cpumask_var(cfg->old_domain);
> +			free_cpumask_var(cfg->domain);
> +			kfree(cfg);
> +			return NULL;
>  		}
>  	}
>  
> @@ -231,12 +254,14 @@ int arch_init_chip_data(struct irq_desc 
>  
>  	cfg = desc->chip_data;
>  	if (!cfg) {
> -		desc->chip_data = get_one_free_irq_cfg(node);
> +		cfg = desc->chip_data = get_one_free_irq_cfg(node);
>  		if (!desc->chip_data) {
>  			printk(KERN_ERR "can not alloc irq_cfg\n");
>  			BUG_ON(1);
>  		}
>  	}
> +	cfg->node = node;
> +	set_irq_cfg_cpus_allowed(cfg);
>  
>  	return 0;
>  }
> @@ -318,6 +343,10 @@ void arch_init_copy_chip_data(struct irq
>  
>  	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
>  
> +	cfg->node = node;
> +
> +	set_irq_cfg_cpus_allowed(cfg);
> +
>  	init_copy_irq_2_pin(old_cfg, cfg, node);
>  }
>  
> @@ -1428,16 +1457,23 @@ static void setup_IO_APIC_irq(int apic_i
>  	struct irq_cfg *cfg;
>  	struct IO_APIC_route_entry entry;
>  	unsigned int dest;
> +	cpumask_var_t tmp_mask;
>  
>  	if (!IO_APIC_IRQ(irq))
>  		return;
>  
>  	cfg = desc->chip_data;
>  
> -	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
> +	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
>  		return;
>  
> -	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
> +	if (!cpumask_and(tmp_mask, apic->target_cpus(), cfg->allowed))
> +		goto error;
> +
> +	if (assign_irq_vector(irq, cfg, tmp_mask))
> +		goto error;
> +
> +	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);

can you check if we can reuse target_cpus for this purpose?

YH

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] x86/apic: limit irq affinity
  2009-10-15  5:30             ` Yinghai Lu
@ 2009-10-15 13:50               ` Dimitri Sivanich
  2009-10-20 12:56                 ` Dimitri Sivanich
  0 siblings, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-15 13:50 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Ingo Molnar, linux-kernel, H. Peter Anvin, Thomas Gleixner

On Wed, Oct 14, 2009 at 10:30:12PM -0700, Yinghai Lu wrote:
> Dimitri Sivanich wrote:
> > This patch allows for hard restrictions to irq affinity via a new cpumask and
> > device node value in the irq_cfg structure.
> > 
> > The mask forces IRQ affinity to remain within the specified cpu domain.
> > On some UV systems, this domain will be limited to the nodes accessible
> > to the given node.  Currently other X86 systems will have all bits in
> > the cpumask set, so non-UV systems will remain unaffected at this time.
> > 
> 
> can you check if we can reuse target_cpus for this purpose?
>

Yinghai,

The 'target_cpus' mask is in struct 'apic'.  It is a platform level mask
(only one mask per platform).

The 'allowed' mask that I am adding is a per irq level mask (one mask per irq).
Each irq might be coming from a device attached to a different node, and each
of those nodes might require its irqs to have a different mask.

For example, say there is a pci device attached to node 2.  Node 2 might only
want (or be able) to route its interrupts to nodes 0-127.  It would have an
'allowed' mask allowing only processors existing on the first 128 nodes.  A
pci device attached to node 150 would have a different allowed mask, since it
would route its interrupts to a different set of nodes.

Use of something like 'target_cpus' would require recalculating this every
time someone changes affinity, instead of just 'and'ing in the mask.

Dimitri

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2] x86/apic: limit irq affinity
  2009-10-15 13:50               ` Dimitri Sivanich
@ 2009-10-20 12:56                 ` Dimitri Sivanich
  2009-10-20 13:38                   ` [PATCH v3] " Dimitri Sivanich
  0 siblings, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-20 12:56 UTC (permalink / raw)
  To: Yinghai Lu, Ingo Molnar; +Cc: H. Peter Anvin, Thomas Gleixner, linux-kernel

On Thu, Oct 15, 2009 at 08:50:39AM -0500, Dimitri Sivanich wrote:
> On Wed, Oct 14, 2009 at 10:30:12PM -0700, Yinghai Lu wrote:
> > Dimitri Sivanich wrote:
> > > This patch allows for hard restrictions to irq affinity via a new cpumask and
> > > device node value in the irq_cfg structure.
> > > 
> > > The mask forces IRQ affinity to remain within the specified cpu domain.
> > > On some UV systems, this domain will be limited to the nodes accessible
> > > to the given node.  Currently other X86 systems will have all bits in
> > > the cpumask set, so non-UV systems will remain unaffected at this time.
> > > 
> > 
> > can you check if we can reuse target_cpus for this purpose?
> >
>  
> The 'target_cpus' mask is in struct 'apic'.  It is a platform level mask
> (only one mask per platform).
> 
> The 'allowed' mask that I am adding is a per irq level mask (one mask per irq).
> Each irq might be coming from a device attached to a different node, and each
> of those nodes might require its irqs to have a different mask.
>

Assuming that the real issue here is in adding any more cpumasks to irq_cfg, I've created another version of the patch that does not add the cpumask to irq_cfg.  The UV specific irq code will store these cpumasks (one per node).

Will send this shortly.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v3] x86/apic: limit irq affinity
  2009-10-20 12:56                 ` Dimitri Sivanich
@ 2009-10-20 13:38                   ` Dimitri Sivanich
  2009-10-20 18:58                     ` Yinghai Lu
  2009-10-21  1:12                     ` [PATCH v4] " Dimitri Sivanich
  0 siblings, 2 replies; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-20 13:38 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Yinghai Lu, H. Peter Anvin, Thomas Gleixner

This patch allows for hard restrictions to irq affinity on x86 systems.

Affinity is masked to allow only those cpus which the subarchitecture
deems accessible by the given irq.

On some UV systems, this domain will be limited to the nodes accessible
to the irq's node.  Initially other X86 systems will not mask off any cpus
so non-UV systems will remain unaffected.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

Removed allowed cpumask from irq_cfg.  Storing allowed cpumasks in UV
specific IRQ code.

 arch/x86/Kconfig                   |    1 
 arch/x86/include/asm/hw_irq.h      |    3 
 arch/x86/include/asm/uv/uv_irq.h   |    1 
 arch/x86/include/asm/uv/uv_mmrs.h  |   25 ++++++
 arch/x86/kernel/apic/io_apic.c     |  123 ++++++++++++++++++++++++++-------
 arch/x86/kernel/apic/x2apic_uv_x.c |    4 -
 arch/x86/kernel/uv_irq.c           |   58 +++++++++++++++
 7 files changed, 189 insertions(+), 26 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-10-19 15:22:52.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c	2009-10-19 20:57:29.000000000 -0500
@@ -168,6 +168,17 @@ void __init io_apic_disable_legacy(void)
 	nr_irqs_gsi = 0;
 }
 
+static int default_irq_allowed_and(struct irq_cfg *cfg, struct cpumask *dstp,
+						const struct cpumask *srcp)
+{
+	cpumask_copy(dstp, srcp);
+
+	return 1;
+}
+
+int (*x86_irq_allowed_and)(struct irq_cfg *, struct cpumask *,
+		const struct cpumask *) = default_irq_allowed_and;
+
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
@@ -183,6 +194,7 @@ int __init arch_early_irq_init(void)
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
+		cfg->node = node;
 		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
 		if (i < nr_legacy_irqs)
@@ -231,12 +243,13 @@ int arch_init_chip_data(struct irq_desc 
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(node);
+		cfg = desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
 		}
 	}
+	cfg->node = node;
 
 	return 0;
 }
@@ -318,6 +331,8 @@ void arch_init_copy_chip_data(struct irq
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
+	cfg->node = node;
+
 	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
@@ -1428,16 +1443,23 @@ static void setup_IO_APIC_irq(int apic_i
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
+	cpumask_var_t tmp_mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = desc->chip_data;
 
-	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	if (!x86_irq_allowed_and(cfg, tmp_mask, apic->target_cpus()))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1473,7 @@ static void setup_IO_APIC_irq(int apic_i
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic_id].apicid, pin);
 		__clear_irq_vector(irq, cfg);
-		return;
+		goto error;
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1481,9 @@ static void setup_IO_APIC_irq(int apic_i
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
+error:
+	free_cpumask_var(tmp_mask);
+	return;
 }
 
 static struct {
@@ -2282,18 +2307,32 @@ set_desc_affinity(struct irq_desc *desc,
 {
 	struct irq_cfg *cfg;
 	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
+	cpumask_var_t tmp_mask;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return BAD_APICID;
 
-	cpumask_copy(desc->affinity, mask);
+	if (!x86_irq_allowed_and(cfg, tmp_mask, mask))
+		goto error;
+
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	cpumask_copy(desc->affinity, tmp_mask);
+
+	free_cpumask_var(tmp_mask);
 
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+	free_cpumask_var(tmp_mask);
+	return BAD_APICID;
 }
 
 static int
@@ -2349,22 +2388,32 @@ migrate_ioapic_irq_desc(struct irq_desc 
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
+	cpumask_var_t tmp_mask;
 	unsigned int dest;
 	unsigned int irq;
 	int ret = -1;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return ret;
 
-	irq = desc->irq;
+	if (!x86_irq_allowed_and(cfg, tmp_mask, mask))
+		goto error;
+
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
 	if (get_irte(irq, &irte))
-		return ret;
+		goto error;
 
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return ret;
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+	ret = 0;
+
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -2377,9 +2426,10 @@ migrate_ioapic_irq_desc(struct irq_desc 
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	cpumask_copy(desc->affinity, mask);
-
-	return 0;
+	cpumask_copy(desc->affinity, tmp_mask);
+error:
+	free_cpumask_var(tmp_mask);
+	return ret;
 }
 
 /*
@@ -3163,6 +3213,7 @@ unsigned int create_irq_nr(unsigned int 
 
 	if (irq > 0) {
 		dynamic_irq_init(irq);
+		cfg_new->node = node;
 		/* restore it, in case dynamic_irq_init clear it */
 		if (desc_new)
 			desc_new->chip_data = cfg_new;
@@ -3214,16 +3265,25 @@ static int msi_compose_msg(struct pci_de
 	struct irq_cfg *cfg;
 	int err;
 	unsigned dest;
+	cpumask_var_t tmp_mask;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (!x86_irq_allowed_and(cfg, tmp_mask, apic->target_cpus())) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (err)
-		return err;
+		goto error;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	if (irq_remapped(irq)) {
 		struct irte irte;
@@ -3281,6 +3341,8 @@ static int msi_compose_msg(struct pci_de
 				MSI_DATA_DELIVERY_LOWPRI) |
 			MSI_DATA_VECTOR(cfg->vector);
 	}
+error:
+	free_cpumask_var(tmp_mask);
 	return err;
 }
 
@@ -3698,19 +3760,28 @@ static struct irq_chip ht_irq_chip = {
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
+	cpumask_var_t tmp_mask;
 	int err;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (!x86_irq_allowed_and(cfg, tmp_mask, apic->target_cpus())) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-						    apic->target_cpus());
+		dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3734,6 +3805,8 @@ int arch_setup_ht_irq(unsigned int irq, 
 
 		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
+error:
+	free_cpumask_var(tmp_mask);
 	return err;
 }
 #endif /* CONFIG_HT_IRQ */
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h	2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h	2009-10-19 20:57:29.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
 };
 
 /* ========================================================================= */
+/*                     UVH_LB_SOCKET_DESTINATION_TABLE                       */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+    unsigned long	v;
+    struct uvh_lb_socket_destination_table_s {
+	unsigned long	rsvd_0  :  1;  /*    */
+	unsigned long	node_id : 14;  /* RW */
+	unsigned long	chip_id :  1;  /* RW */
+	unsigned long	parity  :  1;  /* RW */
+	unsigned long	rsvd_17_63: 47;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
 /*                          UVH_LOCAL_INT0_CONFIG                            */
 /* ========================================================================= */
 #define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig	2009-10-19 15:22:52.000000000 -0500
+++ linux/arch/x86/Kconfig	2009-10-19 20:57:29.000000000 -0500
@@ -365,6 +365,7 @@ config X86_UV
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
 	depends on X86_X2APIC
+	depends on NUMA_IRQ_DESC
 	---help---
 	  This option is needed in order to support SGI Ultraviolet systems.
 	  If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c	2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c	2009-10-20 08:23:08.000000000 -0500
@@ -242,6 +242,64 @@ static int uv_set_irq_affinity(unsigned 
 	return 0;
 }
 
+static cpumask_var_t *uv_irq_cpus_allowed;
+
+int uv_irq_cpus_allowed_and(struct irq_cfg *cfg, struct cpumask *dstp,
+				const struct cpumask *srcp)
+{
+	int bid;
+
+	if (cfg == NULL || cfg->node < 0) {
+		cpumask_copy(dstp, srcp);
+		return 1;
+	}
+
+	bid = uv_node_to_blade_id(cfg->node);
+
+	return cpumask_and(dstp, srcp, uv_irq_cpus_allowed[bid]);
+}
+
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+	int bid;
+
+	uv_irq_cpus_allowed = kzalloc(uv_num_possible_blades() *
+			sizeof(cpumask_var_t *), GFP_KERNEL);
+
+	if (uv_irq_cpus_allowed == NULL) {
+		printk(KERN_EMERG "Out of memory");
+		return;
+	}
+
+	for_each_possible_blade(bid) {
+		unsigned long *pa;
+		int i;
+
+		if (!zalloc_cpumask_var_node(&uv_irq_cpus_allowed[bid],
+				GFP_KERNEL, uv_blade_to_memory_nid(bid))) {
+			printk(KERN_EMERG "Out of memory on blade %d", bid);
+			return;
+		}
+
+		pa = uv_global_mmr64_address(uv_blade_to_pnode(bid),
+			UVH_LB_SOCKET_DESTINATION_TABLE);
+
+		for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++,
+				i++) {
+			int cpu;
+			int pnode = UV_NASID_TO_PNODE(*pa &
+				UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+			for_each_possible_cpu(cpu)
+				if (uv_cpu_to_pnode(cpu) == pnode)
+					cpumask_set_cpu(cpu,
+						uv_irq_cpus_allowed[bid]);
+		}
+	}
+
+	x86_irq_allowed_and = uv_irq_cpus_allowed_and;
+}
+
 /*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2009-10-19 15:22:52.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2009-10-19 20:57:29.000000000 -0500
@@ -23,6 +23,7 @@
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/uv/bios.h>
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 static const struct cpumask *uv_target_cpus(void)
 {
-	return cpumask_of(0);
+	return cpu_online_mask;
 }
 
 static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -659,5 +660,6 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
+	arch_init_uv_cfg_cpus_allowed();
 	proc_mkdir("sgi_uv", NULL);
 }
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h	2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h	2009-10-19 20:57:29.000000000 -0500
@@ -94,11 +94,14 @@ struct irq_cfg {
 	struct irq_pin_list	*irq_2_pin;
 	cpumask_var_t		domain;
 	cpumask_var_t		old_domain;
+	int			node;
 	unsigned		move_cleanup_count;
 	u8			vector;
 	u8			move_in_progress : 1;
 };
 
+extern int (*x86_irq_allowed_and)(struct irq_cfg *, struct cpumask *,
+					const struct cpumask *);
 extern struct irq_cfg *irq_cfg(unsigned int);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
 extern void send_cleanup_vector(struct irq_cfg *);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h	2009-10-19 13:11:47.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h	2009-10-19 20:57:29.000000000 -0500
@@ -31,6 +31,7 @@ enum {
 	UV_AFFINITY_CPU
 };
 
+extern void arch_init_uv_cfg_cpus_allowed(void);
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v3] x86/apic: limit irq affinity
  2009-10-20 13:38                   ` [PATCH v3] " Dimitri Sivanich
@ 2009-10-20 18:58                     ` Yinghai Lu
  2009-10-21  1:06                       ` Dimitri Sivanich
  2009-10-21  1:12                     ` [PATCH v4] " Dimitri Sivanich
  1 sibling, 1 reply; 22+ messages in thread
From: Yinghai Lu @ 2009-10-20 18:58 UTC (permalink / raw)
  To: Dimitri Sivanich
  Cc: Ingo Molnar, linux-kernel, H. Peter Anvin, Thomas Gleixner

Dimitri Sivanich wrote:
> This patch allows for hard restrictions to irq affinity on x86 systems.
> 
> Affinity is masked to allow only those cpus which the subarchitecture
> deems accessible by the given irq.
> 
> On some UV systems, this domain will be limited to the nodes accessible
> to the irq's node.  Initially other X86 systems will not mask off any cpus
> so non-UV systems will remain unaffected.
> 
> Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
> 
> ---
> 
> Removed allowed cpumask from irq_cfg.  Storing allowed cpumasks in UV
> specific IRQ code.
> 
>  arch/x86/Kconfig                   |    1 
>  arch/x86/include/asm/hw_irq.h      |    3 
>  arch/x86/include/asm/uv/uv_irq.h   |    1 
>  arch/x86/include/asm/uv/uv_mmrs.h  |   25 ++++++
>  arch/x86/kernel/apic/io_apic.c     |  123 ++++++++++++++++++++++++++-------
>  arch/x86/kernel/apic/x2apic_uv_x.c |    4 -
>  arch/x86/kernel/uv_irq.c           |   58 +++++++++++++++
>  7 files changed, 189 insertions(+), 26 deletions(-)
> 
> Index: linux/arch/x86/kernel/apic/io_apic.c
> ===================================================================
> --- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-10-19 15:22:52.000000000 -0500
> +++ linux/arch/x86/kernel/apic/io_apic.c	2009-10-19 20:57:29.000000000 -0500
> @@ -168,6 +168,17 @@ void __init io_apic_disable_legacy(void)
>  	nr_irqs_gsi = 0;
>  }
>  
> +static int default_irq_allowed_and(struct irq_cfg *cfg, struct cpumask *dstp,
> +						const struct cpumask *srcp)
> +{
> +	cpumask_copy(dstp, srcp);
> +
> +	return 1;
> +}
> +
> +int (*x86_irq_allowed_and)(struct irq_cfg *, struct cpumask *,
> +		const struct cpumask *) = default_irq_allowed_and;
> +
>  int __init arch_early_irq_init(void)
>  {
>  	struct irq_cfg *cfg;
> @@ -183,6 +194,7 @@ int __init arch_early_irq_init(void)
>  	for (i = 0; i < count; i++) {
>  		desc = irq_to_desc(i);
>  		desc->chip_data = &cfg[i];
> +		cfg->node = node;
>  		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
>  		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
>  		if (i < nr_legacy_irqs)
> @@ -231,12 +243,13 @@ int arch_init_chip_data(struct irq_desc 
>  
>  	cfg = desc->chip_data;
>  	if (!cfg) {
> -		desc->chip_data = get_one_free_irq_cfg(node);
> +		cfg = desc->chip_data = get_one_free_irq_cfg(node);
>  		if (!desc->chip_data) {
>  			printk(KERN_ERR "can not alloc irq_cfg\n");
>  			BUG_ON(1);
>  		}
>  	}
> +	cfg->node = node;

how about desc->node ?

YH

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v3] x86/apic: limit irq affinity
  2009-10-20 18:58                     ` Yinghai Lu
@ 2009-10-21  1:06                       ` Dimitri Sivanich
  0 siblings, 0 replies; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-21  1:06 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Ingo Molnar, linux-kernel, H. Peter Anvin, Thomas Gleixner

On Tue, Oct 20, 2009 at 11:58:38AM -0700, Yinghai Lu wrote:
> Dimitri Sivanich wrote:
> > +	cfg->node = node;
> 
> how about desc->node ?
>

Actually, desc->node does work for most of the cases here.  In the case where
it doesn't work, we have a pci device with the node information in its device.

Sending updated patch shortly.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v4] x86/apic: limit irq affinity
  2009-10-20 13:38                   ` [PATCH v3] " Dimitri Sivanich
  2009-10-20 18:58                     ` Yinghai Lu
@ 2009-10-21  1:12                     ` Dimitri Sivanich
  2009-11-08 13:07                       ` [tip:x86/apic] x86/apic: Limit " tip-bot for Dimitri Sivanich
  1 sibling, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-10-21  1:12 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Yinghai Lu, H. Peter Anvin, Thomas Gleixner

This patch allows for hard numa restrictions to irq affinity on x86 systems.

Affinity is masked to allow only those cpus which the subarchitecture
deems accessible by the given irq.

On some UV systems, this domain will be limited to the nodes accessible
to the irq's node.  Initially other X86 systems will not mask off any cpus
so non-UV systems will remain unaffected.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

Removed allowed cpumask and node from irq_cfg.  Storing allowed cpumasks in UV
specific IRQ code.

 arch/x86/Kconfig                   |    1 
 arch/x86/include/asm/hw_irq.h      |    2 
 arch/x86/include/asm/uv/uv_irq.h   |    1 
 arch/x86/include/asm/uv/uv_mmrs.h  |   25 +++++++
 arch/x86/kernel/apic/io_apic.c     |  117 ++++++++++++++++++++++++++-------
 arch/x86/kernel/apic/x2apic_uv_x.c |    4 -
 arch/x86/kernel/uv_irq.c           |   58 ++++++++++++++++
 7 files changed, 184 insertions(+), 24 deletions(-)

Index: linux/arch/x86/kernel/apic/io_apic.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/io_apic.c	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/kernel/apic/io_apic.c	2009-10-20 20:03:14.000000000 -0500
@@ -168,6 +168,17 @@ void __init io_apic_disable_legacy(void)
 	nr_irqs_gsi = 0;
 }
 
+static int default_irq_allowed_and(int node, struct cpumask *dstp,
+						const struct cpumask *srcp)
+{
+	cpumask_copy(dstp, srcp);
+
+	return 1;
+}
+
+int (*x86_irq_allowed_and)(int, struct cpumask *, const struct cpumask *) =
+			default_irq_allowed_and;
+
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
@@ -1428,16 +1439,23 @@ static void setup_IO_APIC_irq(int apic_i
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
 	unsigned int dest;
+	cpumask_var_t tmp_mask;
 
 	if (!IO_APIC_IRQ(irq))
 		return;
 
 	cfg = desc->chip_data;
 
-	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	if (!x86_irq_allowed_and(desc->node, tmp_mask, apic->target_cpus()))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1469,7 @@ static void setup_IO_APIC_irq(int apic_i
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic_id].apicid, pin);
 		__clear_irq_vector(irq, cfg);
-		return;
+		goto error;
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1477,9 @@ static void setup_IO_APIC_irq(int apic_i
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
+error:
+	free_cpumask_var(tmp_mask);
+	return;
 }
 
 static struct {
@@ -2282,18 +2303,32 @@ set_desc_affinity(struct irq_desc *desc,
 {
 	struct irq_cfg *cfg;
 	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
+	cpumask_var_t tmp_mask;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return BAD_APICID;
 
-	cpumask_copy(desc->affinity, mask);
+	if (!x86_irq_allowed_and(desc->node, tmp_mask, mask))
+		goto error;
+
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	cpumask_copy(desc->affinity, tmp_mask);
+
+	free_cpumask_var(tmp_mask);
 
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+	free_cpumask_var(tmp_mask);
+	return BAD_APICID;
 }
 
 static int
@@ -2349,22 +2384,32 @@ migrate_ioapic_irq_desc(struct irq_desc 
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
+	cpumask_var_t tmp_mask;
 	unsigned int dest;
 	unsigned int irq;
 	int ret = -1;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	irq = desc->irq;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
 		return ret;
 
-	irq = desc->irq;
+	if (!x86_irq_allowed_and(desc->node, tmp_mask, mask))
+		goto error;
+
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
 	if (get_irte(irq, &irte))
-		return ret;
+		goto error;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return ret;
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	ret = 0;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -2377,9 +2422,10 @@ migrate_ioapic_irq_desc(struct irq_desc 
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	cpumask_copy(desc->affinity, mask);
-
-	return 0;
+	cpumask_copy(desc->affinity, tmp_mask);
+error:
+	free_cpumask_var(tmp_mask);
+	return ret;
 }
 
 /*
@@ -3212,18 +3258,29 @@ static int msi_compose_msg(struct pci_de
 			   struct msi_msg *msg, u8 hpet_id)
 {
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	int err;
 	unsigned dest;
+	cpumask_var_t tmp_mask;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	desc = irq_to_desc(irq);
+	if (!x86_irq_allowed_and(desc->node, tmp_mask, apic->target_cpus())) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (err)
-		return err;
+		goto error;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	if (irq_remapped(irq)) {
 		struct irte irte;
@@ -3281,6 +3338,8 @@ static int msi_compose_msg(struct pci_de
 				MSI_DATA_DELIVERY_LOWPRI) |
 			MSI_DATA_VECTOR(cfg->vector);
 	}
+error:
+	free_cpumask_var(tmp_mask);
 	return err;
 }
 
@@ -3698,19 +3757,29 @@ static struct irq_chip ht_irq_chip = {
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
+	cpumask_var_t tmp_mask;
 	int err;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	if (!x86_irq_allowed_and(dev_to_node(&dev->dev), tmp_mask,
+				apic->target_cpus())) {
+		err = -ENOSPC;
+		goto error;
+	}
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-						    apic->target_cpus());
+		dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3734,6 +3803,8 @@ int arch_setup_ht_irq(unsigned int irq, 
 
 		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
+error:
+	free_cpumask_var(tmp_mask);
 	return err;
 }
 #endif /* CONFIG_HT_IRQ */
Index: linux/arch/x86/include/asm/uv/uv_mmrs.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_mmrs.h	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_mmrs.h	2009-10-20 19:54:42.000000000 -0500
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
 };
 
 /* ========================================================================= */
+/*                     UVH_LB_SOCKET_DESTINATION_TABLE                       */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+    unsigned long	v;
+    struct uvh_lb_socket_destination_table_s {
+	unsigned long	rsvd_0  :  1;  /*    */
+	unsigned long	node_id : 14;  /* RW */
+	unsigned long	chip_id :  1;  /* RW */
+	unsigned long	parity  :  1;  /* RW */
+	unsigned long	rsvd_17_63: 47;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
 /*                          UVH_LOCAL_INT0_CONFIG                            */
 /* ========================================================================= */
 #define UVH_LOCAL_INT0_CONFIG 0x61000UL
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/Kconfig	2009-10-20 19:54:42.000000000 -0500
@@ -365,6 +365,7 @@ config X86_UV
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
 	depends on X86_X2APIC
+	depends on NUMA_IRQ_DESC
 	---help---
 	  This option is needed in order to support SGI Ultraviolet systems.
 	  If you don't have one of these, you should say N here.
Index: linux/arch/x86/kernel/uv_irq.c
===================================================================
--- linux.orig/arch/x86/kernel/uv_irq.c	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/kernel/uv_irq.c	2009-10-20 19:54:42.000000000 -0500
@@ -242,6 +242,64 @@ static int uv_set_irq_affinity(unsigned 
 	return 0;
 }
 
+static cpumask_var_t *uv_irq_cpus_allowed;
+
+int uv_irq_cpus_allowed_and(int node, struct cpumask *dstp,
+				const struct cpumask *srcp)
+{
+	int bid;
+
+	if (node < 0) {
+		cpumask_copy(dstp, srcp);
+		return 1;
+	}
+
+	bid = uv_node_to_blade_id(node);
+
+	return cpumask_and(dstp, srcp, uv_irq_cpus_allowed[bid]);
+}
+
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+	int bid;
+
+	uv_irq_cpus_allowed = kzalloc(uv_num_possible_blades() *
+			sizeof(cpumask_var_t *), GFP_KERNEL);
+
+	if (uv_irq_cpus_allowed == NULL) {
+		printk(KERN_EMERG "Out of memory");
+		return;
+	}
+
+	for_each_possible_blade(bid) {
+		unsigned long *pa;
+		int i;
+
+		if (!zalloc_cpumask_var_node(&uv_irq_cpus_allowed[bid],
+				GFP_KERNEL, uv_blade_to_memory_nid(bid))) {
+			printk(KERN_EMERG "Out of memory on blade %d", bid);
+			return;
+		}
+
+		pa = uv_global_mmr64_address(uv_blade_to_pnode(bid),
+			UVH_LB_SOCKET_DESTINATION_TABLE);
+
+		for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++,
+				i++) {
+			int cpu;
+			int pnode = UV_NASID_TO_PNODE(*pa &
+				UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+			for_each_possible_cpu(cpu)
+				if (uv_cpu_to_pnode(cpu) == pnode)
+					cpumask_set_cpu(cpu,
+						uv_irq_cpus_allowed[bid]);
+		}
+	}
+
+	x86_irq_allowed_and = uv_irq_cpus_allowed_and;
+}
+
 /*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c
===================================================================
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/kernel/apic/x2apic_uv_x.c	2009-10-20 19:54:42.000000000 -0500
@@ -23,6 +23,7 @@
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/uv/bios.h>
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 static const struct cpumask *uv_target_cpus(void)
 {
-	return cpumask_of(0);
+	return cpu_online_mask;
 }
 
 static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -659,5 +660,6 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
+	arch_init_uv_cfg_cpus_allowed();
 	proc_mkdir("sgi_uv", NULL);
 }
Index: linux/arch/x86/include/asm/hw_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/hw_irq.h	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/include/asm/hw_irq.h	2009-10-20 19:54:42.000000000 -0500
@@ -99,6 +99,8 @@ struct irq_cfg {
 	u8			move_in_progress : 1;
 };
 
+extern int (*x86_irq_allowed_and)(int, struct cpumask *,
+					const struct cpumask *);
 extern struct irq_cfg *irq_cfg(unsigned int);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
 extern void send_cleanup_vector(struct irq_cfg *);
Index: linux/arch/x86/include/asm/uv/uv_irq.h
===================================================================
--- linux.orig/arch/x86/include/asm/uv/uv_irq.h	2009-10-20 19:54:36.000000000 -0500
+++ linux/arch/x86/include/asm/uv/uv_irq.h	2009-10-20 19:54:42.000000000 -0500
@@ -31,6 +31,7 @@ enum {
 	UV_AFFINITY_CPU
 };
 
+extern void arch_init_uv_cfg_cpus_allowed(void);
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [tip:x86/apic] x86/apic: Limit irq affinity
  2009-10-21  1:12                     ` [PATCH v4] " Dimitri Sivanich
@ 2009-11-08 13:07                       ` tip-bot for Dimitri Sivanich
  2009-11-08 14:53                         ` Ingo Molnar
  0 siblings, 1 reply; 22+ messages in thread
From: tip-bot for Dimitri Sivanich @ 2009-11-08 13:07 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, yinghai, suresh.b.siddha, tglx,
	sivanich, mingo

Commit-ID:  683c91f85d7a3e1092d7fa3ec5687af8cd379f02
Gitweb:     http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
Author:     Dimitri Sivanich <sivanich@sgi.com>
AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Sun, 8 Nov 2009 13:30:40 +0100

x86/apic: Limit irq affinity

This patch allows for hard numa restrictions to irq affinity on
x86 systems.

Affinity is masked to allow only those cpus which the
subarchitecture deems accessible by the given irq.

On some UV systems, this domain will be limited to the nodes
accessible to the irq's node.  Initially other X86 systems will
not mask off any cpus so non-UV systems will remain unaffected.

Added apic functions for getting numa irq cpumasks.  Systems other
than UV now simply return the mask passed in.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <20091021011233.GB32196@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                      |    1 +
 arch/x86/include/asm/apic.h           |   13 +++++
 arch/x86/include/asm/uv/uv_irq.h      |    3 +
 arch/x86/include/asm/uv/uv_mmrs.h     |   25 +++++++++
 arch/x86/kernel/apic/apic_flat_64.c   |    4 ++
 arch/x86/kernel/apic/apic_noop.c      |    2 +
 arch/x86/kernel/apic/bigsmp_32.c      |    2 +
 arch/x86/kernel/apic/es7000_32.c      |    4 ++
 arch/x86/kernel/apic/io_apic.c        |   91 ++++++++++++++++++++++++--------
 arch/x86/kernel/apic/numaq_32.c       |    2 +
 arch/x86/kernel/apic/probe_32.c       |    2 +
 arch/x86/kernel/apic/summit_32.c      |    2 +
 arch/x86/kernel/apic/x2apic_cluster.c |    2 +
 arch/x86/kernel/apic/x2apic_phys.c    |    2 +
 arch/x86/kernel/apic/x2apic_uv_x.c    |    6 ++-
 arch/x86/kernel/uv_irq.c              |   66 ++++++++++++++++++++++++
 16 files changed, 203 insertions(+), 24 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c876bac..93decdd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -363,6 +363,7 @@ config X86_UV
 	depends on X86_EXTENDED_PLATFORM
 	depends on NUMA
 	depends on X86_X2APIC
+	depends on NUMA_IRQ_DESC
 	---help---
 	  This option is needed in order to support SGI Ultraviolet systems.
 	  If you don't have one of these, you should say N here.
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 08a5f42..b7336ac 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -293,6 +293,9 @@ struct apic {
 	u32 irq_dest_mode;
 
 	const struct cpumask *(*target_cpus)(void);
+	struct cpumask *(*get_restricted_mask)(const struct cpumask *mask,
+					       int node);
+	void (*free_restricted_mask)(struct cpumask *mask);
 
 	int disable_esr;
 
@@ -474,6 +477,16 @@ static inline const struct cpumask *default_target_cpus(void)
 #endif
 }
 
+static inline struct cpumask *
+default_get_restricted_mask(const struct cpumask *mask, int node)
+{
+	return (struct cpumask *)mask;
+}
+
+static inline void default_free_restricted_mask(struct cpumask *mask)
+{
+}
+
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 
 
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c7..af1b281 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,6 +31,9 @@ enum {
 	UV_AFFINITY_CPU
 };
 
+extern struct cpumask *uv_get_restricted_mask(const struct cpumask *, int);
+extern void uv_free_restricted_mask(struct cpumask *);
+extern void arch_init_uv_cfg_cpus_allowed(void);
 extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
 extern int uv_setup_irq(char *, int, int, unsigned long, int);
 extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 2cae46c..6b79c96 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -823,6 +823,31 @@ union uvh_lb_mcast_aoerr0_rpt_enable_u {
 };
 
 /* ========================================================================= */
+/*                     UVH_LB_SOCKET_DESTINATION_TABLE                       */
+/* ========================================================================= */
+#define UVH_LB_SOCKET_DESTINATION_TABLE 0x321000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_32 0x1800
+#define UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH 128
+
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_SHFT 1
+#define UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK 0x0000000000007ffeUL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_SHFT 15
+#define UVH_LB_SOCKET_DESTINATION_TABLE_CHIP_ID_MASK 0x0000000000008000UL
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_SHFT 16
+#define UVH_LB_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000000000010000UL
+
+union uvh_lb_socket_destination_table_u {
+    unsigned long	v;
+    struct uvh_lb_socket_destination_table_s {
+	unsigned long	rsvd_0  :  1;  /*    */
+	unsigned long	node_id : 14;  /* RW */
+	unsigned long	chip_id :  1;  /* RW */
+	unsigned long	parity  :  1;  /* RW */
+	unsigned long	rsvd_17_63: 47;  /*    */
+    } s;
+};
+
+/* ========================================================================= */
 /*                          UVH_LOCAL_INT0_CONFIG                            */
 /* ========================================================================= */
 #define UVH_LOCAL_INT0_CONFIG 0x61000UL
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index d0c99ab..a817e7b 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -174,6 +174,8 @@ struct apic apic_flat =  {
 	.irq_dest_mode			= 1, /* logical */
 
 	.target_cpus			= flat_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
@@ -323,6 +325,8 @@ struct apic apic_physflat =  {
 	.irq_dest_mode			= 0, /* physical */
 
 	.target_cpus			= physflat_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 9ab6ffb..1b8a4a1 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -147,6 +147,8 @@ struct apic apic_noop = {
 	.irq_dest_mode			= 1,
 
 	.target_cpus			= noop_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= noop_check_apicid_used,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 77a0641..46aaed4 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -216,6 +216,8 @@ struct apic apic_bigsmp = {
 	.irq_dest_mode			= 0,
 
 	.target_cpus			= bigsmp_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= bigsmp_check_apicid_used,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 89174f8..321c655 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -665,6 +665,8 @@ struct apic __refdata apic_es7000_cluster = {
 	.irq_dest_mode			= 1,
 
 	.target_cpus			= target_cpus_cluster,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= es7000_check_apicid_used,
@@ -730,6 +732,8 @@ struct apic __refdata apic_es7000 = {
 	.irq_dest_mode			= 0,
 
 	.target_cpus			= es7000_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= es7000_check_apicid_used,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 31e9db3..e347709 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1427,6 +1427,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 {
 	struct irq_cfg *cfg;
 	struct IO_APIC_route_entry entry;
+	struct cpumask *tmp_mask;
 	unsigned int dest;
 
 	if (!IO_APIC_IRQ(irq))
@@ -1434,10 +1435,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 
 	cfg = desc->chip_data;
 
-	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+	tmp_mask = apic->get_restricted_mask(apic->target_cpus(), desc->node);
+	if (!tmp_mask)
 		return;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	apic_printk(APIC_VERBOSE,KERN_DEBUG
 		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1451,7 +1456,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 		       mp_ioapics[apic_id].apicid, pin);
 		__clear_irq_vector(irq, cfg);
-		return;
+		goto error;
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
@@ -1459,6 +1464,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
+error:
+	apic->free_restricted_mask(tmp_mask);
 }
 
 static struct {
@@ -2278,18 +2285,30 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
+	struct cpumask *tmp_mask;
 
 	irq = desc->irq;
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
+
+	tmp_mask = apic->get_restricted_mask(mask, desc->node);
+	if (!tmp_mask)
 		return BAD_APICID;
 
-	cpumask_copy(desc->affinity, mask);
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	cpumask_copy(desc->affinity, tmp_mask);
+
+	apic->free_restricted_mask(tmp_mask);
 
 	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+
+error:
+	apic->free_restricted_mask(tmp_mask);
+	return BAD_APICID;
 }
 
 static int
@@ -2345,22 +2364,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
+	struct cpumask *tmp_mask;
 	unsigned int dest;
 	unsigned int irq;
 	int ret = -1;
 
-	if (!cpumask_intersects(mask, cpu_online_mask))
+	irq = desc->irq;
+
+	tmp_mask = apic->get_restricted_mask(mask, desc->node);
+	if (!tmp_mask)
 		return ret;
 
-	irq = desc->irq;
+	if (!cpumask_intersects(tmp_mask, cpu_online_mask))
+		goto error;
+
 	if (get_irte(irq, &irte))
-		return ret;
+		goto error;
 
 	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return ret;
+	if (assign_irq_vector(irq, cfg, tmp_mask))
+		goto error;
+
+	ret = 0;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -2373,9 +2400,10 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	cpumask_copy(desc->affinity, mask);
-
-	return 0;
+	cpumask_copy(desc->affinity, tmp_mask);
+error:
+	apic->free_restricted_mask(tmp_mask);
+	return ret;
 }
 
 /*
@@ -3243,18 +3271,26 @@ void destroy_irq(unsigned int irq)
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
 	struct irq_cfg *cfg;
+	struct irq_desc *desc;
 	int err;
 	unsigned dest;
+	struct cpumask *tmp_mask;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	desc = irq_to_desc(irq);
+
+	tmp_mask = apic->get_restricted_mask(apic->target_cpus(), desc->node);
+	if (!tmp_mask)
+		return -ENOMEM;
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (err)
-		return err;
+		goto error;
 
-	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+	dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 	if (irq_remapped(irq)) {
 		struct irte irte;
@@ -3309,6 +3345,8 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 				MSI_DATA_DELIVERY_LOWPRI) |
 			MSI_DATA_VECTOR(cfg->vector);
 	}
+error:
+	apic->free_restricted_mask(tmp_mask);
 	return err;
 }
 
@@ -3697,19 +3735,25 @@ static struct irq_chip ht_irq_chip = {
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
 	struct irq_cfg *cfg;
+	struct cpumask *tmp_mask;
 	int err;
 
 	if (disable_apic)
 		return -ENXIO;
 
 	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+
+	tmp_mask = apic->get_restricted_mask(apic->target_cpus(),
+						dev_to_node(&dev->dev));
+	if (!tmp_mask)
+		return -ENOMEM;
+
+	err = assign_irq_vector(irq, cfg, tmp_mask);
 	if (!err) {
 		struct ht_irq_msg msg;
 		unsigned dest;
 
-		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-						    apic->target_cpus());
+		dest = apic->cpu_mask_to_apicid_and(cfg->domain, tmp_mask);
 
 		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
@@ -3733,6 +3777,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
 	}
+	apic->free_restricted_mask(tmp_mask);
 	return err;
 }
 #endif /* CONFIG_HT_IRQ */
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index efa00e2..d46f4b5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -501,6 +501,8 @@ struct apic __refdata apic_numaq = {
 	.irq_dest_mode			= 0,
 
 	.target_cpus			= numaq_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 1,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= numaq_check_apicid_used,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0c0182c..22b6716 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -94,6 +94,8 @@ struct apic apic_default = {
 	.irq_dest_mode			= 1,
 
 	.target_cpus			= default_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= default_check_apicid_used,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 645ecc4..8d43a00 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -517,6 +517,8 @@ struct apic apic_summit = {
 	.irq_dest_mode			= 1,
 
 	.target_cpus			= summit_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 1,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= summit_check_apicid_used,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index a5371ec..d8cab82 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -198,6 +198,8 @@ struct apic apic_x2apic_cluster = {
 	.irq_dest_mode			= 1, /* logical */
 
 	.target_cpus			= x2apic_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index a8989aa..4e64e84 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -187,6 +187,8 @@ struct apic apic_x2apic_phys = {
 	.irq_dest_mode			= 0, /* physical */
 
 	.target_cpus			= x2apic_target_cpus,
+	.get_restricted_mask		= default_get_restricted_mask,
+	.free_restricted_mask		= default_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f5f5886..c1da399 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -23,6 +23,7 @@
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/uv/bios.h>
@@ -96,7 +97,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second);
 
 static const struct cpumask *uv_target_cpus(void)
 {
-	return cpumask_of(0);
+	return cpu_online_mask;
 }
 
 static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
@@ -264,6 +265,8 @@ struct apic __refdata apic_x2apic_uv_x = {
 	.irq_dest_mode			= 0, /* physical */
 
 	.target_cpus			= uv_target_cpus,
+	.get_restricted_mask		= uv_get_restricted_mask,
+	.free_restricted_mask		= uv_free_restricted_mask,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
@@ -658,5 +661,6 @@ void __init uv_system_init(void)
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
+	arch_init_uv_cfg_cpus_allowed();
 	proc_mkdir("sgi_uv", NULL);
 }
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
index 61d805d..b81273c 100644
--- a/arch/x86/kernel/uv_irq.c
+++ b/arch/x86/kernel/uv_irq.c
@@ -242,6 +242,72 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
 	return 0;
 }
 
+static cpumask_var_t *uv_irq_cpus_allowed;
+
+struct cpumask *uv_get_restricted_mask(const struct cpumask *mask, int node)
+{
+	cpumask_var_t tmp_mask;
+	int bid;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return NULL;
+
+	if (!uv_irq_cpus_allowed || node < 0) {
+		cpumask_copy(tmp_mask, mask);
+		return tmp_mask;
+	}
+
+	bid = uv_node_to_blade_id(node);
+
+	cpumask_and(tmp_mask, mask, uv_irq_cpus_allowed[bid]);
+
+	return tmp_mask;
+}
+
+void uv_free_restricted_mask(struct cpumask *mask)
+{
+	free_cpumask_var(mask);
+}
+
+void arch_init_uv_cfg_cpus_allowed(void)
+{
+	int bid;
+
+	uv_irq_cpus_allowed = kzalloc(uv_num_possible_blades() *
+			sizeof(cpumask_var_t *), GFP_KERNEL);
+
+	if (uv_irq_cpus_allowed == NULL) {
+		printk(KERN_EMERG "Out of memory");
+		return;
+	}
+
+	for_each_possible_blade(bid) {
+		unsigned long *pa;
+		int i;
+
+		if (!zalloc_cpumask_var_node(&uv_irq_cpus_allowed[bid],
+				GFP_KERNEL, uv_blade_to_memory_nid(bid))) {
+			printk(KERN_EMERG "Out of memory on blade %d", bid);
+			return;
+		}
+
+		pa = uv_global_mmr64_address(uv_blade_to_pnode(bid),
+			UVH_LB_SOCKET_DESTINATION_TABLE);
+
+		for (i = 0; i < UVH_LB_SOCKET_DESTINATION_TABLE_DEPTH; pa++,
+				i++) {
+			int cpu;
+			int pnode = UV_NASID_TO_PNODE(*pa &
+				UVH_LB_SOCKET_DESTINATION_TABLE_NODE_ID_MASK);
+
+			for_each_possible_cpu(cpu)
+				if (uv_cpu_to_pnode(cpu) == pnode)
+					cpumask_set_cpu(cpu,
+						uv_irq_cpus_allowed[bid]);
+		}
+	}
+}
+
 /*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [tip:x86/apic] x86/apic: Limit irq affinity
  2009-11-08 13:07                       ` [tip:x86/apic] x86/apic: Limit " tip-bot for Dimitri Sivanich
@ 2009-11-08 14:53                         ` Ingo Molnar
  2009-11-09 16:02                           ` Dimitri Sivanich
  0 siblings, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2009-11-08 14:53 UTC (permalink / raw)
  To: mingo, hpa, linux-kernel, yinghai, suresh.b.siddha, tglx,
	sivanich
  Cc: linux-tip-commits

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=unknown-8bit, Size: 734 bytes --]


* tip-bot for Dimitri Sivanich <sivanich@sgi.com> wrote:

> Commit-ID:  683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> Gitweb:     http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> Author:     Dimitri Sivanich <sivanich@sgi.com>
> AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> Committer:  Ingo Molnar <mingo@elte.hu>
> CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> 
> x86/apic: Limit irq affinity
> 
> This patch allows for hard numa restrictions to irq affinity on
> x86 systems.

-tip testing found a build failure:

arch/x86/kernel/apic/io_apic.c:1438: error: ‘struct irq_desc’ has no member named ‘node’
arch/x86/kernel/apic/io_apic.c:3286: error: ‘struct irq_desc’ has no member named ‘node’

	Ingo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [tip:x86/apic] x86/apic: Limit irq affinity
  2009-11-08 14:53                         ` Ingo Molnar
@ 2009-11-09 16:02                           ` Dimitri Sivanich
  2009-11-10  4:40                             ` Ingo Molnar
  0 siblings, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-11-09 16:02 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: mingo, hpa, linux-kernel, yinghai, suresh.b.siddha, tglx

On Sun, Nov 08, 2009 at 03:53:55PM +0100, Ingo Molnar wrote:
> 
> * tip-bot for Dimitri Sivanich <sivanich@sgi.com> wrote:
> 
> > Commit-ID:  683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > Gitweb:     http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > Author:     Dimitri Sivanich <sivanich@sgi.com>
> > AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> > Committer:  Ingo Molnar <mingo@elte.hu>
> > CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> > 
> > x86/apic: Limit irq affinity
> > 
> > This patch allows for hard numa restrictions to irq affinity on
> > x86 systems.
> 
> -tip testing found a build failure:
> 
> arch/x86/kernel/apic/io_apic.c:1438: error: ‘struct irq_desc’ has no member named ‘node’
> arch/x86/kernel/apic/io_apic.c:3286: error: ‘struct irq_desc’ has no member named ‘node’
>

In the interest of doing some ifdef cleanup as well as fixing the build problem,
can I suggest that we remove the 'ifdef CONFIG_SMP' from the irq_desc?

Here's my suggested patch.


Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

 include/linux/irq.h |   70 +++++++++++++++++++-----------------------------
 kernel/irq/chip.c   |    2 -
 kernel/irq/handle.c |    4 --
 3 files changed, 29 insertions(+), 47 deletions(-)

Index: linux/include/linux/irq.h
===================================================================
--- linux.orig/include/linux/irq.h	2009-11-09 09:18:32.000000000 -0600
+++ linux/include/linux/irq.h	2009-11-09 09:19:50.000000000 -0600
@@ -193,13 +193,11 @@ struct irq_desc {
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
 	spinlock_t		lock;
-#ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
 	unsigned int		node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
-#endif
 	atomic_t		threads_active;
 	wait_queue_head_t       wait_for_threads;
 #ifdef CONFIG_PROC_FS
@@ -423,6 +421,35 @@ extern int set_irq_msi(unsigned int irq,
 
 #endif /* !CONFIG_S390 */
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+	cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_clear(desc->pending_mask);
+#endif
+}
+
+/**
+ * init_copy_desc_masks - copy cpumasks for irq_desc
+ * @old_desc:	pointer to old irq_desc struct
+ * @new_desc:	pointer to new irq_desc struct
+ *
+ * Insures affinity and pending_masks are copied to new irq_desc.
+ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
+ * irq_desc struct so the copy is redundant.
+ */
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+					struct irq_desc *new_desc)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	cpumask_copy(new_desc->affinity, old_desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
+#endif
+#endif
+}
+
 #ifdef CONFIG_SMP
 /**
  * alloc_desc_masks - allocate cpumasks for irq_desc
@@ -455,36 +482,6 @@ static inline bool alloc_desc_masks(stru
 	return true;
 }
 
-static inline void init_desc_masks(struct irq_desc *desc)
-{
-	cpumask_setall(desc->affinity);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_clear(desc->pending_mask);
-#endif
-}
-
-/**
- * init_copy_desc_masks - copy cpumasks for irq_desc
- * @old_desc:	pointer to old irq_desc struct
- * @new_desc:	pointer to new irq_desc struct
- *
- * Insures affinity and pending_masks are copied to new irq_desc.
- * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
- * irq_desc struct so the copy is redundant.
- */
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
-					struct irq_desc *new_desc)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	cpumask_copy(new_desc->affinity, old_desc->affinity);
-
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
-#endif
-#endif
-}
-
 static inline void free_desc_masks(struct irq_desc *old_desc,
 				   struct irq_desc *new_desc)
 {
@@ -503,15 +500,6 @@ static inline bool alloc_desc_masks(stru
 	return true;
 }
 
-static inline void init_desc_masks(struct irq_desc *desc)
-{
-}
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
-					struct irq_desc *new_desc)
-{
-}
-
 static inline void free_desc_masks(struct irq_desc *old_desc,
 				   struct irq_desc *new_desc)
 {
Index: linux/kernel/irq/handle.c
===================================================================
--- linux.orig/kernel/irq/handle.c	2009-11-09 09:18:32.000000000 -0600
+++ linux/kernel/irq/handle.c	2009-11-09 09:19:50.000000000 -0600
@@ -110,9 +110,7 @@ static void init_one_irq_desc(int irq, s
 
 	spin_lock_init(&desc->lock);
 	desc->irq = irq;
-#ifdef CONFIG_SMP
 	desc->node = node;
-#endif
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	init_kstat_irqs(desc, node, nr_cpu_ids);
 	if (!desc->kstat_irqs) {
@@ -173,9 +171,7 @@ int __init early_irq_init(void)
 
 	for (i = 0; i < legacy_count; i++) {
 		desc[i].irq = i;
-#ifdef CONFIG_SMP
 		desc[i].node = node;
-#endif
 		desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 		alloc_desc_masks(&desc[i], node, true);
Index: linux/kernel/irq/chip.c
===================================================================
--- linux.orig/kernel/irq/chip.c	2009-11-09 09:18:32.000000000 -0600
+++ linux/kernel/irq/chip.c	2009-11-09 09:19:50.000000000 -0600
@@ -45,12 +45,10 @@ void dynamic_irq_init(unsigned int irq)
 	desc->action = NULL;
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
-#ifdef CONFIG_SMP
 	cpumask_setall(desc->affinity);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_clear(desc->pending_mask);
 #endif
-#endif
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [tip:x86/apic] x86/apic: Limit irq affinity
  2009-11-09 16:02                           ` Dimitri Sivanich
@ 2009-11-10  4:40                             ` Ingo Molnar
  2009-11-10 16:31                               ` Dimitri Sivanich
  0 siblings, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2009-11-10  4:40 UTC (permalink / raw)
  To: Dimitri Sivanich; +Cc: mingo, hpa, linux-kernel, yinghai, suresh.b.siddha, tglx


* Dimitri Sivanich <sivanich@sgi.com> wrote:

> On Sun, Nov 08, 2009 at 03:53:55PM +0100, Ingo Molnar wrote:
> > 
> > * tip-bot for Dimitri Sivanich <sivanich@sgi.com> wrote:
> > 
> > > Commit-ID:  683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > Gitweb:     http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > Author:     Dimitri Sivanich <sivanich@sgi.com>
> > > AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> > > Committer:  Ingo Molnar <mingo@elte.hu>
> > > CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> > > 
> > > x86/apic: Limit irq affinity
> > > 
> > > This patch allows for hard numa restrictions to irq affinity on
> > > x86 systems.
> > 
> > -tip testing found a build failure:
> > 
> > arch/x86/kernel/apic/io_apic.c:1438: error: ‘struct irq_desc’ has no member named ‘node’
> > arch/x86/kernel/apic/io_apic.c:3286: error: ‘struct irq_desc’ has no member named ‘node’
> >
> 
> In the interest of doing some ifdef cleanup as well as fixing the 
> build problem, can I suggest that we remove the 'ifdef CONFIG_SMP' 
> from the irq_desc?

What's the (data and code) size effect on UP kernels?

	Ingo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [tip:x86/apic] x86/apic: Limit irq affinity
  2009-11-10  4:40                             ` Ingo Molnar
@ 2009-11-10 16:31                               ` Dimitri Sivanich
  2009-11-10 17:19                                 ` Ingo Molnar
  0 siblings, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-11-10 16:31 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: mingo, hpa, linux-kernel, yinghai, suresh.b.siddha, tglx

On Tue, Nov 10, 2009 at 05:40:25AM +0100, Ingo Molnar wrote:
> 
> * Dimitri Sivanich <sivanich@sgi.com> wrote:
> 
> > On Sun, Nov 08, 2009 at 03:53:55PM +0100, Ingo Molnar wrote:
> > > 
> > > * tip-bot for Dimitri Sivanich <sivanich@sgi.com> wrote:
> > > 
> > > > Commit-ID:  683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > > Gitweb:     http://git.kernel.org/tip/683c91f85d7a3e1092d7fa3ec5687af8cd379f02
> > > > Author:     Dimitri Sivanich <sivanich@sgi.com>
> > > > AuthorDate: Tue, 3 Nov 2009 12:40:37 -0600
> > > > Committer:  Ingo Molnar <mingo@elte.hu>
> > > > CommitDate: Sun, 8 Nov 2009 13:30:40 +0100
> > > > 
> > > > x86/apic: Limit irq affinity
> > > > 
> > > > This patch allows for hard numa restrictions to irq affinity on
> > > > x86 systems.
> > > 
> > > -tip testing found a build failure:
> > > 
> > > arch/x86/kernel/apic/io_apic.c:1438: error: ‘struct irq_desc’ has no member named ‘node’
> > > arch/x86/kernel/apic/io_apic.c:3286: error: ‘struct irq_desc’ has no member named ‘node’
> > >
> > 
> > In the interest of doing some ifdef cleanup as well as fixing the 
> > build problem, can I suggest that we remove the 'ifdef CONFIG_SMP' 
> > from the irq_desc?
> 
> What's the (data and code) size effect on UP kernels?
> 

While I'm not fully certain what is best to report, here is output from
both 'size' and 'readelf'.

For the x86_32 case (the default configuration with CONFIG_SMP off):

Without the patch to remove '#ifdef CONFIG_SMP' from irq_desc:
$ size vmlinux
   text    data     bss     dec     hex filename
6853617  674868 1333604 8862089  873989 vmlinux

With the patch to remove '#ifdef CONFIG_SMP' from irq_desc:
$ size vmlinux
size vmlinux
   text    data     bss     dec     hex filename
6853621  674996 1333604 8862221  873a0d vmlinux

So it looks like we have 4 bytes more text and 128 bytes more data.

Looking at elf data, no MemSiz difference is apparent.

Without the patch:
$ readelf -l vmlinux
..
..
Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x001000 0xc1000000 0x01000000 0x651000 0x651000 R E 0x1000
  LOAD           0x652000 0xc1651000 0x01651000 0xdea1e 0x225000 RWE 0x1000
  NOTE           0x43bd4c 0xc143ad4c 0x0143ad4c 0x00024 0x00024     0x4

With the patch:
Program Headers:
  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align
  LOAD           0x001000 0xc1000000 0x01000000 0x651000 0x651000 R E 0x1000
  LOAD           0x652000 0xc1651000 0x01651000 0xdea1e 0x225000 RWE 0x1000
  NOTE           0x43bd44 0xc143ad44 0x0143ad44 0x00024 0x00024     0x4





For the x86_64 case:
Without patch on x86_64:
$ size vmlinux
   text    data     bss     dec     hex filename
6879227  731284  883216 8493727  819a9f vmlinux

With patch on x86_64:
$ size vmlinux
   text    data     bss     dec     hex filename
6879242  731524  883216 8493982  819b9e vmlinux

Looks like 15 bytes text and 240 bytes data.


Looking at elf data, there's a 256 byte difference in MemSiz data
(here I'm showing the output diff without/with the patch):

Program Headers:
  Type           Offset             VirtAddr           PhysAddr
                 FileSiz            MemSiz              Flags  Align
  LOAD           0x0000000000200000 0xffffffff81000000 0x0000000001000000
                 0x0000000000807000 0x0000000000807000  R E    200000
  LOAD           0x0000000000c00000 0xffffffff81a00000 0x0000000001a00000
-                0x0000000000088ff8 0x0000000000088ff8  RWE    200000
+                0x00000000000890f8 0x00000000000890f8  RWE    200000

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [tip:x86/apic] x86/apic: Limit irq affinity
  2009-11-10 16:31                               ` Dimitri Sivanich
@ 2009-11-10 17:19                                 ` Ingo Molnar
  2009-11-10 18:57                                   ` [PATCH] Remove SMP ifdef from irq_desc Dimitri Sivanich
  0 siblings, 1 reply; 22+ messages in thread
From: Ingo Molnar @ 2009-11-10 17:19 UTC (permalink / raw)
  To: Dimitri Sivanich; +Cc: mingo, hpa, linux-kernel, yinghai, suresh.b.siddha, tglx


* Dimitri Sivanich <sivanich@sgi.com> wrote:

> So it looks like we have 4 bytes more text and 128 bytes more data.

> Looks like 15 bytes text and 240 bytes data.

fair enough - and it removes a good bit of #ifdef complexity. Mind 
sending it standalone, tested, properly changelogged and with the above 
data included?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] Remove SMP ifdef from irq_desc
  2009-11-10 17:19                                 ` Ingo Molnar
@ 2009-11-10 18:57                                   ` Dimitri Sivanich
  2009-11-16 10:49                                     ` Thomas Gleixner
  0 siblings, 1 reply; 22+ messages in thread
From: Dimitri Sivanich @ 2009-11-10 18:57 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: mingo, hpa, linux-kernel, yinghai, suresh.b.siddha, tglx

Remove the CONFIG_SMP ifdef from the irq_desc structure and consolidate
initializers for the smp/non-smp cases.

For the x86_32 case, this adds 4 bytes of text and 128 bytes of data.

For the x86_64 case, this adds 15 bytes of text and 240 bytes of data.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>

---

 include/linux/irq.h |   70 +++++++++++++++++++-----------------------------
 kernel/irq/chip.c   |    2 -
 kernel/irq/handle.c |    4 --
 3 files changed, 29 insertions(+), 47 deletions(-)

Index: linux/include/linux/irq.h
===================================================================
--- linux.orig/include/linux/irq.h	2009-11-10 11:54:52.000000000 -0600
+++ linux/include/linux/irq.h	2009-11-10 11:54:55.000000000 -0600
@@ -193,13 +193,11 @@ struct irq_desc {
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
 	spinlock_t		lock;
-#ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
 	unsigned int		node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
 #endif
-#endif
 	atomic_t		threads_active;
 	wait_queue_head_t       wait_for_threads;
 #ifdef CONFIG_PROC_FS
@@ -423,6 +421,35 @@ extern int set_irq_msi(unsigned int irq,
 
 #endif /* !CONFIG_S390 */
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+	cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_clear(desc->pending_mask);
+#endif
+}
+
+/**
+ * init_copy_desc_masks - copy cpumasks for irq_desc
+ * @old_desc:	pointer to old irq_desc struct
+ * @new_desc:	pointer to new irq_desc struct
+ *
+ * Insures affinity and pending_masks are copied to new irq_desc.
+ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
+ * irq_desc struct so the copy is redundant.
+ */
+static inline void init_copy_desc_masks(struct irq_desc *old_desc,
+					struct irq_desc *new_desc)
+{
+#ifdef CONFIG_CPUMASK_OFFSTACK
+	cpumask_copy(new_desc->affinity, old_desc->affinity);
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
+#endif
+#endif
+}
+
 #ifdef CONFIG_SMP
 /**
  * alloc_desc_masks - allocate cpumasks for irq_desc
@@ -455,36 +482,6 @@ static inline bool alloc_desc_masks(stru
 	return true;
 }
 
-static inline void init_desc_masks(struct irq_desc *desc)
-{
-	cpumask_setall(desc->affinity);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_clear(desc->pending_mask);
-#endif
-}
-
-/**
- * init_copy_desc_masks - copy cpumasks for irq_desc
- * @old_desc:	pointer to old irq_desc struct
- * @new_desc:	pointer to new irq_desc struct
- *
- * Insures affinity and pending_masks are copied to new irq_desc.
- * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the
- * irq_desc struct so the copy is redundant.
- */
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
-					struct irq_desc *new_desc)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	cpumask_copy(new_desc->affinity, old_desc->affinity);
-
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-	cpumask_copy(new_desc->pending_mask, old_desc->pending_mask);
-#endif
-#endif
-}
-
 static inline void free_desc_masks(struct irq_desc *old_desc,
 				   struct irq_desc *new_desc)
 {
@@ -503,15 +500,6 @@ static inline bool alloc_desc_masks(stru
 	return true;
 }
 
-static inline void init_desc_masks(struct irq_desc *desc)
-{
-}
-
-static inline void init_copy_desc_masks(struct irq_desc *old_desc,
-					struct irq_desc *new_desc)
-{
-}
-
 static inline void free_desc_masks(struct irq_desc *old_desc,
 				   struct irq_desc *new_desc)
 {
Index: linux/kernel/irq/handle.c
===================================================================
--- linux.orig/kernel/irq/handle.c	2009-11-10 11:54:52.000000000 -0600
+++ linux/kernel/irq/handle.c	2009-11-10 11:54:55.000000000 -0600
@@ -110,9 +110,7 @@ static void init_one_irq_desc(int irq, s
 
 	spin_lock_init(&desc->lock);
 	desc->irq = irq;
-#ifdef CONFIG_SMP
 	desc->node = node;
-#endif
 	lockdep_set_class(&desc->lock, &irq_desc_lock_class);
 	init_kstat_irqs(desc, node, nr_cpu_ids);
 	if (!desc->kstat_irqs) {
@@ -173,9 +171,7 @@ int __init early_irq_init(void)
 
 	for (i = 0; i < legacy_count; i++) {
 		desc[i].irq = i;
-#ifdef CONFIG_SMP
 		desc[i].node = node;
-#endif
 		desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 		alloc_desc_masks(&desc[i], node, true);
Index: linux/kernel/irq/chip.c
===================================================================
--- linux.orig/kernel/irq/chip.c	2009-11-10 11:54:52.000000000 -0600
+++ linux/kernel/irq/chip.c	2009-11-10 11:54:55.000000000 -0600
@@ -45,12 +45,10 @@ void dynamic_irq_init(unsigned int irq)
 	desc->action = NULL;
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
-#ifdef CONFIG_SMP
 	cpumask_setall(desc->affinity);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_clear(desc->pending_mask);
 #endif
-#endif
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Remove SMP ifdef from irq_desc
  2009-11-10 18:57                                   ` [PATCH] Remove SMP ifdef from irq_desc Dimitri Sivanich
@ 2009-11-16 10:49                                     ` Thomas Gleixner
  0 siblings, 0 replies; 22+ messages in thread
From: Thomas Gleixner @ 2009-11-16 10:49 UTC (permalink / raw)
  To: Dimitri Sivanich
  Cc: Ingo Molnar, mingo, hpa, linux-kernel, yinghai, suresh.b.siddha

On Tue, 10 Nov 2009, Dimitri Sivanich wrote:

> Remove the CONFIG_SMP ifdef from the irq_desc structure and consolidate
> initializers for the smp/non-smp cases.
> 
> For the x86_32 case, this adds 4 bytes of text and 128 bytes of data.
> For the x86_64 case, this adds 15 bytes of text and 240 bytes of data.

Hmm, on ARM and PowerPC the increase of data is 4K and I expect it's
minimum the same on other architectures. Not sure if we should worry
about it, but it's definitely a significant number.

Thanks,

	tglx




^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2009-11-16 10:50 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-30 16:02 [PATCH] x86: SGU UV Fix irq affinity for hub based interrupts Dimitri Sivanich
2009-09-30 16:10 ` Robin Holt
2009-10-12 19:34 ` Ingo Molnar
2009-10-13 20:32   ` [PATCH] x86: Move SGI UV functionality out of generic IO-APIC code Dimitri Sivanich
2009-10-14  8:18     ` [tip:x86/apic] x86, apic: " tip-bot for Dimitri Sivanich
     [not found]   ` <20091012193704.GA8708@sgi.com>
     [not found]     ` <20091014071014.GK784@elte.hu>
     [not found]       ` <20091014120225.GA9674@sgi.com>
     [not found]         ` <20091014122653.GA15048@elte.hu>
2009-10-15  1:13           ` [PATCH v2] x86/apic: limit irq affinity Dimitri Sivanich
2009-10-15  5:30             ` Yinghai Lu
2009-10-15 13:50               ` Dimitri Sivanich
2009-10-20 12:56                 ` Dimitri Sivanich
2009-10-20 13:38                   ` [PATCH v3] " Dimitri Sivanich
2009-10-20 18:58                     ` Yinghai Lu
2009-10-21  1:06                       ` Dimitri Sivanich
2009-10-21  1:12                     ` [PATCH v4] " Dimitri Sivanich
2009-11-08 13:07                       ` [tip:x86/apic] x86/apic: Limit " tip-bot for Dimitri Sivanich
2009-11-08 14:53                         ` Ingo Molnar
2009-11-09 16:02                           ` Dimitri Sivanich
2009-11-10  4:40                             ` Ingo Molnar
2009-11-10 16:31                               ` Dimitri Sivanich
2009-11-10 17:19                                 ` Ingo Molnar
2009-11-10 18:57                                   ` [PATCH] Remove SMP ifdef from irq_desc Dimitri Sivanich
2009-11-16 10:49                                     ` Thomas Gleixner
2009-10-14  8:17 ` [tip:x86/apic] x86: SGI UV: Fix irq affinity for hub based interrupts tip-bot for Dimitri Sivanich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).