[Linux-ia64] [PATCH] dynamic IRQ allocation

public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed

* [Linux-ia64] [PATCH] dynamic IRQ allocation
@ 2002-07-30  2:36 KOCHI, Takayoshi
  2002-07-30  5:01 ` Grant Grundler
                   ` (17 more replies)
  0 siblings, 18 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-07-30  2:36 UTC (permalink / raw)
  To: linux-ia64

Hi,

We'd like to change some of iosapic.c and IRQ (interrupt vector)
allocation behavior.

Currently iosapic.c allocates an ia64 interrupt vector for each
possible _PRT entry.  As a result, vectors are allocated even
for unpopulated PCI slots.
This patch fixes that behavior and allocates vectors
only for existing pci_dev structures.

Currently the ia64_alloc_irq() routine is somewhat(?) broken and
panics if there are more than 183 distinct entries for
interrupts.  This limit is easily exceeded on servers with
huge configurations.

For example, if each PCI slot's 4 interrupt pins are
connected to different IOSAPIC input pins and there are
more than 46 slots in a system (without PCI segment),
interrupt vectors are exhausted.  But for most cases,
a PCI card uses only INTA.  So allocating vectors for
all of _PRT entries is wasteful.

# This breaks the ACPI PCI hotplug patch I sent last week.
# I'll make an update patch for it if this is accepted.

But if such a huge server is fully populated with cards,
this patch doesn't help.  For such a case, we have to support
sharing a PCI IRQ (interrupt vector) among interrupts from
different IO SAPICs.

As usual, interrupt sharing often implies performance
degradation and such a configuration should be avoided.

So I think this patch is a reasonable solution for most
cases.

--- david-020722/arch/ia64/kernel/iosapic.c	Tue Jul 23 16:01:24 2002
+++ david-020722-pcihp/arch/ia64/kernel/iosapic.c	Mon Jul 29 18:51:10 2002
@@ -25,6 +25,7 @@
  * 02/04/02	P. Diefenbaugh	Cleaned up ACPI PCI IRQ routing.
  * 02/04/18	J.I. Lee	bug fix in iosapic_init_pci_irq
  * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping error
+ * 02/07/29	T. Kochi	Allocate interrupt vectors dynamically
  */
 /*
  * Here is what the interrupt logic between a PCI device and the CPU looks like:
@@ -99,7 +100,7 @@
 	unsigned int 	base_irq;	/* first irq assigned to this IOSAPIC */
 	unsigned short 	max_pin;	/* max input pin supported in this IOSAPIC */
 	unsigned char	pcat_compat;	/* 8259 compatibility flag */
-} iosapic_lists[256] __initdata;
+} iosapic_lists[256] __devinitdata;
 
 static int num_iosapic = 0;
 
@@ -107,7 +108,7 @@
 /*
  * Find an IOSAPIC associated with an IRQ
  */
-static inline int __init
+static inline int __devinit
 find_iosapic (unsigned int irq)
 {
 	int i;
@@ -135,21 +136,6 @@
 	return -1;
 }
 
-/*
- * Map PCI pin to the corresponding IA-64 interrupt vector.  If no such mapping exists,
- * return -1.
- */
-int
-pci_pin_to_vector (int bus, int slot, int pci_pin)
-{
-	struct pci_vector_struct *r;
-
-	for (r = pci_irq.route; r < pci_irq.route + pci_irq.num_routes; ++r)
-		if (r->bus == bus && (r->pci_id >> 16) == slot && r->pin == pci_pin)
-			return iosapic_irq_to_vector(r->irq);
-	return -1;
-}
-
 static void
 set_rte (unsigned int vector, unsigned long dest)
 {
@@ -568,7 +554,76 @@
 	set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
 }
 
-void __init
+/*
+ * Map PCI pin to the corresponding IA-64 global interrupt vector.
+ * If no such mapping exists, return -1.
+ */
+static int
+pci_pin_to_globalvector (int bus, int slot, int pci_pin)
+{
+	struct pci_vector_struct *r;
+
+	for (r = pci_irq.route; r < pci_irq.route + pci_irq.num_routes; ++r)
+		if (r->bus == bus && (r->pci_id >> 16) == slot && r->pin == pci_pin)
+			return r->irq;
+	return -1;
+}
+
+/*
+ * Map PCI pin to the corresponding IA-64 interrupt vector.  If no such mapping exists,
+ * try to allocate a new vector.  If it fails, return -1.
+ */
+static int
+pci_pin_to_vector (int bus, int slot, int pci_pin)
+{
+	int index, vector, pin, gv;
+	int base_irq, max_pin, pcat_compat;
+	char *addr;
+
+	gv = pci_pin_to_globalvector (bus, slot, pci_pin);
+
+	if (gv < 0) {
+		printk("PCI: no interrupt route for %02x:%02x pin %c\n", bus, slot, 'A' + pci_pin);
+		return -1;
+	}
+
+	vector = iosapic_irq_to_vector(gv);
+
+	if (vector < 0) {
+		/* we should allocate a vector for this interrupt line */
+
+		index = find_iosapic(gv);
+
+		if (index < 0) {
+			printk("PCI: IRQ %d has no IOSAPIC mapping\n", gv);
+			return -1;
+		}
+
+		addr = iosapic_lists[index].addr;
+		base_irq = iosapic_lists[index].base_irq;
+		max_pin = iosapic_lists[index].max_pin;
+		pcat_compat = iosapic_lists[index].pcat_compat;
+		pin = gv - base_irq;
+
+		if (pcat_compat && (gv < 16))
+			vector = isa_irq_to_vector(gv);
+		else {
+			/* new iosapic irq: allocate a vector for it */
+			vector = ia64_alloc_irq();
+		}
+
+		register_irq(gv, vector, pin, IOSAPIC_LOWEST_PRIORITY, 0, 0, base_irq, addr);
+
+#ifdef DEBUG_IRQ_ROUTING
+		printk("PCI: (B%d,I%d,P%d) -> IOSAPIC irq 0x%02x -> vector 0x%02x\n",
+		       bus, slot, pci_pin, gv, vector);
+#endif
+	}
+
+	return vector;
+}
+
+void __devinit
 iosapic_init (unsigned long phys_addr, unsigned int base_irq, int pcat_compat)
 {
 	int irq, max_pin, vector, pin;
@@ -632,71 +687,106 @@
 	}
 }
 
-void __init
-iosapic_init_pci_irq (void)
-{
-	int i, index, vector, pin;
-	int base_irq, max_pin, pcat_compat;
-	unsigned int irq;
-	char *addr;
-
-	if (0 != acpi_get_prt(&pci_irq.route, &pci_irq.num_routes))
-		return;
-
-	for (i = 0; i < pci_irq.num_routes; i++) {
-
-		irq = pci_irq.route[i].irq;
-
-		index = find_iosapic(irq);
-		if (index < 0) {
-			printk("PCI: IRQ %u has no IOSAPIC mapping\n", irq);
-			continue;
-		}
-
-		addr = iosapic_lists[index].addr;
-		base_irq = iosapic_lists[index].base_irq;
-		max_pin = iosapic_lists[index].max_pin;
-		pcat_compat = iosapic_lists[index].pcat_compat;
-		pin = irq - base_irq;
 
-		if ((unsigned) pin > max_pin)
-			/* the interrupt route is for another controller... */
-			continue;
+/*
+ * Set dev->irq and program iosapic to deliver interrupts
+ */
+void
+iosapic_alloc_irq (struct pci_dev *dev)
+{
+	unsigned char pin;
+	int vector;
+	struct hw_interrupt_type *irq_type;
+	irq_desc_t *idesc;
 
-		if (pcat_compat && (irq < 16))
-			vector = isa_irq_to_vector(irq);
-		else {
-			vector = iosapic_irq_to_vector(irq);
-			if (vector < 0)
-				/* new iosapic irq: allocate a vector for it */
-				vector = ia64_alloc_irq();
+	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+	if (pin) {
+		pin--;          /* interrupt pins are numbered starting from 1 */
+		vector = pci_pin_to_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+		if (vector < 0 && dev->bus->parent) {
+			/* go back to the bridge */
+			struct pci_dev *bridge = dev->bus->self;
+
+			if (bridge) {
+				/* allow for multiple bridges on an adapter */
+				do {
+					/* do the bridge swizzle... */
+					pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+					vector = pci_pin_to_vector(bridge->bus->number,
+								   PCI_SLOT(bridge->devfn),
+								   pin);
+				} while (vector < 0 && (bridge = bridge->bus->self));
+			}
+			if (vector >= 0)
+				printk(KERN_WARNING
+				       "PCI: using PPB(B%d,I%d,P%d) to get vector %02x\n",
+				       dev->bus->number, PCI_SLOT(dev->devfn),
+				       pin, vector);
+			else
+				printk(KERN_WARNING
+				       "PCI: Couldn't map irq for (B%d,I%d,P%d)\n",
+				       dev->bus->number, PCI_SLOT(dev->devfn), pin);
 		}
-
-		register_irq(irq, vector, pin, IOSAPIC_LOWEST_PRIORITY, 0, 0, base_irq, addr);
-
-#ifdef DEBUG_IRQ_ROUTING
-		printk("PCI: (B%d,I%d,P%d) -> IOSAPIC irq 0x%02x -> vector 0x%02x\n",
-		       pci_irq.route[i].bus, pci_irq.route[i].pci_id>>16, pci_irq.route[i].pin,
-		       iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector);
+		if (vector >= 0) {
+			printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> 0x%02x\n",
+			       dev->bus->number, PCI_SLOT(dev->devfn), pin, vector);
+			dev->irq = vector;
+
+			irq_type = &irq_type_iosapic_level;
+			idesc = irq_desc(vector);
+			if (idesc->handler != irq_type) {
+				if (idesc->handler != &no_irq_type)
+					printk("iosapic_pci_fixup: changing vector 0x%02x "
+					       "from %s to %s\n", vector,
+					       idesc->handler->typename,
+					       irq_type->typename);
+				idesc->handler = irq_type;
+			}
+#ifdef CONFIG_SMP
+			/*
+			 * For platforms that do not support interrupt redirect
+			 * via the XTP interface, we can round-robin the PCI
+			 * device interrupts to the processors
+			 */
+			if (!(smp_int_redirect & SMP_IRQ_REDIRECTION)) {
+				static int cpu_index = 0;
+
+				set_rte(vector, cpu_physical_id(cpu_index) & 0xffff);
+
+				cpu_index++;
+				if (cpu_index >= smp_num_cpus)
+					cpu_index = 0;
+			} else {
+				/*
+				 * Direct the interrupt vector to the current cpu,
+				 * platform redirection will distribute them.
+				 */
+				set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
+			}
+#else
+			/* direct the interrupt vector to the running cpu id */
+			set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
 #endif
-		/*
-		 * NOTE: The IOSAPIC RTE will be programmed in iosapic_pci_fixup().  It
-		 * needs to be done there to ensure PCI hotplug works right.
-		 */
+		}
 	}
+	/*
+	 * Nothing to fixup
+	 * Fix out-of-range IRQ numbers
+	 */
+	if (dev->irq >= IA64_NUM_VECTORS)
+		dev->irq = 15;	/* Spurious interrupts */
 }
 
+
 void
 iosapic_pci_fixup (int phase)
 {
 	struct	pci_dev	*dev;
-	unsigned char pin;
-	int vector;
-	struct hw_interrupt_type *irq_type;
-	irq_desc_t *idesc;
 
 	if (phase == 0) {
-		iosapic_init_pci_irq();
+		if (0 != acpi_get_prt(&pci_irq.route, &pci_irq.num_routes)) {
+			printk("%s: acpi_get_prt failed\n", __FILE__);
+		}
 		return;
 	}
 
@@ -704,81 +794,6 @@
 		return;
 
 	pci_for_each_dev(dev) {
-		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-		if (pin) {
-			pin--;          /* interrupt pins are numbered starting from 1 */
-			vector = pci_pin_to_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
-			if (vector < 0 && dev->bus->parent) {
-				/* go back to the bridge */
-				struct pci_dev *bridge = dev->bus->self;
-
-				if (bridge) {
-					/* allow for multiple bridges on an adapter */
-					do {
-						/* do the bridge swizzle... */
-						pin = (pin + PCI_SLOT(dev->devfn)) % 4;
-						vector = pci_pin_to_vector(bridge->bus->number,
-									   PCI_SLOT(bridge->devfn),
-									   pin);
-					} while (vector < 0 && (bridge = bridge->bus->self));
-				}
-				if (vector >= 0)
-					printk(KERN_WARNING
-					       "PCI: using PPB(B%d,I%d,P%d) to get vector %02x\n",
-					       dev->bus->number, PCI_SLOT(dev->devfn),
-					       pin, vector);
-				else
-					printk(KERN_WARNING
-					       "PCI: Couldn't map irq for (B%d,I%d,P%d)\n",
-					       dev->bus->number, PCI_SLOT(dev->devfn), pin);
-			}
-			if (vector >= 0) {
-				printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> 0x%02x\n",
-				       dev->bus->number, PCI_SLOT(dev->devfn), pin, vector);
-				dev->irq = vector;
-
-				irq_type = &irq_type_iosapic_level;
-				idesc = irq_desc(vector);
-				if (idesc->handler != irq_type) {
-					if (idesc->handler != &no_irq_type)
-						printk("iosapic_pci_fixup: changing vector 0x%02x "
-						       "from %s to %s\n", vector,
-						       idesc->handler->typename,
-						       irq_type->typename);
-					idesc->handler = irq_type;
-				}
-#ifdef CONFIG_SMP
-				/*
-				 * For platforms that do not support interrupt redirect
-				 * via the XTP interface, we can round-robin the PCI
-				 * device interrupts to the processors
-				 */
-				if (!(smp_int_redirect & SMP_IRQ_REDIRECTION)) {
-					static int cpu_index = 0;
-
-					set_rte(vector, cpu_physical_id(cpu_index) & 0xffff);
-
-					cpu_index++;
-					if (cpu_index >= smp_num_cpus)
-						cpu_index = 0;
-				} else {
-					/*
-					 * Direct the interrupt vector to the current cpu,
-					 * platform redirection will distribute them.
-					 */
-					set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
-				}
-#else
-				/* direct the interrupt vector to the running cpu id */
-				set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
-#endif
-			}
-		}
-		/*
-		 * Nothing to fixup
-		 * Fix out-of-range IRQ numbers
-		 */
-		if (dev->irq >= IA64_NUM_VECTORS)
-			dev->irq = 15;	/* Spurious interrupts */
+		iosapic_alloc_irq(dev);
 	}
 }
--- david-020722/include/asm-ia64/iosapic.h	Fri Nov  9 14:26:17 2001
+++ david-020722-pcihp/include/asm-ia64/iosapic.h	Mon Jul 29 18:03:59 2002
@@ -51,8 +51,9 @@
 
 #ifndef __ASSEMBLY__
 
-extern void __init iosapic_init (unsigned long address, unsigned int base_irq,
-                                 int pcat_compat);
+extern void __devinit iosapic_init (unsigned long address,
+				    unsigned int base_irq, int pcat_compat);
+extern void iosapic_alloc_irq (struct pci_dev *dev);
 extern int iosapic_register_irq (u32 global_vector, unsigned long polarity,
                                  unsigned long edge_triggered, u32 base_irq,
                                  char *iosapic_address);



Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
@ 2002-07-30  5:01 ` Grant Grundler
  2002-07-30 18:04 ` KOCHI, Takayoshi
                   ` (16 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Grant Grundler @ 2002-07-30  5:01 UTC (permalink / raw)
  To: linux-ia64

"KOCHI, Takayoshi" wrote:
> This patch fixes the behavior and only allocates vectors
> for existing pci_dev only.

Another approach is to defer allocating the vector until
request_irq() registers the interrupt handler. That way,
only devices that have drivers get interrupts.

> As usual, interrupt sharing often imply performance
> degradation and such a configuration should be avoided.

Another way to reduce/avoid sharing of vector table entries is to have
multiple Vector Tables. Either one for each CPU or each node of
a ccNUMA-like machine. I thought SGI's NUMA machines implement
this already but haven't checked.

grant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
  2002-07-30  5:01 ` Grant Grundler
@ 2002-07-30 18:04 ` KOCHI, Takayoshi
  2002-07-30 22:14 ` Grant Grundler
                   ` (15 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-07-30 18:04 UTC (permalink / raw)
  To: linux-ia64

Thanks for comments.

On Mon, 29 Jul 2002 22:01:28 -0700
Grant Grundler <grundler@cup.hp.com> wrote:

> "KOCHI, Takayoshi" wrote:
> > This patch fixes the behavior and only allocates vectors
> > for existing pci_dev only.
> 
> Another approach is to defer allocating the vector until
> request_irq() registers the interrupt handler. That way,
> only devices that have drivers get interrupts.

But how to distinguish PCI irqs from others?
It seems that all PCI drivers pass their pci_dev structure
as the 4th argument of request_irq, but others don't
(for example, serial.c).

I thought allocating a new vector in request_irq() is another
level of dynamic allocation.

Once I took another approach and wrote a version that
allocates a new vector when a driver calls pcibios_enable,
but it turned out to be problematic because not all PCI
device drivers call pci_enable() at their startup.

It seems that, for i386, irq lookup is done in pcibios_enable()
(to be exact, pcibios_enable_irq() and other functions in
 arch/i386/kernel/pci-irq.c).  So drivers that don't
call pci_enable() just happen to work by luck (due to proper
setup by the BIOS etc.)

> > As usual, interrupt sharing often imply performance
> > degradation and such a configuration should be avoided.
> 
> Another way to reduce/avoid sharing of vector table entries is to have
> multiple Vector Tables. Either one for each CPU or each node of
> a ccNUMA-like machine. I thought SGI's NUMA machines implement
> this already but haven't checked.

Yes, once Alan Mayer at sgi did the work.
But ccNUMA platforms can have various connection topologies,
and simply dividing the vector table by the number of nodes
may not be the best choice.

I think iosapic.c should be generic enough to
cover all IO SAPIC-based platforms and tuning for specific
ccNUMA platforms should be considered separately.

Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
  2002-07-30  5:01 ` Grant Grundler
  2002-07-30 18:04 ` KOCHI, Takayoshi
@ 2002-07-30 22:14 ` Grant Grundler
  2002-07-30 23:49 ` KOCHI, Takayoshi
                   ` (14 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Grant Grundler @ 2002-07-30 22:14 UTC (permalink / raw)
  To: linux-ia64

"KOCHI, Takayoshi" wrote:
> Thanks for comments.

welcome...one of my favorite topics...

> But how to distinguish PCI irqs from others?

IRQ number. I assume you mean PCI vs other IO subsystems.
The pcibios support has to uniquely identify each PCI IRQ Line
with a different IRQ number. And that number space has to
be shared with all interrupt sources.

request_irq() can (does?) branch to iosapic support based on IRQ number.

> It seems that all PCI drivers pass its pci_dev structure
> as the 4th argument of request_irq, but others won't
> (for example, serial.c).

The 4th arg is a "void *". It can be anything the driver wants
to identify device instance or even NULL (see ob600 mouse driver).
Don't make any assumptions about what the 4th argument is.

> I thought allocating a new vector in request_irq() is another
> level of dynamic allocation.

It doesn't need to be. Just replaces the allocation
which happens per PCI IRQ Line or per PCI Device.

BTW, "in request_irq()" to me means in that code path, not
immediately in that function.

> Once I took another approach and wrote a version that
> allocates a new vector when a driver calls pcibios_enable,
> but it turned out to be problematic because not all PCI
> device drivers call pci_enable() at their startup.

That's a driver bug. Please submit patches to David or the driver
maintainer. cpqfc driver doesn't call pci_enable_device() either.
I found this out since parisc-linux port will crash on A500 if
a driver attempts to access a PCI device that isn't enabled.
With PCI Hotplug of PCI cards, this will become more critical.

> So drivers that doesn't
> call pci_enable() are just working luckily (due to proper
> setting by BIOS etc.)

Yup. x86 is the least strict in terms of following programming interfaces.
But lots of issues (eg Posted PCI writes, DMA Mapping, long vs int)
make drivers non-portable to other arches. Alan Cox (LinuxTag 2002)
and Arjen van de Ven (OLS2002) both gave excellent talks on
driver portability. I also gave a talk on HP ZX1 at OLS 2002.
I highly recommend David and Stephane's "IA64 Linux" book
to those seeking detailed 2.4 driver interface descriptions.

> Yes, once Alan Mayer at sgi did the work.
> But ccNUMA platforms can have various connection topology
> and simply dividing vector table into the number of nodes
> may not the best choice to do.

Right. That's why I suggested per CPU (vs per Node) as an alternative.
Different platforms can do it differently. I'd be interested in
implementing per CPU vector tables but HP isn't interested in funding it.

> I think iosapic.c should be as generic as possible enough to
> cover all IO SAPIC-based platforms and tuning for specific
> ccNUMA platforms should be considered separately.

Agreed.

cheers,
grant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (2 preceding siblings ...)
  2002-07-30 22:14 ` Grant Grundler
@ 2002-07-30 23:49 ` KOCHI, Takayoshi
  2002-08-01  1:03 ` Grant Grundler
                   ` (13 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-07-30 23:49 UTC (permalink / raw)
  To: linux-ia64

On Tue, 30 Jul 2002 15:14:03 -0700
Grant Grundler <grundler@cup.hp.com> wrote:

> > But how to distinguish PCI irqs from others?
> 
> IRQ number. I assume you mean PCI vs other IO subsystems.
> The pcibios support has to uniquely identify each PCI IRQ Line
> with a different IRQ number. And that number space has to
> be shared with all interrupt sources.
> 
> request_irq() can (does?) branch to iosapic support based on IRQ number.

But PCI device driver will call request_irq() with dev->irq as
IRQ number.  This number is usually set by PCI device scan
routine in drivers/pci/pci.c (2.4.x) and is derived from
the device's configuration space.  The number BIOS sets in
that configuration space field is somewhat bogus in many
Itanium platforms.  So we have to embed into dev->irq
some magic number, which is not used elsewhere, for each
pci_dev in pci_fixup stage.

It makes sense because
 1) we can allocate interrupt vectors only for those who want them
 2) it has explicit free API (free_irq), while pcibios_enable_device
    doesn't have its counterpart.  This is good for PCI hotplug.

But many drivers assume dev->irq has some IRQ number associated with it
and do something like " printk("IRQ %d\n", dev->irq); "
If dev->irq is the magic number, each driver will report its
IRQ as the same number.  This may confuse users.
(And drivers don't have any means to know what number request_irq() 
 allocated, either.)

/proc/interrupts and /proc/irq/ (smp_affinity stuff) may
involve confusion in matching irq number <-> device.

We'd like to surprise users as little as possible, wouldn't we?

> > It seems that all PCI drivers pass its pci_dev structure
> > as the 4th argument of request_irq, but others won't
> > (for example, serial.c).
> 
> The 4th arg is a "void *". It can be anything the driver wants
> to identify device instance or even NULL (see ob600 mouse driver).
> Don't make any assumptions about what the 4th argument is.

Exactly.

> > I thought allocating a new vector in request_irq() is another
> > level of dynamic allocation.
> 
> It doesn't need to be. Just replaces the allocation
> which happens per PCI IRQ Line or per PCI Device.
> 
> BTW, "in request_irq()" to me means in that code path, not
> immediately in that function.
> 
> > Once I took another approach and wrote a version that
> > allocates a new vector when a driver calls pcibios_enable,
> > but it turned out to be problematic because not all PCI
> > device drivers call pci_enable() at their startup.
> 
> That's a driver bug. Please submit patches to David or the driver
> maintainer. cpqfc driver doesn't call pci_enable_device() either.
> I found this out since parisc-linux port will crash on A500 if
> a driver attempts to access a PCI device that isn't enabled.
> With PCI Hotplug of PCI cards, this will become more critical.

Yes.  BTW for PCI hotplug, there's more serious problem.
If the device driver doesn't use 'struct pci_driver' and
'pci_register_driver()' API, removing the device may fail.

If there's one device in the system and the driver is modular,
you can remove the driver and then remove the device.  But if there
are two devices and you want to remove one of them, it can't
be done without the pci_register_driver() API.

> > So drivers that doesn't
> > call pci_enable() are just working luckily (due to proper
> > setting by BIOS etc.)
> 
> Yup. x86 is the least strict in terms of following programming interfaces.
> But lots of issues (eg Posted PCI writes, DMA Mapping, long vs int)
> make drivers non-portable to other arches. Alan Cox (LinuxTag 2002)
> and Arjen van de Ven (OLS2002) both gave excellent talks on
> driver portability. I also gave a talk on HP ZX1 at OLS 2002.

I attended the OLS2002, Arjan's and your talks.
Thank you...

> I highly reccomend David and Stephane's "IA64 Linux" book
> to those seeking detailed 2.4 driver interface descriptions.
> 
> > Yes, once Alan Mayer at sgi did the work.
> > But ccNUMA platforms can have various connection topology
> > and simply dividing vector table into the number of nodes
> > may not the best choice to do.
> 
> Right. That's why I suggested per CPU (vs per Node) as an alternative.
> Different platforms can do it differently. I'd be interested in
> implementing per CPU vector tables but HP isn't interested in funding it.

A per-CPU vector table requires a lot of work on the smp irq affinity stuff.
It may be a long-term solution, but not a short-term one.

Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (3 preceding siblings ...)
  2002-07-30 23:49 ` KOCHI, Takayoshi
@ 2002-08-01  1:03 ` Grant Grundler
  2002-08-02  0:39 ` KOCHI, Takayoshi
                   ` (12 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Grant Grundler @ 2002-08-01  1:03 UTC (permalink / raw)
  To: linux-ia64

"KOCHI, Takayoshi" wrote:
> But PCI device driver will call request_irq() with dev->irq as
> IRQ number.  This number is usually set by PCI device scan
> routine in drivers/pci/pci.c (2.4.x) and is derived from
> the device's configuration space.

uhmm...emphasis on "derived from". pcibios can (and does
depending on platform) "fix up" the value that PCI Device scan
places in the pcidev.

> The number BIOS sets in
> that configuratoin space field is somewhat bogus in many
> Itanium platforms.

pcidev->irq != BIOS value or config space IRQ_LINE value.
pcidev->irq is just a "handle" for pcibios code to pass to
platform interrupt support. Both have to understand what
the handle means.

If you don't trust BIOS on your platform, it's ok if pcibios
support does the "magic" you describe below as long as the
platform interrupt support understands the result.

> So we have to embed into dev->irq
> some magic number, which is not used elsewhere, for each
> pci_dev in pci_fixup stage.

pcibios_fixup_bus() gets to mangle pcidev->irq values as needed.
This sounds right.

> It makes sense because
>  1) we can allocate interrupt vectors only for those who want them
>  2) it has explicit free API (free_irq), while pcibios_enable_device
>     doesn't have its counterpart.  This is good for PCI hotplug.

yes. I *think* (but don't know for sure) that's because more magic
might be needed to enable devices on some platforms than simply
flipping the MASTER enable bit in the PCI device command register
(config space). I suspect flipping MASTER enable bit off should
be enough.

> But many drivers assume dev->irq has some IRQ number associated with it
> and does like " printk("IRQ %d\n", dev->irq); "
> If dev->irq is the magic number, each driver will report its
> IRQ as the same number.  This may confuse users.

Use different magic numbers for each IRQ?
They can be any *int* value. You can even use them to index into
an array or structures. The trick is to fully hide the IRQ<->pcidev
relationship in the platform specific support.

> (And drivers don't have any means to know what number request_irq() 
>  allocated, either.)

Two comments on this one:
o drivers don't know anyway. pcidev->irq is just a "handle".

o request_irq() doesn't allocate pcidev->irq numbers.
  That's too late in the initialization process.

The pcidev->irq values have to be setup about the time the PCI bus
is "walked" and before the driver probe routine is called.
The IRQ doesn't have to be enabled until request_irq() is called.
"Enable" could mean allocate CPU vector, program iosapic RTE, etc.

Since I haven't worked on PCI hotplug, pcibios interface might be
deficient in how/where one can fixup pcidev->irq info.

> /proc/interrupts and /proc/irq/ (smp_affinity stuff) may
> involve confusion in matching irq number <-> device.
> We'd like to make user suprise as least as possible, don't we?

right.

> Yes.  BTW for PCI hotplug, there's more serious problem.
> If the device driver doesn't use 'struct pci_driver' and
> 'pci_register_driver()' API, removing the device may fail.

I haven't played with PCI Hotplug (yet). My gut reaction is you
should submit patches for drivers *you* need to hot plug/remove.
Same story as for pci_enable_device().

> I attended the OLS2002, Arjan's and your talks.
> Thank you...

welcome...HP paid for a very fun trip. ;^)

> per-CPU vector table has lots to do for smp irq affinity stuff.
> It may be a long-term solution, but not for short-term solution.

yes - definitely long term. irq affinity needs to track current
CPU and which vector it's using. I don't know how much work is
needed to fix/change that.

Clearly, having multiple vector tables will avoid sharing vectors on
larger systems (> 50 PCI slots).

How much pressure is on the vector table will also depend on how much
MSI or MSI-X (Message Signaled Interrupts) is used by the next round
of IO technology (infiniband, 10GbEther, etc).

thanks,
grant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (4 preceding siblings ...)
  2002-08-01  1:03 ` Grant Grundler
@ 2002-08-02  0:39 ` KOCHI, Takayoshi
  2002-08-02  6:04 ` David Mosberger
                   ` (11 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-08-02  0:39 UTC (permalink / raw)
  To: linux-ia64

On Wed, 31 Jul 2002 18:03:10 -0700
Grant Grundler <grundler@cup.hp.com> wrote:

> "KOCHI, Takayoshi" wrote:
> > But PCI device driver will call request_irq() with dev->irq as
> > IRQ number.  This number is usually set by PCI device scan
> > routine in drivers/pci/pci.c (2.4.x) and is derived from
> > the device's configuration space.
> 
> uhmm...emphasis on "derived from". pcibios can (and does
> depending on platform) "fix up" the value that PCI Device scan
> places in the pcidev.
> 
> > The number BIOS sets in
> > that configuration space field is somewhat bogus in many
> > Itanium platforms.
> 
> pcidev->irq != BIOS value or config space IRQ_LINE value.
> pcidev->irq is just a "handle" for pcibios code to pass to
> platform interrupt support. Both have to understand what
> the handle means.
> 
> If you don't trust BIOS on your platform, it's ok if pcibios
> support does the "magic" you describe below as long as the
> platform interrupt support understands the result.

What I described is the current linux behavior.

PCI core subsystem initializes pcidev->irq as IRQ_LINE value
which is usually set by BIOS.  I do agree that the number doesn't
have to be real interrupt vector.

But how can you trust Interrupt Line value set by BIOS?
It is definitely not an interrupt vector number, as
interrupt vector number is what OS allocates and ties into
a device.

Then is it a global interrupt vector?
The config space Interrupt Line value is only 8bit while
ACPI 2.0 can describe 32bit global interrupt vector.
NEC's platform actually uses values of 256 and above
for global interrupt vectors, so the Interrupt Line
value in configuration space will inevitably be bogus.


> > So we have to embed into dev->irq
> > some magic number, which is not used elsewhere, for each
> > pci_dev in pci_fixup stage.
> 
> pcibios_fixup_bus() gets to mangle pcidev->irq values as needed.
> This sounds right.
> 
> > It makes sense because
> >  1) we can allocate interrupt vectors only for those who want them
> >  2) it has explicit free API (free_irq), while pcibios_enable_device
> >     doesn't have its counterpart.  This is good for PCI hotplug.
> 
> yes. I *think* (but don't know for sure) that's because more magic
> might be needed to enable devices on some platforms than simply
> flipping the MASTER enable bit in the PCI device command register
> (config space). I suspect flipping MASTER enable bit off should
> be enough.

Okay, then pci_set_master and pci_disable_device are a pair of APIs
and pci_enable_device/pci_disable_device are not symmetric... sigh.

It is ok for PCI hotplug that we don't have an architecture-dependent
pci_disable_device hook because there are other hooks when
a device driver releases control of a device.

> > But many drivers assume dev->irq has some IRQ number associated with it
> > and does like " printk("IRQ %d\n", dev->irq); "
> > If dev->irq is the magic number, each driver will report its
> > IRQ as the same number.  This may confuse users.
> 
> Use different magic numbers for each IRQ?
> They can be any *int* value. You can even use them to index into
> an array or structures. The trick is to fully hide the IRQ<->pcidev
> relationship in the platform specific support.

Yes, but I think it will complicate things more than necessary.

> > (And drivers don't have any means to know what number request_irq() 
> >  allocated, either.)
> 
> Two comments on this one:
> o drivers don't know anyway. pcidev->irq is just a "handle".
> 
> o request_irq() doesn't allocate pcidev->irq numbers.
>   That's too late in the initialization process.
> 
> The pcidev->irq values have to be setup about the time the PCI bus
> is "walked" and before the driver probe routine is called.
> The IRQ doesn't have to be enabled until request_irq() is called.
> "Enable" could mean allocate CPU vector, program iosapic RTE, etc.

right.

> Since I haven't worked on PCI hotplug, pcibios interface might be
> deficient in how/where one can fixup pcidev->irq info.

Now I understand that

 1) pci_dev->irq should be fixed-up at pci_fixup stage
    in the kernel
 2) pci_dev->irq is ia64 interrupt vector only
    because we choose to do so and can be implemented
    another way
 3) ia64 interrupt vector can be allocated when enabled
    but we allocate ahead of enabling

Sharing one vector space among all processors in a system, and
mapping pci_dev->irq one-to-one onto an interrupt vector, are
implementation choices the developers made a long time ago.

iosapic.c has been written upon these assumptions.
My patch doesn't break them.

Implementing ia64 interrupt in other ways may be interesting
but it's a 2.5-series matter.  For 2.4, current vector
allocation scheme is broken at least on our platform with large
configuration.  What we'd like to do now is fix these cases for
stable series without breaking others.

> > Yes.  BTW for PCI hotplug, there's more serious problem.
> > If the device driver doesn't use 'struct pci_driver' and
> > 'pci_register_driver()' API, removing the device may fail.
> 
> I haven't played with PCI Hotplug (yet). My gut reaction is you
> should submit patches for drivers *you* need to hot plug/remove.
> Same story as for pci_enable_device().

Yes.

> > per-CPU vector table has lots to do for smp irq affinity stuff.
> > It may be a long-term solution, but not for short-term solution.
> 
> yes - definitely long term. irq affinity needs to track current
> CPU and which vector it's using. I don't know how much work is
> needed to fix/change that.
> 
> Clearly, having multiple vector tables will avoid sharing vectors on
> larger systems (> 50 PCI slots).
> 
> How much pressure is on the vector table will also depend on how much
> MSI or MSI-X (Message Signaled Interrupts) is used by the next round
> of IO technology (infiniband, 10GbEther, etc).
> 
> thanks,
> grant

Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (5 preceding siblings ...)
  2002-08-02  0:39 ` KOCHI, Takayoshi
@ 2002-08-02  6:04 ` David Mosberger
  2002-08-02 15:56 ` Bjorn Helgaas
                   ` (10 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: David Mosberger @ 2002-08-02  6:04 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Mon, 29 Jul 2002 19:36:12 -0700, "KOCHI, Takayoshi" <t-kouchi@mvf.biglobe.ne.jp> said:

  >> Now iosapic.c allocates an ia64 interrupt vector for each
  >> possible _PRT entry.  So iosapic allocates vectors to unpopulated
  >> PCI slots.  This patch fixes the behavior and only allocates
  >> vectors for existing pci_dev only.

Seems reasonable.

  >> Now the ia64_alloc_irq() routine is somewhat(?) broken and panics
  >> if there are more than 183 distinct entries for interrupts.  This
  >> is easily broken with huge configuration servers.

Well, ia64_alloc_irq() does what it was designed to do, so it can't
be broken. ;-)

But yes, on large machines, it's not what you want.

The patch basically looks fine to me, except for some naming issues.
We now have vectors, global vectors, global system interrupts (GSIs),
irq numbers, and what not.  This is confusing and hard to maintain.
We should settle on a consistent set of names (hopefully something
consistent with Linux, ACPI spec, and PCI spec).  I tried to do this a
while ago (see big comment at the beginning of iosapic.c), but the
picture described there is incomplete for large machines and doesn't
do a good job at integrating with ACPI lingo.  Anyone want to take a
stab?

	--david

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (6 preceding siblings ...)
  2002-08-02  6:04 ` David Mosberger
@ 2002-08-02 15:56 ` Bjorn Helgaas
  2002-08-02 16:32 ` David Mosberger
                   ` (9 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2002-08-02 15:56 UTC (permalink / raw)
  To: linux-ia64

> The patch basically looks fine to me, except for some naming issues.
> We now have vectors, global vectors, global system interrupts (GSIs),
> irq numbers, and what not.  This is confusing and hard to maintain.
> We should settle on a consistent set of names (hopefully something
> consistent with Linux, ACPI spec, and PCI spec).  I tried to do this a
> while ago (see big comment at the beginning of iosapic.c), but the
> picture described there is incomplete for large machines and doesn't
> do a good job at integrating with ACPI lingo.  Anyone want to take a
> stab?

This has been bugging me for a long time, too.  If nobody beats
me to it, I'll post a proposal next week.  How do you want to
handle the merging -- we now have a couple patches that I did
earlier, Takayoshi's patch, and the proposed naming patch that
all touch iosapic.c?  If you send me your current iosapic.c, I
can work from that.
-- 
Bjorn Helgaas - bjorn_helgaas at hp.com
Linux Systems Operation R&D
Hewlett-Packard Company


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (7 preceding siblings ...)
  2002-08-02 15:56 ` Bjorn Helgaas
@ 2002-08-02 16:32 ` David Mosberger
  2002-08-02 17:45 ` KOCHI, Takayoshi
                   ` (8 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: David Mosberger @ 2002-08-02 16:32 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Fri, 2 Aug 2002 09:56:33 -0600, Bjorn Helgaas <bjorn_helgaas@hp.com> said:

  >> The patch basically looks fine to me, except for some naming
  >> issues.  We now have vectors, global vectors, global system
  >> interrupts (GSIs), irq numbers, and what not.  This is confusing
  >> and hard to maintain.  We should settle on a consistent set of
  >> names (hopefully something consistent with Linux, ACPI spec, and
  >> PCI spec).  I tried to do this a while ago (see big comment at
  >> the beginning of iosapic.c), but the picture described there is
  >> incomplete for large machines and doesn't do a good job at
  >> integrating with ACPI lingo.  Anyone want to take a stab?

  Bjorn> This has been bugging me for a long time, too.  If nobody
  Bjorn> beats me to it, I'll post a proposal next week.  How do you
  Bjorn> want to handle the merging -- we now have a couple patches
  Bjorn> that I did earlier, Takayoshi's patch, and the proposed
  Bjorn> naming patch that all touch iosapic.c?  If you send me your
  Bjorn> current iosapic.c, I can work from that.

I haven't applied the patch yet (mainly because I'm focusing on 2.5 at
the moment).

Another heads up: we currently have an initialization ordering problem
between the ACPI and PCI subsystems.  My current thinking is that the
best way to fix this will be to sync up with the PCI irq work that has
been done in the ACPI subsystem.  Besides solving the ordering
problem, this should also get rid of some code duplication (e.g.,
there is currently code both in ACPI and iosapic.c to do the bridge
swizzle thingy).  I'll see about doing this in 2.5 and if the result
looks good, we can backport it to 2.4.

	--david

/*
 * I/O SAPIC support.
 *
 * Copyright (C) 1999 Intel Corp.
 * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
 * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
 * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co.
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
 *
 * 00/04/19	D. Mosberger	Rewritten to mirror more closely the x86 I/O APIC code.
 *				In particular, we now have separate handlers for edge
 *				and level triggered interrupts.
 * 00/10/27	Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
 *				PCI to vector mapping, shared PCI interrupts.
 * 00/10/27	D. Mosberger	Document things a bit more to make them more understandable.
 *				Clean up much of the old IOSAPIC cruft.
 * 01/07/27	J.I. Lee	PCI irq routing, Platform/Legacy interrupts and fixes for
 *				ACPI S5(SoftOff) support.
 * 02/01/23	J.I. Lee	iosapic pgm fixes for PCI irq routing from _PRT
 * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt vectors in
 *                              iosapic_set_affinity(), initializations for
 *                              /proc/irq/#/smp_affinity
 * 02/04/02	P. Diefenbaugh	Cleaned up ACPI PCI IRQ routing.
 * 02/04/18	J.I. Lee	bug fix in iosapic_init_pci_irq
 * 02/04/30	J.I. Lee	bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping error
 */
/*
 * Here is what the interrupt logic between a PCI device and the CPU looks like:
 *
 * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD).  The
 *     device is uniquely identified by its bus- and slot-number (the function
 *     number does not matter here because all functions share the same interrupt
 *     lines).
 *
 * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC controller.
 *     Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
 *     triggered and use the same polarity).  Each interrupt line has a unique IOSAPIC
 *     irq number which can be calculated as the sum of the controller's base irq number
 *     and the IOSAPIC pin number to which the line connects.
 *
 * (3) The IOSAPIC uses an internal table to map the IOSAPIC pin into the IA-64 interrupt
 *     vector.  This interrupt vector is then sent to the CPU.
 *
 * In other words, there are two levels of indirections involved:
 *
 *	pci pin -> iosapic irq -> IA-64 vector
 *
 * Note: outside this module, IA-64 vectors are called "irqs".  This is because that's
 * the traditional name Linux uses for interrupt vectors.
 */
#include <linux/config.h>

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/string.h>
#include <linux/irq.h>
#include <linux/acpi.h>

#include <asm/delay.h>
#include <asm/hw_irq.h>
#include <asm/io.h>
#include <asm/iosapic.h>
#include <asm/machvec.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>


/*
 * Compile-time debug switches, both off by default.  DEBUG_IRQ_ROUTING enables the
 * routing printk()s in this file; OVERRIDE_DEBUG selects the checking variant of
 * register_irq().
 */
#undef DEBUG_IRQ_ROUTING
#undef OVERRIDE_DEBUG

/*
 * Taken around the select-register/window-register sequences in mask_irq(),
 * unmask_irq() and iosapic_set_affinity().
 */
static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED;

/*
 * PCI pin to IOSAPIC irq routing information.  This info typically comes from ACPI;
 * it is filled in by acpi_get_prt() from iosapic_init_pci_irq().
 */

static struct {
	int num_routes;
	struct pci_vector_struct *route;
} pci_irq;

/*
 * This table maps IA-64 vectors to the IOSAPIC pin that generates this vector.  It is
 * indexed by vector; iosapic_init() marks every entry unused (pin = -1) on its first call.
 */

static struct iosapic_irq {
	char *addr;			/* base address of IOSAPIC */
	unsigned int base_irq;		/* first irq assigned to this IOSAPIC */
	/* NOTE(review): the -1 sentinel presumably relies on plain char being signed -- confirm */
	char pin;			/* IOSAPIC pin (-1 => not an IOSAPIC irq) */
	unsigned char dmode	: 3;	/* delivery mode (see iosapic.h) */
	unsigned char polarity	: 1;	/* interrupt polarity (see iosapic.h) */
	unsigned char trigger	: 1;	/* trigger mode (see iosapic.h) */
} iosapic_irq[IA64_NUM_VECTORS];

/*
 * One entry per IOSAPIC controller, appended by iosapic_init() and searched by
 * find_iosapic().
 */
static struct iosapic {
	char *addr;			/* base address of IOSAPIC */
	unsigned int 	base_irq;	/* first irq assigned to this IOSAPIC */
	unsigned short 	max_pin;	/* max input pin supported in this IOSAPIC */
	unsigned char	pcat_compat;	/* 8259 compatibility flag */
} iosapic_lists[256] __initdata;

/* number of valid entries in iosapic_lists[] */
static int num_iosapic = 0;


/*
 * Locate the IOSAPIC whose irq range [base_irq, base_irq + max_pin] contains IRQ.
 * Returns the index into iosapic_lists[], or -1 if no registered IOSAPIC covers it.
 */
static inline int __init
find_iosapic (unsigned int irq)
{
	int idx = 0;

	while (idx < num_iosapic) {
		struct iosapic *ctl = &iosapic_lists[idx];

		/*
		 * The unsigned difference wraps to a huge value when irq is below
		 * base_irq, so a single comparison checks both ends of the range.
		 */
		if ((unsigned) (irq - ctl->base_irq) <= ctl->max_pin)
			return idx;
		idx++;
	}

	return -1;
}

/*
 * Translate IOSAPIC irq number to the corresponding IA-64 interrupt vector.  If no
 * entry exists, return -1.
 *
 * The lookup is a linear scan of iosapic_irq[]: an entry matches when its controller's
 * base irq plus its pin number equals IRQ.
 */
static int
iosapic_irq_to_vector (int irq)
{
	int vector;

	for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
		/* equality test; a single '=' here would be an assignment to a non-lvalue */
		if (iosapic_irq[vector].base_irq + iosapic_irq[vector].pin == irq)
			return vector;
	return -1;
}

/*
 * Map PCI pin to the corresponding IA-64 interrupt vector.  If no such mapping exists,
 * return -1.
 *
 * BUS, SLOT and PCI_PIN are matched against the routing entries in pci_irq.route[];
 * the slot number is held in the upper 16 bits of an entry's pci_id.  The IOSAPIC irq
 * of the first matching entry is then translated by iosapic_irq_to_vector().
 */
int
pci_pin_to_vector (int bus, int slot, int pci_pin)
{
	struct pci_vector_struct *r;

	for (r = pci_irq.route; r < pci_irq.route + pci_irq.num_routes; ++r)
		/* all three fields must compare equal (==), nothing is assigned here */
		if (r->bus == bus && (r->pci_id >> 16) == slot && r->pin == pci_pin)
			return iosapic_irq_to_vector(r->irq);
	return -1;
}

/*
 * Program the IOSAPIC redirection table entry (RTE) that backs VECTOR, using the
 * polarity, trigger and delivery mode recorded in iosapic_irq[vector] and DEST
 * (id and eid) as the destination.  Vectors without an IOSAPIC pin (pin < 0) are
 * silently ignored.  This function does not take iosapic_lock itself.
 */
static void
set_rte (unsigned int vector, unsigned long dest)
{
	unsigned long pol, trigger, dmode;
	u32 low32, high32;
	char *addr;
	int pin;
	char redir;

	pin = iosapic_irq[vector].pin;
	if (pin < 0)
		return;		/* not an IOSAPIC interrupt */

	addr    = iosapic_irq[vector].addr;
	pol     = iosapic_irq[vector].polarity;
	trigger = iosapic_irq[vector].trigger;
	dmode   = iosapic_irq[vector].dmode;

	/*
	 * Compare, do not assign: dmode is written into the RTE below, so it must keep
	 * the value read from iosapic_irq[vector].
	 */
	redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
#ifdef CONFIG_SMP
	set_irq_affinity_info(vector, (int)(dest & 0xffff), redir);
#endif

	low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
		 (trigger << IOSAPIC_TRIGGER_SHIFT) |
		 (dmode << IOSAPIC_DELIVERY_SHIFT) |
		 vector);

	/* dest contains both id and eid */
	high32 = (dest << IOSAPIC_DEST_SHIFT);

	/* each half is written by selecting its register, then storing through the window */
	writel(IOSAPIC_RTE_HIGH(pin), addr + IOSAPIC_REG_SELECT);
	writel(high32, addr + IOSAPIC_WINDOW);
	writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT);
	writel(low32, addr + IOSAPIC_WINDOW);
}

/*
 * Placeholder for interrupt-type operations that require no action.
 */
static void
nop (unsigned int vector)
{
	(void) vector;		/* intentionally unused */
}

/*
 * Mask the IOSAPIC pin behind IRQ by setting the mask bit in the low word of its
 * redirection table entry.  Nothing is done if the vector has no IOSAPIC pin.
 */
static void
mask_irq (unsigned int irq)
{
	ia64_vector v = irq_to_vector(irq);
	char *base = iosapic_irq[v].addr;
	int rte_pin = iosapic_irq[v].pin;
	unsigned long flags;
	u32 rte_low;

	if (rte_pin < 0)
		return;			/* not an IOSAPIC interrupt! */

	/* the select/read/modify/write sequence runs under iosapic_lock */
	spin_lock_irqsave(&iosapic_lock, flags);
	writel(IOSAPIC_RTE_LOW(rte_pin), base + IOSAPIC_REG_SELECT);
	rte_low = readl(base + IOSAPIC_WINDOW);
	rte_low = rte_low | (1 << IOSAPIC_MASK_SHIFT);	/* set only the mask bit */
	writel(rte_low, base + IOSAPIC_WINDOW);
	spin_unlock_irqrestore(&iosapic_lock, flags);
}

/*
 * Unmask the IOSAPIC pin behind IRQ by clearing the mask bit in the low word of its
 * redirection table entry.  Nothing is done if the vector has no IOSAPIC pin.
 */
static void
unmask_irq (unsigned int irq)
{
	ia64_vector v = irq_to_vector(irq);
	char *base = iosapic_irq[v].addr;
	int rte_pin = iosapic_irq[v].pin;
	unsigned long flags;
	u32 rte_low;

	if (rte_pin < 0)
		return;			/* not an IOSAPIC interrupt! */

	/* the select/read/modify/write sequence runs under iosapic_lock */
	spin_lock_irqsave(&iosapic_lock, flags);
	writel(IOSAPIC_RTE_LOW(rte_pin), base + IOSAPIC_REG_SELECT);
	rte_low = readl(base + IOSAPIC_WINDOW);
	rte_low = rte_low & ~(1 << IOSAPIC_MASK_SHIFT);	/* clear only the mask bit */
	writel(rte_low, base + IOSAPIC_WINDOW);
	spin_unlock_irqrestore(&iosapic_lock, flags);
}


/*
 * Re-target the IOSAPIC interrupt IRQ at one CPU chosen from MASK and rewrite the
 * delivery-mode field of its redirection table entry.  Without CONFIG_SMP the body
 * is empty.  Returns silently if MASK selects no CPU, if IRQ is out of range, or if
 * the entry has no IOSAPIC pin.
 */
static void
iosapic_set_affinity (unsigned int irq, unsigned long mask)
{
#ifdef CONFIG_SMP
	unsigned long flags;
	u32 high32, low32;
	int dest, pin;
	char *addr;
	/*
	 * Bit 31 of irq is used as a "redirectable" flag.
	 * NOTE(review): irq is not stripped of that bit before the range check and the
	 * iosapic_irq[irq] lookups below, so a caller passing it presumably gets an
	 * early return -- confirm against the callers and IA64_NUM_VECTORS.
	 */
	int redir = (irq & (1<<31)) ? 1 : 0;

	/* keep only the bits below smp_num_cpus */
	mask &= (1UL << smp_num_cpus) - 1;

	if (!mask || irq >= IA64_NUM_VECTORS)
		return;

	/* ffz(~mask): presumably the index of the lowest set bit in mask -- confirm */
	dest = cpu_physical_id(ffz(~mask));

	pin = iosapic_irq[irq].pin;
	addr = iosapic_irq[irq].addr;

	if (pin < 0)
		return;			/* not an IOSAPIC interrupt */

	set_irq_affinity_info(irq,dest,redir);

	/* dest contains both id and eid */
	high32 = dest << IOSAPIC_DEST_SHIFT;

	spin_lock_irqsave(&iosapic_lock, flags);
	{
		/* get current delivery mode by reading the low32 */
		writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT);
		low32 = readl(addr + IOSAPIC_WINDOW);

		/* clear the 3-bit delivery-mode field before setting the new mode */
		low32 &= ~(7 << IOSAPIC_DELIVERY_SHIFT);
		if (redir)
			/* change delivery mode to lowest priority */
			low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
		else
			/* change delivery mode to fixed */
			low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);

		/* write the destination (high word) first, then the low word */
		writel(IOSAPIC_RTE_HIGH(pin), addr + IOSAPIC_REG_SELECT);
		writel(high32, addr + IOSAPIC_WINDOW);
		writel(IOSAPIC_RTE_LOW(pin), addr + IOSAPIC_REG_SELECT);
		writel(low32, addr + IOSAPIC_WINDOW);
	}
	spin_unlock_irqrestore(&iosapic_lock, flags);
#endif
}

/*
 * Handlers for level-triggered interrupts.
 */

/*
 * Start a level-triggered interrupt: unmask its pin.  Always returns 0.
 */
static unsigned int
iosapic_startup_level_irq (unsigned int irq)
{
	const unsigned int result = 0;

	unmask_irq(irq);
	return result;
}

/*
 * Finish handling a level-triggered interrupt by writing its vector to the EOI
 * register of the IOSAPIC recorded for that vector.
 */
static void
iosapic_end_level_irq (unsigned int irq)
{
	ia64_vector v = irq_to_vector(irq);
	char *eoi_reg = iosapic_irq[v].addr + IOSAPIC_EOI;

	writel(v, eoi_reg);
}

/*
 * Level-triggered operations that need no code of their own are aliases for the
 * shared mask_irq/unmask_irq/nop helpers.
 */
#define iosapic_shutdown_level_irq	mask_irq
#define iosapic_enable_level_irq	unmask_irq
#define iosapic_disable_level_irq	mask_irq
#define iosapic_ack_level_irq		nop

/*
 * Operations table installed by register_irq() for vectors that are not
 * edge-triggered.
 */
struct hw_interrupt_type irq_type_iosapic_level = {
	typename:	"IO-SAPIC-level",
	startup:	iosapic_startup_level_irq,
	shutdown:	iosapic_shutdown_level_irq,
	enable:		iosapic_enable_level_irq,
	disable:	iosapic_disable_level_irq,
	ack:		iosapic_ack_level_irq,
	end:		iosapic_end_level_irq,
	set_affinity:	iosapic_set_affinity
};

/*
 * Handlers for edge-triggered interrupts.
 */

/*
 * Start an edge-triggered interrupt: unmask its pin.  Always returns 0.
 */
static unsigned int
iosapic_startup_edge_irq (unsigned int irq)
{
	/*
	 * IOSAPIC simply drops interrupts pended while the
	 * corresponding pin was masked, so we can't know if an
	 * interrupt is pending already.  Let's hope not...
	 */
	const unsigned int result = 0;

	unmask_irq(irq);
	return result;
}

/*
 * Acknowledge an edge-triggered interrupt.  The pin is masked only when the
 * descriptor has BOTH IRQ_PENDING and IRQ_DISABLED set; otherwise nothing is done.
 */
static void
iosapic_ack_edge_irq (unsigned int irq)
{
	irq_desc_t *idesc = irq_desc(irq);
	/*
	 * Once we have recorded IRQ_PENDING already, we can mask the
	 * interrupt for real. This prevents IRQ storms from unhandled
	 * devices.
	 */
	/* equality test: a single '=' here would be an assignment to a non-lvalue */
	if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
		mask_irq(irq);
}

/*
 * Edge-triggered operations that need no code of their own.  Disable is a nop:
 * the pin is only masked later, in iosapic_ack_edge_irq(), once an interrupt
 * arrives while the irq is both pending and disabled.
 */
#define iosapic_enable_edge_irq		unmask_irq
#define iosapic_disable_edge_irq	nop
#define iosapic_end_edge_irq		nop

/*
 * Operations table installed by register_irq() for edge-triggered vectors.
 * Note that shutdown reuses the disable alias, i.e. it is a nop as well.
 */
struct hw_interrupt_type irq_type_iosapic_edge = {
	typename:	"IO-SAPIC-edge",
	startup:	iosapic_startup_edge_irq,
	shutdown:	iosapic_disable_edge_irq,
	enable:		iosapic_enable_edge_irq,
	disable:	iosapic_disable_edge_irq,
	ack:		iosapic_ack_edge_irq,
	end:		iosapic_end_edge_irq,
	set_affinity:	iosapic_set_affinity
};

/*
 * Read the version register of the IOSAPIC mapped at ADDR and return its raw
 * 32-bit contents.  This function does not take iosapic_lock.
 */
unsigned int
iosapic_version (char *addr)
{
	unsigned int raw;

	/*
	 * IOSAPIC Version Register return 32 bit structure like:
	 * {
	 *	unsigned int version   : 8;
	 *	unsigned int reserved1 : 8;
	 *	unsigned int pins      : 8;
	 *	unsigned int reserved2 : 8;
	 * }
	 */
	writel(IOSAPIC_VERSION, addr + IOSAPIC_REG_SELECT);
	raw = readl(addr + IOSAPIC_WINDOW);
	return raw;
}

/*
 * if the given vector is already owned by other,
 *  assign a new vector for the other and make the vector available
 */
static void
iosapic_reassign_vector (int vector)
{
	int new_vector;

	if (iosapic_irq[vector].pin >= 0 || iosapic_irq[vector].addr
	    || iosapic_irq[vector].base_irq || iosapic_irq[vector].dmode
	    || iosapic_irq[vector].polarity || iosapic_irq[vector].trigger)
	{
		new_vector = ia64_alloc_irq();
		printk("Reassigning Vector 0x%x to 0x%x\n", vector, new_vector);
		memcpy (&iosapic_irq[new_vector], &iosapic_irq[vector],
			sizeof(struct iosapic_irq));
		memset (&iosapic_irq[vector], 0, sizeof(struct iosapic_irq));
		iosapic_irq[vector].pin = -1;
	}
}

/*
 * Record the routing of an interrupt in iosapic_irq[VECTOR] and install the matching
 * interrupt-type handler (edge or level) on the vector's irq descriptor.
 *
 *   global_vector   - global interrupt number; gsi_to_vector() is updated to map it to VECTOR
 *   vector          - IA-64 vector to describe
 *   pin             - IOSAPIC pin generating the interrupt
 *   delivery        - delivery mode stored in the entry
 *   polarity        - non-zero selects IOSAPIC_POL_HIGH, zero IOSAPIC_POL_LOW
 *   edge_triggered  - non-zero selects IOSAPIC_EDGE, zero IOSAPIC_LEVEL
 *   base_irq, iosapic_address
 *                   - stored only when iosapic_address is non-NULL
 *
 * The IOSAPIC itself is not touched here; callers program the RTE separately.
 */
static void
register_irq (u32 global_vector, int vector, int pin, unsigned char delivery,
	      unsigned long polarity, unsigned long edge_triggered,
	      u32 base_irq, char *iosapic_address)
{
	irq_desc_t *idesc;
	struct hw_interrupt_type *irq_type;

	/* gsi_to_vector() is used as an lvalue here: this really is an assignment */
	gsi_to_vector(global_vector) = vector;
	iosapic_irq[vector].pin	= pin;
	iosapic_irq[vector].polarity = polarity ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW;
	iosapic_irq[vector].dmode    = delivery;

	/*
	 * In override, it does not provide addr/base_irq.  global_vector is enough to
	 * locate iosapic addr, base_irq and pin by examining base_irq and max_pin of
	 * registered iosapics (tbd)
	 */
#ifndef	OVERRIDE_DEBUG
	/* a NULL address (override) leaves the previously recorded addr/base_irq alone */
	if (iosapic_address) {
		iosapic_irq[vector].addr = iosapic_address;
		iosapic_irq[vector].base_irq = base_irq;
	}
#else
	/* same stores as above, plus warnings when they change an existing value */
	if (iosapic_address) {
		if (iosapic_irq[vector].addr && (iosapic_irq[vector].addr != iosapic_address))
			printk("WARN: register_irq: diff IOSAPIC ADDRESS for gv %x, v %x\n",
			       global_vector, vector);
		iosapic_irq[vector].addr = iosapic_address;
		if (iosapic_irq[vector].base_irq && (iosapic_irq[vector].base_irq != base_irq)) {
			printk("WARN: register_irq: diff BASE IRQ %x for gv %x, v %x\n",
			       base_irq, global_vector, vector);
		}
		iosapic_irq[vector].base_irq = base_irq;
	} else if (!iosapic_irq[vector].addr)
		printk("WARN: register_irq: invalid override for gv %x, v %x\n",
		       global_vector, vector);
#endif
	if (edge_triggered) {
		iosapic_irq[vector].trigger = IOSAPIC_EDGE;
		irq_type = &irq_type_iosapic_edge;
	} else {
		iosapic_irq[vector].trigger = IOSAPIC_LEVEL;
		irq_type = &irq_type_iosapic_level;
	}

	/* switch the descriptor's handler, reporting it unless the old one was no_irq_type */
	idesc = irq_desc(vector);
	if (idesc->handler != irq_type) {
		if (idesc->handler != &no_irq_type)
			printk("register_irq(): changing vector 0x%02x from "
			       "%s to %s\n", vector, idesc->handler->typename, irq_type->typename);
		idesc->handler = irq_type;
	}
}

/*
 * Register an IOSAPIC interrupt that ACPI described (via static tables or namespace
 * methods) and program its RTE.
 *
 * An existing vector for GLOBAL_VECTOR is reused; otherwise a new one is allocated.
 * The interrupt is registered with lowest-priority delivery and routed to the
 * processor identified by the id/eid taken from ia64_get_lid().  Returns the vector.
 */
int
iosapic_register_irq (u32 global_vector, unsigned long polarity, unsigned long
                      edge_triggered, u32 base_irq, char *iosapic_address)
{
	const char *pol_name = polarity ? "high" : "low";
	const char *trig_name = edge_triggered ? "edge" : "level";
	unsigned long dest;
	int vector;

	vector = iosapic_irq_to_vector(global_vector);
	if (vector < 0) {
		/* no vector maps to this interrupt yet */
		vector = ia64_alloc_irq();
	}

	/* the pin is the interrupt's offset from the controller's first irq */
	register_irq(global_vector, vector, global_vector - base_irq,
		     IOSAPIC_LOWEST_PRIORITY, polarity, edge_triggered,
		     base_irq, iosapic_address);

	printk("IOSAPIC 0x%x(%s,%s) -> Vector 0x%x\n", global_vector,
	       pol_name, trig_name, vector);

	/* program the IOSAPIC routing table */
	dest = (ia64_get_lid() >> 16) & 0xffff;
	set_rte(vector, dest);
	return vector;
}

/*
 * ACPI calls this when it finds an entry for a platform interrupt.
 * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
 *
 * INT_TYPE selects both the vector and the delivery mode:
 *   ACPI_INTERRUPT_PMI  - firmware-supplied IOSAPIC_VECTOR, IOSAPIC_PMI delivery
 *   ACPI_INTERRUPT_INIT - newly allocated vector, IOSAPIC_INIT delivery
 *   ACPI_INTERRUPT_CPEI - IA64_PCE_VECTOR, lowest-priority delivery
 * The interrupt is routed to the processor given by ID/EID.  Returns the vector,
 * or -1 for an unknown INT_TYPE.
 */
int
iosapic_register_platform_irq (u32 int_type, u32 global_vector,
			       u32 iosapic_vector, u16 eid, u16 id, unsigned long polarity,
			       unsigned long edge_triggered, u32 base_irq, char *iosapic_address)
{
	const char *pol_name = polarity ? "high" : "low";
	const char *trig_name = edge_triggered ? "edge" : "level";
	unsigned long dest = ((id << 8) | eid) & 0xffff;
	unsigned char delivery;
	int vector;

	switch (int_type) {
	      case ACPI_INTERRUPT_PMI:
		/*
		 * since PMI vector is alloc'd by FW(ACPI) not by kernel,
		 * we need to make sure the vector is available
		 */
		vector = iosapic_vector;
		iosapic_reassign_vector(vector);
		delivery = IOSAPIC_PMI;
		break;

	      case ACPI_INTERRUPT_INIT:
		vector = ia64_alloc_irq();
		delivery = IOSAPIC_INIT;
		break;

	      case ACPI_INTERRUPT_CPEI:
		vector = IA64_PCE_VECTOR;
		delivery = IOSAPIC_LOWEST_PRIORITY;
		break;

	      default:
		printk("iosapic_register_platform_irq(): invalid int type\n");
		return -1;
	}

	register_irq(global_vector, vector, global_vector - base_irq, delivery,
		     polarity, edge_triggered, base_irq, iosapic_address);

	printk("PLATFORM int 0x%x: IOSAPIC 0x%x(%s,%s) -> Vector 0x%x CPU %.02u:%.02u\n",
	       int_type, global_vector, pol_name, trig_name, vector, eid, id);

	/* program the IOSAPIC routing table */
	set_rte(vector, dest);
	return vector;
}


/*
 * ACPI calls this when it finds an entry for a legacy ISA interrupt.
 * Note that the irq_base and IOSAPIC address must be set in iosapic_init().
 *
 * Re-registers ISA interrupt IRQ on IOSAPIC pin PIN with the given polarity and
 * trigger mode, then reprograms its RTE towards the processor identified by the
 * id/eid taken from ia64_get_lid().
 */
void
iosapic_register_legacy_irq (unsigned long irq,
			     unsigned long pin, unsigned long polarity,
			     unsigned long edge_triggered)
{
	unsigned long dest;
	int vector;

	vector = isa_irq_to_vector(irq);

	/* base irq (0) and IOSAPIC address (NULL) are ignored for override */
	register_irq(irq, vector, (int)pin, IOSAPIC_LOWEST_PRIORITY,
		     polarity, edge_triggered, 0, NULL);

#ifdef DEBUG_IRQ_ROUTING
	printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (%s, %s) -> vector %02x\n",
	       (unsigned) irq, (unsigned) pin,
	       polarity ? "high" : "low", edge_triggered ? "edge" : "level",
	       vector);
#endif

	/* program the IOSAPIC routing table */
	dest = (ia64_get_lid() >> 16) & 0xffff;
	set_rte(vector, dest);
}

/*
 * Register the IOSAPIC at physical address PHYS_ADDR, whose first interrupt is
 * BASE_IRQ.
 *
 * On the first call every iosapic_irq[] entry is marked unused.  If PCAT_COMPAT is
 * set, the 8259-style compatibility interrupts are disabled.  The controller is
 * then mapped, its pin count read from the version register, and an entry appended
 * to iosapic_lists[].  For the PC-AT compatible controller that starts at irq 0,
 * the 16 legacy ISA interrupts are additionally registered (high polarity, edge
 * triggered) and their RTEs programmed.
 */
void __init
iosapic_init (unsigned long phys_addr, unsigned int base_irq, int pcat_compat)
{
	int irq, max_pin, vector, pin;
	unsigned int ver;
	char *addr;
	static int first_time = 1;

	if (first_time) {
		first_time = 0;
		for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
			iosapic_irq[vector].pin = -1;	/* mark as unused */
	}

	if (pcat_compat) {
		/*
		 * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
		 * enabled.
		 */
		printk("%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
		outb(0xff, 0xA1);
		outb(0xff, 0x21);
	}

	addr = ioremap(phys_addr, 0);
	ver = iosapic_version(addr);
	max_pin = (ver >> 16) & 0xff;	/* "pins" field of the version register */

	iosapic_lists[num_iosapic].addr = addr;
	iosapic_lists[num_iosapic].pcat_compat = pcat_compat;
	iosapic_lists[num_iosapic].base_irq = base_irq;
	iosapic_lists[num_iosapic].max_pin = max_pin;
	num_iosapic++;

	printk("IOSAPIC: version %x.%x, address 0x%lx, IRQs 0x%02x-0x%02x\n",
	       (ver & 0xf0) >> 4, (ver & 0x0f), phys_addr, base_irq, base_irq + max_pin);

	/*
	 * Comparison, not assignment: "base_irq = 0" would both zero base_irq and make
	 * this condition permanently false, so the legacy irqs would never be set up.
	 */
	if ((base_irq == 0) && pcat_compat) {
		/*
		 * Map the legacy ISA devices into the IOSAPIC data.  Some of these may
		 * get reprogrammed later on with data from the ACPI Interrupt Source
		 * Override table.
		 */
		for (irq = 0; irq < 16; ++irq) {
			vector = isa_irq_to_vector(irq);

			/* keep a pin that was already recorded; default to pin == irq */
			pin = iosapic_irq[vector].pin;
			if (pin == -1)
				pin = irq;

			register_irq(irq, vector, pin,
				     /* IOSAPIC_POL_HIGH, IOSAPIC_EDGE */
				     IOSAPIC_LOWEST_PRIORITY, 1, 1, base_irq, addr);

#ifdef DEBUG_IRQ_ROUTING
			printk("ISA: IRQ %u -> IOSAPIC irq 0x%02x (high, edge) -> vector 0x%02x\n",
			       irq, iosapic_irq[vector].base_irq + iosapic_irq[vector].pin,
			       vector);
#endif

			/* program the IOSAPIC routing table: */
			set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
		}
	}
}

/*
 * Fetch the PCI routing table via acpi_get_prt() and register every routing entry
 * whose irq belongs to a known IOSAPIC.  Each entry is registered with
 * lowest-priority delivery and zero polarity/edge_triggered arguments.  Irqs below
 * 16 on a PC-AT compatible IOSAPIC use the ISA vector; other irqs reuse their
 * existing vector or get a newly allocated one.
 */
void __init
iosapic_init_pci_irq (void)
{
	struct pci_vector_struct *rt;
	struct iosapic *ctl;
	unsigned int irq;
	int n, which, vector, pin;

	if (acpi_get_prt(&pci_irq.route, &pci_irq.num_routes) != 0)
		return;

	for (n = 0; n < pci_irq.num_routes; n++) {
		rt = &pci_irq.route[n];
		irq = rt->irq;

		which = find_iosapic(irq);
		if (which < 0) {
			printk("PCI: IRQ %u has no IOSAPIC mapping\n", irq);
			continue;
		}

		ctl = &iosapic_lists[which];
		pin = irq - ctl->base_irq;

		if ((unsigned) pin > ctl->max_pin)
			/* the interrupt route is for another controller... */
			continue;

		if (ctl->pcat_compat && (irq < 16)) {
			vector = isa_irq_to_vector(irq);
		} else {
			vector = iosapic_irq_to_vector(irq);
			if (vector < 0)
				/* new iosapic irq: allocate a vector for it */
				vector = ia64_alloc_irq();
		}

		register_irq(irq, vector, pin, IOSAPIC_LOWEST_PRIORITY, 0, 0,
			     ctl->base_irq, ctl->addr);

#ifdef DEBUG_IRQ_ROUTING
		printk("PCI: (B%d,I%d,P%d) -> IOSAPIC irq 0x%02x -> vector 0x%02x\n",
		       rt->bus, rt->pci_id>>16, rt->pin,
		       iosapic_irq[vector].base_irq + iosapic_irq[vector].pin, vector);
#endif
		/*
		 * NOTE: The IOSAPIC RTE will be programmed in iosapic_pci_fixup().  It
		 * needs to be done there to ensure PCI hotplug works right.
		 */
	}
}

/*
 * PCI interrupt fixup entry point.
 *
 * @phase: 0 - build the PCI routing information (iosapic_init_pci_irq())
 *             and return;
 *         1 - walk every PCI device, translate its interrupt pin into a
 *             vector, store that vector in dev->irq, install the level
 *             IOSAPIC handler type and program the routing entry;
 *         any other value is ignored.
 */
void
iosapic_pci_fixup (int phase)
{
	struct	pci_dev	*dev;
	unsigned char pin;
	int vector;
	struct hw_interrupt_type *irq_type;
	irq_desc_t *idesc;

	/*
	 * This must be a comparison.  An assignment here would always be
	 * false and would zero "phase", so the "phase != 1" check below would
	 * then return for every caller and no fixup would ever run.
	 */
	if (phase == 0) {
		iosapic_init_pci_irq();
		return;
	}

	if (phase != 1)
		return;

	pci_for_each_dev(dev) {
		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
		if (pin) {
			pin--;          /* interrupt pins are numbered starting from 1 */
			vector = pci_pin_to_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
			if (vector < 0 && dev->bus->parent) {
				/* go back to the bridge */
				struct pci_dev *bridge = dev->bus->self;

				if (bridge) {
					/* allow for multiple bridges on an adapter */
					do {
						/* do the bridge swizzle... */
						pin = (pin + PCI_SLOT(dev->devfn)) % 4;
						vector = pci_pin_to_vector(bridge->bus->number,
									   PCI_SLOT(bridge->devfn),
									   pin);
					} while (vector < 0 && (bridge = bridge->bus->self));
				}
				if (vector >= 0)
					printk(KERN_WARNING
					       "PCI: using PPB(B%d,I%d,P%d) to get vector %02x\n",
					       dev->bus->number, PCI_SLOT(dev->devfn),
					       pin, vector);
				else
					printk(KERN_WARNING
					       "PCI: Couldn't map irq for (B%d,I%d,P%d)\n",
					       dev->bus->number, PCI_SLOT(dev->devfn), pin);
			}
			if (vector >= 0) {
				printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> 0x%02x\n",
				       dev->bus->number, PCI_SLOT(dev->devfn), pin, vector);
				dev->irq = vector;

				/* PCI interrupts get the level-type IOSAPIC handler */
				irq_type = &irq_type_iosapic_level;
				idesc = irq_desc(vector);
				if (idesc->handler != irq_type) {
					/* only report when replacing a real handler type */
					if (idesc->handler != &no_irq_type)
						printk("iosapic_pci_fixup: changing vector 0x%02x "
						       "from %s to %s\n", vector,
						       idesc->handler->typename,
						       irq_type->typename);
					idesc->handler = irq_type;
				}
#ifdef CONFIG_SMP
				/*
				 * For platforms that do not support interrupt redirect
				 * via the XTP interface, we can round-robin the PCI
				 * device interrupts to the processors
				 */
				if (!(smp_int_redirect & SMP_IRQ_REDIRECTION)) {
					/* next CPU to receive a device; persists across calls */
					static int cpu_index = 0;

					set_rte(vector, cpu_physical_id(cpu_index) & 0xffff);

					cpu_index++;
					if (cpu_index >= smp_num_cpus)
						cpu_index = 0;
				} else {
					/*
					 * Direct the interrupt vector to the current cpu,
					 * platform redirection will distribute them.
					 */
					set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
				}
#else
				/* direct the interrupt vector to the running cpu id */
				set_rte(vector, (ia64_get_lid() >> 16) & 0xffff);
#endif
			}
		}
		/*
		 * Whether or not a vector was assigned above, make sure dev->irq
		 * is within range: out-of-range IRQ numbers are replaced by 15.
		 */
		if (dev->irq >= IA64_NUM_VECTORS)
			dev->irq = 15;	/* Spurious interrupts */
	}
}



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (8 preceding siblings ...)
  2002-08-02 16:32 ` David Mosberger
@ 2002-08-02 17:45 ` KOCHI, Takayoshi
  2002-08-02 18:58 ` Grant Grundler
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-08-02 17:45 UTC (permalink / raw)
  To: linux-ia64

On Thu, 1 Aug 2002 23:04:29 -0700
David Mosberger <davidm@napali.hpl.hp.com> wrote:

> The patch basically looks fine to me, except for some naming issues.
> We now have vectors, global vectors, global system interrupts (GSIs),
> irq numbers, and what not.  This is confusing and hard to maintain.
> We should settle on a consistent set of names (hopefully something
> consistent with Linux, ACPI spec, and PCI spec).  I tried to do this a
> while ago (see big comment at the beginning of iosapic.c), but the
> picture described there is incomplete for large machines and doesn't
> do a good job at integrating with ACPI lingo.  Anyone want to take a
> stab?

Bjorn, David, I checked related documents.

* _PRT (ACPI2.0a, p.149)
|  An object that specifies the PCI interrupt Routing Table.
  Linux: pci_irq (iosapic.c)
         acpi_prt (ACPI, arch/ia64/acpi.c)
         struct pci_vector_struct (asm-ia64/system.h)
  Note: pci_irq and acpi_prt are almost identical.

* Global System Interrupts (ACPI2.0a, p.125, 32bit)
|  Global System Interrupts can be thought of as ACPI Plug and
|  Play IRQ numbers. They are used to virtualize interrupts in tables
|  and in ASL methods that perform resource allocation of interrupts.
|  Do not confuse global system interrupts with ISA IRQs although in
|  the case of the IA-PC 8259 interrupts they correspond in a one-to-one
|  fashion.
  Linux: global_vector (iosapic.c)
         irq (used as an argument for find_iosapic() etc. in iosapic.c)

* Global System Interrupt Base (ACPI2.0a, p.122, 32bit)
|  The global system interrupt number where this I/O SAPIC's
|  interrupt inputs start. The number of interrupt inputs is
|  determined by the I/O SAPIC's Max Redir Entry register.
  Linux: base_irq in struct iosapic_irq/iosapic (iosapic.c)

* Bus-relative interrupt source (IRQ) (ACPI2.0a, p.119, Table 5-20)
|  For example, if your machine has the ISA Programmable Interrupt
|  Timer (PIT) connected to ISA IRQ 0, but in APIC mode, it is
|  connected to I/O APIC interrupt input 2, then you would need an
|  Interrupt Source Override where the source entry is `0' and
|  the Global System Interrupt is `2.'
  Linux: irq (iosapic.c::iosapic_init())
         isa_irq_to_vector (iosapic.c, irq_ia64.c etc.)
         legacy_irq (iosapic.c::iosapic_register_legacy_irq())

* Address (ACPI2.0a, p.164, _PRT, 32bit)
|  The address of the device (uses the same format as _ADR).
  Linux: pci_id in struct pci_vector_struct (asm-ia64/system.h)

* Pin (ACPI2.0a, p.164, _PRT, 8bit)
|  The PCI pin number of the device (0-INTA, 1-INTB, 2-INTC, 3-INTD).
  Linux: pin in struct pci_vector_struct (asm-ia64/system.h)

* Source (ACPI2.0a, p.164, _PRT, String or NULL)
|  String Name of the device that allocates the interrupt to which
|  the above pin is connected . If this field is NULL, then the
|  interrupt is allocated from the global interrupt pool.
  Not used

* Source Index (ACPI2.0a, p.164, _PRT, 32bit)
|  Index that indicates which resource descriptor in the resource
|  template of the device pointed to in the Source field this
|  interrupt is allocated from. If the Source field is null,
|  this field is the global system interrupt number to which
|  the pin is connected.
  Linux: irq in struct pci_vector_struct (asm-ia64/system.h)
         iosapic_irq (iosapic.c)

* MAX REDIR (460GX SDM, p.2-49, 8bit)
|  This is entry number (0 being the lowest entry) of the highest
|  entry in the I/O RT. It is equal to the number of interrupt
|  input pins minus one that the PID supports. This field is
|  hardwired and is read-only. The PID sets this field to 3FH,
|  indicating that it supports 64 RTEs.
  Linux: max_pin (iosapic.c)

* Interrupt Pin (PCI2.2, p.200, 8bit)
|  The Interrupt Pin register tells which interrupt pin the device
|  (or device function) uses. A value of 1 corresponds to INTA#.
  Linux: pci_pin (iosapic.c::pci_pin_to_vector())
         pin (iosapic.c)

...

While looking up these words in specs, I found that no one is
using the term 'global vector' except us:)

Confusing words are: irq, vector, pin.

IRQ:
 Top comment in iosapic.c clarifies that

 "outside this module, IA-64 vectors are called "irqs".
  This is because the traditional name Linux uses for interrupt vectors"

 but we can see much confusion here and there in iosapic.c.

 Now we have to realize that IRQ means "an identifier for interrupt
 in a Linux system".  But ISA IRQ is only exception for this.
 Perhaps we should use "isa_irq" for naming.

 So except IRQ handler routines (which are called via
 irq_type_iosapic_{level,edge}), and isa_irq related routines,
 all "*irq*" have to be renamed.

 Also note that we are using one-to-one mapping between
 IRQ and VECTOR (for DIG platform), which is source of
 confusion and bugs.
 For example, most IRQ handler routines call irq_to_vector()
 to convert from IRQ to VECTOR, but iosapic_set_affinity()
 doesn't.  In set_rte(), set_irq_affinity_info() is called
 with VECTOR as 1st argument, but it should be IRQ.

 "iosapic irq", "global vector" and "global system interrupt" are same!
 gsi_to_vector() was totally wrong thing.
 But as "iosapic irq" and "global vector" are not used
 elsewhere, all usage should be corrected to "global system interrupt".

VECTOR:
 'Vector' is relatively consistently used in iosapic.c.
 vector should be used only for ia64 interrupt vector.
 'global_vector' must not be used:)
 For iosapic_register_platform_irq(), the second argument
 should be just "vector" instead of "global_vector" and
 the third argument is "gsi".

PIN:
 'pin' is used for PCI interrupt pin (INTA, INTB...) and 
 IOSAPIC interrupt input pin.  Probably calling the former
 'pci_pin' is enough.  For more clarification, IOSAPIC
 pin can be called 'rte_index'.

 "max_pin" may be named "max_redir" if you prefer
 exact match with the specification.  But the content
 is somewhat confusing for most C programmers, because
 it represents the highest number of pins (0 origin)
 and not the number of pins.  There were a couple of
 bugs in the past regarding this.  So we could use "num_rte"
 (by adding 1 to max_redir) for future bug-proof.

These are my idea how to rename variable/function names:

 pci_irq -> remove (use "acpi_prt" directly?)
 iosapic_irq -> iosapic_intr_info
 base_irq -> gsi_base
 irq -> gsi (where appropriate)
 irq -> isa_irq (where appropriate)
 irq_type -> as it is
      (including no_irq_type, irq_type_iosapic_{level,edge},
       irq_type_ia64_lsapic)
 iosapic_irq_to_vector() -> gsi_to_vector()
       (Note: old gsi_to_vector() is wrongly implemented)

 register_irq() -> register_intr()
      (including iosapic_register_irq(), 
                 iosapic_register_platform_irq())
 iosapic_register_legacy_irq() -> iosapic_override_isa_irq()
 ia64_alloc_irq() -> ia64_alloc_vector()
 ia64_handle_irq() -> ia64_handle_vector() (<-? I'm not sure)
 register_percpu_irq() -> register_percpu_vector()

 pin -> rte_index
 pin -> pci_pin (where appropriate)
 max_pin -> max_redir or num_rte

Bjorn, I suppose there are conflicts with your idea of
name sanitization.
Comments are welcome.

Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (9 preceding siblings ...)
  2002-08-02 17:45 ` KOCHI, Takayoshi
@ 2002-08-02 18:58 ` Grant Grundler
  2002-08-02 21:22 ` David Mosberger
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Grant Grundler @ 2002-08-02 18:58 UTC (permalink / raw)
  To: linux-ia64

"KOCHI, Takayoshi" wrote:
> But how can you trust Interrupt Line value set by BIOS?

I don't see any evidence ia64 uses INT_LINE from config space.
IA64 seems to overwrite pcidev->irq with the "vector" from ACPI.
However, I've not (recently) studied iosapic.c thoroughly.
Last time I took a close look was when david/stephane published
the full ia64 source tree in Feb 2000 at NYLWE.
(http://lists.parisc-linux.org/hypermail/parisc-linux-cvs/2859.html)

My understanding is ia64 does a lookup in the "PCI routing Table"
(_PRT) provided by ACPI. Input parameters to the lookup are IO SAPIC
address, IRQ *pin*, pci device bus/dev/func.  Output values are
"global" IRQ number (= vector?) and which IRTE to use in the
given IO SAPIC.

> It is definitely not an interrupt vector number, as
> interrupt vector number is what OS allocates and ties into
> a device. Then is it a global interrupt vector?

I don't know the right terminology here.
I'd think "global" interrupt vector is what goes into pcidev->irq.
INT_LINE isn't used so maybe it just doesn't matter. ;^)

> The config space Interrupt Line value is only 8bit while
> ACPI 2.0 can describe 32bit global interrupt vector.
> NEC's platform actually use value of 256 and above
> for global interrupt vector, therefore Interrupt Line
> value of configuration space will be inevitably bogus.

right. similar issue on parisc.

...
> Okay, then pci_set_master and pci_disable_device are a pair of APIs
> and pci_enable_device/pci_disable_device are not symmetric... sigh.

I think that depends on which platform.
My preference would be drivers not use pci_set_master().

> It is ok for PCI hotplug that we don't have an architecture-dependent
> pci_disable_device hook because there are other hooks when
> a device driver releases control of a device.

ok.

> > Use different magic numbers for each IRQ?
> > They can be any *int* value. You can even use them to index into
> > an array or structures. The trick is to fully hide the IRQ<->pcidev
> > relationship in the platform specific support.
> 
> Yes, but I think it will complicate things more than necessary.

ACPI seems to provide the "magic" number.
We don't need to do anything else in addition so far.

> Now I understand that
> 
>  1) pci_dev->irq should be fixed-up at pci_fixup stage
>     in the kernel

s/should/could/
It's platform dependent.

>  2) pci_dev->irq is ia64 interrupt vector only
>     because we choose to do so and can be implemented
>     another way
>  3) ia64 interrupt vector can be allocated when enabled
>     but we allocate ahead of enabling
> 
> It is an implementation choice developers took long time ago
> that sharing a vector space with all processors in a system
> and one-to-one mapping between pci_dev->irq and interrupt vector.

yes. it's simple and sufficient for boxes currently on the market.

> iosapic.c has been written upon these assumptions.
> My patch doesn't break them.

TBH, I haven't looked at your patch.

> Implementing ia64 interrupt in other ways may be interesting
> but it's a 2.5-series matter.  For 2.4, current vector
> allocation scheme is broken at least on our platform with large
> configuration.  What we'd like to do now is fix these cases for
> stable series without breaking others.

ok.

thanks,
grant

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (10 preceding siblings ...)
  2002-08-02 18:58 ` Grant Grundler
@ 2002-08-02 21:22 ` David Mosberger
  2002-08-02 21:44 ` Bjorn Helgaas
                   ` (5 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: David Mosberger @ 2002-08-02 21:22 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Fri, 02 Aug 2002 10:45:11 -0700, "KOCHI, Takayoshi" <t-kouchi@mvf.biglobe.ne.jp> said:

  >>  register_irq() -> register_intr()

No can do.  At the platform-independent level, we have to accommodate
the Linux irq naming convention.  (In the book I used the name "irq
number" to make it clearer that this is just a logical number, not
necessarily something corresponding to a physical line.)

	--david


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (11 preceding siblings ...)
  2002-08-02 21:22 ` David Mosberger
@ 2002-08-02 21:44 ` Bjorn Helgaas
  2002-08-02 21:47 ` David Mosberger
                   ` (4 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: Bjorn Helgaas @ 2002-08-02 21:44 UTC (permalink / raw)
  To: linux-ia64

You've obviously thought about this a lot more than I have,
and I have only these comments:

>  pci_irq -> remove (use "acpi_prt" directly?)

iosapic_init_pci_irq() is the only thing in iosapic.c that uses
ACPI directly.  I wonder whether that ACPI usage could be
moved into ia64/kernel/acpi.c, leaving iosapic.c ACPI-free.
This might make it cleaner for non-ACPI systems, i.e., SGI,
to use iosapic.

There are actually relatively few ia64 dependencies in iosapic.c,
too, so I could imagine (in the distant future) using it across
architectures, too.  PA-RISC already uses the same hardware,
and I wouldn't be surprised if others do eventually.

>  iosapic_irq -> iosapic_intr_info
>  base_irq -> gsi_base
>  irq -> gsi (where appropriate)
>  irq -> isa_irq (where appropriate)
>  irq_type -> as it is
>       (including no_irq_type, irq_type_iosapic_{level,edge},
>        irq_type_ia64_lsapic)
>  iosapic_irq_to_vector() -> gsi_to_vector()
>        (Note: old gsi_to_vector() is wrongly implemented)
>
>  register_irq() -> register_intr()
>       (including iosapic_register_irq(),
>                  iosapic_register_platform_irq())
>  iosacpic_register_legacy_irq() -> iosapic_override_isa_irq()
>  ia64_alloc_irq() -> ia64_alloc_vector()
>  ia64_handle_irq() -> ia64_handle_vector() (<-? I'm not sure)

After doing all the above renaming, there are very few usages
of "irq" left (only isa_irq and irq_type stuff).  Would it be worth
reverting to the traditional Linux usage and using "irq" to refer to
IA64 vectors?

>  register_percpu_irq() -> register_percpu_vector()
>
>  pin -> rte_index
>  pin -> pci_pin (where appropriate)
>  max_pin -> max_redir or num_rte

I like num_rte for the reason you describe.

The other things I'd like to see in iosapic.c are:

  - Standardize on bases used in printk (i.e., hex for GSI, decimal
    for processor vectors).  Some places print vectors in hex without
    leading "0x", which is particularly confusing.
  - Replace printk("... B%d,I%d,P%D...", ...) with
    printk("... %s INT%c...", dev->slot_name, 'A' + pin).

Bjorn

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (12 preceding siblings ...)
  2002-08-02 21:44 ` Bjorn Helgaas
@ 2002-08-02 21:47 ` David Mosberger
  2002-08-02 22:01 ` KOCHI, Takayoshi
                   ` (3 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: David Mosberger @ 2002-08-02 21:47 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Fri, 2 Aug 2002 15:44:26 -0600, Bjorn Helgaas <bjorn_helgaas@hp.com> said:

  Bjorn> iosapic_init_pci_irq() is the only thing in iosapic.c that
  Bjorn> uses ACPI directly.

This routine will change a lot if we switch over to using the
corresponding ACPI code.  I'll see if I can prototype this in 2.5
today.

	--david


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (13 preceding siblings ...)
  2002-08-02 21:47 ` David Mosberger
@ 2002-08-02 22:01 ` KOCHI, Takayoshi
  2002-08-02 22:04 ` David Mosberger
                   ` (2 subsequent siblings)
  17 siblings, 0 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-08-02 22:01 UTC (permalink / raw)
  To: linux-ia64

On Fri, 2 Aug 2002 14:22:41 -0700
David Mosberger <davidm@napali.hpl.hp.com> wrote:

> >>>>> On Fri, 02 Aug 2002 10:45:11 -0700, "KOCHI, Takayoshi" <t-kouchi@mvf.biglobe.ne.jp> said:
> 
>   >>  register_irq() -> register_intr()
> 
> No can do.  At the platform-independent level, we have to accommodate
> the Linux irq naming convention.  (In the book I used the name "irq
> number" to make it clearer that this is just a logical number, not
> necessarily something corresponding to a physical line.)

Did you take register_irq() for request_irq()?

register_irq() is declared as static void register_irq(...) in
iosapic.c.

It does pretty iosapic-dependent stuff.

Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (14 preceding siblings ...)
  2002-08-02 22:01 ` KOCHI, Takayoshi
@ 2002-08-02 22:04 ` David Mosberger
  2002-08-02 22:22 ` KOCHI, Takayoshi
  2002-08-02 22:37 ` Grant Grundler
  17 siblings, 0 replies; 19+ messages in thread
From: David Mosberger @ 2002-08-02 22:04 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Fri, 02 Aug 2002 15:01:49 -0700, "KOCHI, Takayoshi" <t-kouchi@mvf.biglobe.ne.jp> said:

  >> Did you take register_irq() for request_irq()?

Yes, indeed, it was a parsing error. ;-)

Never mind...

	--david


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (15 preceding siblings ...)
  2002-08-02 22:04 ` David Mosberger
@ 2002-08-02 22:22 ` KOCHI, Takayoshi
  2002-08-02 22:37 ` Grant Grundler
  17 siblings, 0 replies; 19+ messages in thread
From: KOCHI, Takayoshi @ 2002-08-02 22:22 UTC (permalink / raw)
  To: linux-ia64

Hi,

On Fri, 2 Aug 2002 15:44:26 -0600
Bjorn Helgaas <bjorn_helgaas@hp.com> wrote:

> >  pci_irq -> remove (use "acpi_prt" directly?)
> 
> iosapic_init_pci_irq() is the only thing in iosapic.c that uses
> ACPI directly.  I wonder whether that ACPI usage could be
> moved into ia64/kernel/acpi.c, leaving iosapic.c ACPI-free.
> This might make it cleaner for non-ACPI systems, i.e., SGI,
> to use iosapic.

Sounds good.
But iosapic isn't used for sgi systems:)

> There are actually relatively few ia64 dependencies in iosapic.c,
> too, so I could imagine (in the distant future) using it across
> architectures, too.  PA-RISC already uses the same hardware,
> and I wouldn't be surprised if others do eventually.

I didn't know PA-RISC platforms, but obviously i386
has similar configuration and similar hardware (IO-(x)APIC).

> >  iosapic_irq -> iosapic_intr_info
> >  base_irq -> gsi_base
> >  irq -> gsi (where appropriate)
> >  irq -> isa_irq (where appropriate)
> >  irq_type -> as it is
> >       (including no_irq_type, irq_type_iosapic_{level,edge},
> >        irq_type_ia64_lsapic)
> >  iosapic_irq_to_vector() -> gsi_to_vector()
> >        (Note: old gsi_to_vector() is wrongly implemented)
> >
> >  register_irq() -> register_intr()
> >       (including iosapic_register_irq(),
> >                  iosapic_register_platform_irq())
> >  iosacpic_register_legacy_irq() -> iosapic_override_isa_irq()
> >  ia64_alloc_irq() -> ia64_alloc_vector()
> >  ia64_handle_irq() -> ia64_handle_vector() (<-? I'm not sure)
> 
> After doing all the above renaming, there are very few usages
> of "irq" left (only isa_irq and irq_type stuff).  Would it be worth
> reverting to the traditional Linux usage and using "irq" to refer to
> IA64 vectors?

I thought distinguishing "irq" with "IA64 vector" clearly is
important.  Now "irq" and "IA64 vector" is tightly coupled
(one-to-one mapping) but there are other possibilities of
coupling "irq" and "IA64 vector", like Grant says in another
mail or the way SGI took.

> >  max_pin -> max_redir or num_rte
> 
> I like num_rte for the reason you describe.

Thanks;)

> The other things I'd like to see in iosapic.c are:
> 
>   - Standardize on bases used in printk (i.e., hex for GSI, decimal
>     for processor vectors).  Some places print vectors in hex without
>     leading "0x", which is particularly confusing.
>   - Replace printk("... B%d,I%d,P%D...", ...) with
>     printk("... %s INT%c...", dev->slot_name, 'A' + pin).

I'd like, too.

Thanks,
-- 
KOCHI, Takayoshi <t-kouchi@cq.jp.nec.com/t-kouchi@mvf.biglobe.ne.jp>



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Linux-ia64] [PATCH] dynamic IRQ allocation
  2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
                   ` (16 preceding siblings ...)
  2002-08-02 22:22 ` KOCHI, Takayoshi
@ 2002-08-02 22:37 ` Grant Grundler
  17 siblings, 0 replies; 19+ messages in thread
From: Grant Grundler @ 2002-08-02 22:37 UTC (permalink / raw)
  To: linux-ia64

Bjorn Helgaas wrote:
> There are actually relatively few ia64 dependencies in iosapic.c,
> too, so I could imagine (in the distant future) using it across
> architectures, too.  PA-RISC already uses the same hardware,
> and I wouldn't be surprised if others do eventually.

yes. x86 (Foster, ie HyperThreading CPU) does too.
80-90% of iosapic.c for ia64 and parisc are functionally identical.
IIRC, differences are in "discovery" and PCI routing table setup.
Haven't looked at x86 version yet.

If someone wants to merge them, I can make HW available remotely.
I've got all the HW but not the time. :^(
(IA64 rx2600 and PARISC A500 are already remotely accessible).

grant

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2002-08-02 22:37 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-07-30  2:36 [Linux-ia64] [PATCH] dynamic IRQ allocation KOCHI, Takayoshi
2002-07-30  5:01 ` Grant Grundler
2002-07-30 18:04 ` KOCHI, Takayoshi
2002-07-30 22:14 ` Grant Grundler
2002-07-30 23:49 ` KOCHI, Takayoshi
2002-08-01  1:03 ` Grant Grundler
2002-08-02  0:39 ` KOCHI, Takayoshi
2002-08-02  6:04 ` David Mosberger
2002-08-02 15:56 ` Bjorn Helgaas
2002-08-02 16:32 ` David Mosberger
2002-08-02 17:45 ` KOCHI, Takayoshi
2002-08-02 18:58 ` Grant Grundler
2002-08-02 21:22 ` David Mosberger
2002-08-02 21:44 ` Bjorn Helgaas
2002-08-02 21:47 ` David Mosberger
2002-08-02 22:01 ` KOCHI, Takayoshi
2002-08-02 22:04 ` David Mosberger
2002-08-02 22:22 ` KOCHI, Takayoshi
2002-08-02 22:37 ` Grant Grundler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox