public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Suresh Siddha <suresh.b.siddha@intel.com>
To: mingo@elte.hu, hpa@zytor.com, tglx@linutronix.de,
	akpm@linux-foundation.org, arjan@linux.intel.com,
	andi@firstfloor.org, ebiederm@xmission.com,
	jbarnes@virtuousgeek.org, steiner@sgi.com
Cc: linux-kernel@vger.kernel.org, Suresh Siddha <suresh.b.siddha@intel.com>
Subject: [patch 23/26] x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure
Date: Thu, 10 Jul 2008 11:16:57 -0700	[thread overview]
Message-ID: <20080710182239.205057000@linux-os.sc.intel.com> (raw)
In-Reply-To: 20080710181634.764954000@linux-os.sc.intel.com

[-- Attachment #1: msi_intr_remap_support.patch --]
[-- Type: text/plain, Size: 8938 bytes --]

MSI and MSI-X support for interrupt remapping infrastructure.

MSI address register will be programmed with interrupt-remapping table
entry(IRTE) index and the IRTE will contain information about the vector,
cpu destination, etc.

For MSI-X, all the IRTE's will be consecutively allocated in the table,
and the address registers will contain the starting index to the block
and the data register will contain the subindex with in that block.

This also introduces a new irq_chip for cleaner irq migration (in the process
context as opposed to the current irq migration in the context of an interrupt.
interrupt-remapping infrastructure will help us achieve this).

As MSI is edge triggered, irq migration is a simple atomic update(of vector
and cpu destination) of IRTE and flushing the hardware cache.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: tree-x86/arch/x86/kernel/io_apic_64.c
===================================================================
--- tree-x86.orig/arch/x86/kernel/io_apic_64.c	2008-07-10 09:52:31.000000000 -0700
+++ tree-x86/arch/x86/kernel/io_apic_64.c	2008-07-10 09:52:34.000000000 -0700
@@ -2289,6 +2289,9 @@
 
 	dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -2307,11 +2310,42 @@
 
 	tmp = TARGET_CPUS;
 	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+	if (err)
+		return err;
+
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset (&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0; /* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
 
 		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
+		msg->address_hi = MSI_ADDR_BASE_HI;
 		msg->address_lo =
 			MSI_ADDR_BASE_LO |
 			((INT_DEST_MODE == 0) ?
@@ -2361,6 +2395,55 @@
 	write_msi_msg(irq, &msg);
 	irq_desc[irq].affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2378,26 +2461,157 @@
 	.retrigger	= ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 {
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
+		        pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
+{
+	int ret;
 	struct msi_msg msg;
+
+	ret = msi_compose_msg(dev, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	set_irq_msi(irq, desc);
+	write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_desc + irq;
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+	return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
 	int irq, ret;
+
 	irq = create_irq();
 	if (irq < 0)
 		return irq;
 
-	ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
+	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 		destroy_irq(irq);
 		return ret;
 	}
+	return 0;
 
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
+}
 
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int irq, ret, sub_handle;
+	struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = 0;
+	int index = 0;
+#endif
 
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq();
+		if (irq < 0)
+			return irq;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTE's
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * setup the mapping between the irq and the IRTE
+			 * base index, the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
 	return 0;
+
+error:
+	destroy_irq(irq);
+	return ret;
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
Index: tree-x86/drivers/pci/intr_remapping.c
===================================================================
--- tree-x86.orig/drivers/pci/intr_remapping.c	2008-07-10 09:52:31.000000000 -0700
+++ tree-x86/drivers/pci/intr_remapping.c	2008-07-10 09:52:34.000000000 -0700
@@ -230,6 +230,17 @@
 	return NULL;
 }
 
+struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
+{
+	struct dmar_drhd_unit *drhd;
+
+	drhd = dmar_find_matched_drhd_unit(dev);
+	if (!drhd)
+		return NULL;
+
+	return drhd->iommu;
+}
+
 int free_irte(int irq)
 {
 	int index, i;
Index: tree-x86/include/linux/dmar.h
===================================================================
--- tree-x86.orig/include/linux/dmar.h	2008-07-10 09:52:31.000000000 -0700
+++ tree-x86/include/linux/dmar.h	2008-07-10 09:52:34.000000000 -0700
@@ -109,6 +109,7 @@
 extern int free_irte(int irq);
 
 extern int irq_remapped(int irq);
+extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
 extern struct intel_iommu *map_ioapic_to_ir(int apic);
 #else
 #define irq_remapped(irq)		(0)
Index: tree-x86/include/asm-x86/msidef.h
===================================================================
--- tree-x86.orig/include/asm-x86/msidef.h	2008-07-10 09:51:44.000000000 -0700
+++ tree-x86/include/asm-x86/msidef.h	2008-07-10 09:52:34.000000000 -0700
@@ -48,4 +48,8 @@
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
 					 MSI_ADDR_DEST_ID_MASK)
 
+#define MSI_ADDR_IR_EXT_INT		(1 << 4)
+#define MSI_ADDR_IR_SHV			(1 << 3)
+#define MSI_ADDR_IR_INDEX1(index)	((index & 0x8000) >> 13)
+#define MSI_ADDR_IR_INDEX2(index)	((index & 0x7fff) << 5)
 #endif /* ASM_MSIDEF_H */

-- 


  parent reply	other threads:[~2008-07-10 18:45 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-10 18:16 [patch 00/26] x64, x2apic/intr-remap: Interrupt-remapping and x2apic support Suresh Siddha
2008-07-10 18:16 ` [patch 01/26] x64, x2apic/intr-remap: Intel vt-d, IOMMU code reorganization Suresh Siddha
2008-07-10 18:16 ` [patch 02/26] x64, x2apic/intr-remap: fix the need for sequential array allocation of iommus Suresh Siddha
2008-07-10 18:16 ` [patch 03/26] x64, x2apic/intr-remap: code re-structuring, to be used by both DMA and Interrupt remapping Suresh Siddha
2008-07-10 18:16 ` [patch 04/26] x64, x2apic/intr-remap: use CONFIG_DMAR for DMA-remapping specific code Suresh Siddha
2008-07-10 18:16 ` [patch 05/26] x64, x2apic/intr-remap: Fix the need for RMRR in the DMA-remapping detection Suresh Siddha
2008-07-10 18:16 ` [patch 06/26] x64, x2apic/intr-remap: parse ioapic scope under vt-d structures Suresh Siddha
2008-07-10 18:16 ` [patch 07/26] x64, x2apic/intr-remap: move IOMMU_WAIT_OP() macro to intel-iommu.h Suresh Siddha
2008-07-10 18:16 ` [patch 08/26] x64, x2apic/intr-remap: Queued invalidation infrastructure (part of VT-d) Suresh Siddha
2008-07-10 18:16 ` [patch 09/26] x64, x2apic/intr-remap: Interrupt remapping infrastructure Suresh Siddha
2008-07-10 18:16 ` [patch 10/26] x64, x2apic/intr-remap: routines managing Interrupt remapping table entries Suresh Siddha
2008-07-10 18:16 ` [patch 11/26] x64, x2apic/intr-remap: generic irq migration support from process context Suresh Siddha
2008-07-10 23:08   ` Eric W. Biederman
2008-07-11  5:41     ` Suresh Siddha
2008-07-11  9:19       ` Eric W. Biederman
2008-07-10 18:16 ` [patch 12/26] x64, x2apic/intr-remap: 8259 specific mask/unmask routines Suresh Siddha
2008-07-10 18:16 ` [patch 13/26] x64, x2apic/intr-remap: ioapic routines which deal with initial io-apic RTE setup Suresh Siddha
2008-07-10 18:16 ` [patch 14/26] x64, x2apic/intr-remap: introduce read_apic_id() to genapic routines Suresh Siddha
2008-07-10 18:16 ` [patch 15/26] x64, x2apic/intr-remap: basic apic ops support Suresh Siddha
2008-07-10 18:16 ` [patch 16/26] x64, x2apic/intr-remap: cpuid bits for x2apic feature Suresh Siddha
2008-07-10 18:16 ` [patch 17/26] x64, x2apic/intr-remap: disable DMA-remapping if Interrupt-remapping is detected (temporary quirk) Suresh Siddha
2008-07-10 18:16 ` [patch 18/26] x64, x2apic/intr-remap: x2apic ops for x2apic mode support Suresh Siddha
2008-07-10 18:16 ` [patch 19/26] x64, x2apic/intr-remap: introcude self IPI to genapic routines Suresh Siddha
2008-07-10 23:34   ` Eric W. Biederman
2008-07-11  2:29     ` Mike Travis
2008-07-11  3:50       ` Eric W. Biederman
2008-07-11 13:55         ` Mike Travis
2008-07-10 18:16 ` [patch 20/26] x64, x2apic/intr-remap: x2apic cluster mode support Suresh Siddha
2008-07-10 18:16 ` [patch 21/26] x64, x2apic/intr-remap: setup init_apic_ldr for UV Suresh Siddha
2008-07-11  0:14   ` Andrew Morton
2008-07-11  1:56     ` Suresh Siddha
2008-07-10 18:16 ` [patch 22/26] x64, x2apic/intr-remap: IO-APIC support for interrupt-remapping Suresh Siddha
2008-07-10 18:16 ` Suresh Siddha [this message]
2008-07-11  1:22   ` [patch 23/26] x64, x2apic/intr-remap: MSI and MSI-X support for interrupt remapping infrastructure Eric W. Biederman
2008-07-11  6:07     ` Suresh Siddha
2008-07-11  8:59       ` Eric W. Biederman
2008-07-11 23:07         ` Suresh Siddha
2008-07-11 23:50           ` Eric W. Biederman
2008-07-10 18:16 ` [patch 24/26] x64, x2apic/intr-remap: add x2apic support, including enabling interrupt-remapping Suresh Siddha
2008-07-10 18:16 ` [patch 25/26] x64, x2apic/intr-remap: support for x2apic physical mode support Suresh Siddha
2008-07-10 18:17 ` [patch 26/26] x64, x2apic/intr-remap: introduce CONFIG_INTR_REMAP Suresh Siddha
2008-07-10 23:29   ` Eric W. Biederman
2008-07-10 23:37     ` Yong Wang
2008-07-11  1:50       ` Suresh Siddha
2008-07-11  1:53       ` Eric W. Biederman
2008-07-10 19:53 ` [patch 00/26] x64, x2apic/intr-remap: Interrupt-remapping and x2apic support Ingo Molnar
2008-07-10 20:22   ` Suresh Siddha
2008-07-10 21:56   ` Suresh Siddha
2008-07-11 10:28     ` Ingo Molnar
2008-07-11 20:09       ` Ingo Molnar
2008-07-11 20:31         ` Suresh Siddha
2008-07-11 20:42           ` Yinghai Lu
2008-07-11 20:45             ` Ingo Molnar
2008-07-11 21:24               ` Suresh Siddha
2008-07-11 22:02                 ` Yinghai Lu
2008-07-12  3:16                   ` Yinghai Lu
2008-07-12  3:52                     ` Eric W. Biederman
2008-07-12  6:17                       ` Yinghai Lu
2008-07-12  7:02                         ` Eric W. Biederman
2008-07-12  7:49                           ` Yinghai Lu
2008-07-12  8:11                             ` Eric W. Biederman
2008-07-12  8:37                               ` Yinghai Lu
2008-07-12  9:46                                 ` Eric W. Biederman
2008-07-13  1:02                                 ` Suresh Siddha
2008-07-13  1:01                             ` Suresh Siddha
2008-07-13  1:32                           ` Suresh Siddha
2008-07-13  1:00                         ` Suresh Siddha
2008-07-13  0:55                     ` Suresh Siddha
2008-07-12  5:37                   ` Ingo Molnar
2008-07-12  6:06                     ` Yinghai Lu
2008-07-12  6:45                       ` Ingo Molnar
2008-07-11 20:49           ` Ingo Molnar
2008-07-16 14:37   ` Yong Wang
2008-07-16 14:53     ` Ingo Molnar
2008-07-22 20:49   ` Andrew Morton
2008-07-22 21:00     ` Mike Travis
2008-07-22 21:14       ` Andrew Morton
2008-07-24  5:03       ` Ingo Molnar
2008-07-10 20:05 ` Eric W. Biederman
2008-07-10 20:18   ` Ingo Molnar
2008-07-10 21:07     ` Eric W. Biederman
2008-07-10 21:15   ` Suresh Siddha
2008-07-10 22:52     ` Eric W. Biederman
2008-07-11  2:35       ` Suresh Siddha
2008-07-11  3:15         ` Eric W. Biederman
2008-07-10 22:09   ` Arjan van de Ven
2008-07-10 22:54     ` Eric W. Biederman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080710182239.205057000@linux-os.sc.intel.com \
    --to=suresh.b.siddha@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=ebiederm@xmission.com \
    --cc=hpa@zytor.com \
    --cc=jbarnes@virtuousgeek.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=steiner@sgi.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox