xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] HPET and IOMMU adjustments
@ 2012-11-21 10:06 Jan Beulich
  2012-11-21 10:16 ` [PATCH 1/5] x86/HPET: include FSB interrupt information in 'M' debug key output Jan Beulich
                   ` (5 more replies)
  0 siblings, 6 replies; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 10:06 UTC (permalink / raw)
  To: xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

The first three patches complete the 'M' debug key output, to
also include MSIs from HPET and IOMMUs.

The remaining two patches are logically unrelated, but apply only
on top of the earlier ones.

1: x86/HPET: include FSB interrupt information in 'M' debug key output
2: VT-d: include IOMMU interrupt information in 'M' debug key output
3: AMD IOMMU: include IOMMU interrupt information in 'M' debug key output
4: x86/HPET: fix FSB interrupt masking
5: VT-d: adjust IOMMU interrupt affinities when all CPUs are online

Signed-off-by: Jan Beulich <jbeulich@suse.com>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/5] x86/HPET: include FSB interrupt information in 'M' debug key output
  2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
@ 2012-11-21 10:16 ` Jan Beulich
  2012-11-21 10:17 ` [PATCH 2/5] VT-d: include IOMMU " Jan Beulich
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 10:16 UTC (permalink / raw)
  To: xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

[-- Attachment #1: Type: text/plain, Size: 4619 bytes --]

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -239,6 +239,7 @@ static void hpet_msi_unmask(struct irq_d
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     cfg |= HPET_TN_FSB;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+    ch->msi.msi_attrib.masked = 0;
 }
 
 static void hpet_msi_mask(struct irq_desc *desc)
@@ -249,6 +250,7 @@ static void hpet_msi_mask(struct irq_des
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     cfg &= ~HPET_TN_FSB;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+    ch->msi.msi_attrib.masked = 1;
 }
 
 static void hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
@@ -346,6 +348,7 @@ static int __init hpet_setup_msi_irq(str
     }
 
     __hpet_setup_msi_irq(desc);
+    desc->msi_desc = &ch->msi;
 
     return 0;
 }
@@ -390,6 +393,17 @@ static void __init hpet_fsb_cap_lookup(v
         if ( !(cfg & HPET_TN_FSB_CAP) )
             continue;
 
+        /* hpet_setup(), via hpet_resume(), attempted to clear HPET_TN_FSB, so
+         * if it is still set... */
+        if ( !(cfg & HPET_TN_FSB) )
+            ch->msi.msi_attrib.maskbit = 1;
+        else
+        {
+            ch->msi.msi_attrib.maskbit = 0;
+            printk(XENLOG_WARNING "HPET: channel %u is not maskable (%04x)\n",
+                   i, cfg);
+        }
+
         if ( !zalloc_cpumask_var(&ch->cpumask) )
         {
             if ( !num_hpets_used )
@@ -573,6 +587,8 @@ void __init hpet_broadcast_init(void)
         spin_lock_init(&hpet_events[i].lock);
         wmb();
         hpet_events[i].event_handler = handle_hpet_broadcast;
+
+        hpet_events[i].msi.msi_attrib.pos = MSI_TYPE_HPET;
     }
 
     if ( !num_hpets_used )
@@ -795,7 +811,10 @@ void hpet_resume(u32 *boot_cfg)
     {
         cfg = hpet_read32(HPET_Tn_CFG(i));
         if ( boot_cfg )
+        {
             boot_cfg[i + 1] = cfg;
+            cfg &= ~HPET_TN_FSB;
+        }
         cfg &= ~HPET_TN_ENABLE;
         if ( cfg & HPET_TN_RESERVED )
         {
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1119,17 +1119,17 @@ static void dump_msi(unsigned char key)
 {
     unsigned int irq;
 
-    printk("PCI-MSI interrupt information:\n");
+    printk("MSI information:\n");
 
     for ( irq = 0; irq < nr_irqs; irq++ )
     {
         struct irq_desc *desc = irq_to_desc(irq);
         const struct msi_desc *entry;
         u32 addr, data, dest32;
-        int mask;
+        char mask;
         struct msi_attrib attr;
         unsigned long flags;
-        char type;
+        const char *type = "???";
 
         if ( !irq_desc_initialized(desc) )
             continue;
@@ -1145,21 +1145,30 @@ static void dump_msi(unsigned char key)
 
         switch ( entry->msi_attrib.type )
         {
-        case PCI_CAP_ID_MSI: type = ' '; break;
-        case PCI_CAP_ID_MSIX: type = 'X'; break;
-        default: type = '?'; break;
+        case PCI_CAP_ID_MSI: type = "MSI"; break;
+        case PCI_CAP_ID_MSIX: type = "MSI-X"; break;
+        case 0:
+            switch ( entry->msi_attrib.pos )
+            {
+            case MSI_TYPE_HPET: type = "HPET"; break;
+            case MSI_TYPE_IOMMU: type = "IOMMU"; break;
+            }
+            break;
         }
 
         data = entry->msg.data;
         addr = entry->msg.address_lo;
         dest32 = entry->msg.dest32;
         attr = entry->msi_attrib;
-        mask = msi_get_mask_bit(entry);
+        if ( entry->msi_attrib.type )
+            mask = msi_get_mask_bit(entry) ? '1' : '0';
+        else
+            mask = '?';
 
         spin_unlock_irqrestore(&desc->lock, flags);
 
-        printk(" MSI%c %4u vec=%02x%7s%6s%3sassert%5s%7s"
-               " dest=%08x mask=%d/%d/%d\n",
+        printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
+               " dest=%08x mask=%d/%d/%c\n",
                type, irq,
                (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
                data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -109,6 +109,14 @@ struct msi_desc {
 	int remap_index;		/* index in interrupt remapping table */
 };
 
+/*
+ * Values stored into msi_desc.msi_attrib.pos for non-PCI devices
+ * (msi_desc.msi_attrib.type is zero):
+ */
+#define MSI_TYPE_UNKNOWN 0
+#define MSI_TYPE_HPET    1
+#define MSI_TYPE_IOMMU   2
+
 int msi_maskable_irq(const struct msi_desc *);
 int msi_free_irq(struct msi_desc *entry);
 



[-- Attachment #2: x86-HPET-MSI-dump.patch --]
[-- Type: text/plain, Size: 4686 bytes --]

x86/HPET: include FSB interrupt information in 'M' debug key output

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -239,6 +239,7 @@ static void hpet_msi_unmask(struct irq_d
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     cfg |= HPET_TN_FSB;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+    ch->msi.msi_attrib.masked = 0;
 }
 
 static void hpet_msi_mask(struct irq_desc *desc)
@@ -249,6 +250,7 @@ static void hpet_msi_mask(struct irq_des
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     cfg &= ~HPET_TN_FSB;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+    ch->msi.msi_attrib.masked = 1;
 }
 
 static void hpet_msi_write(struct hpet_event_channel *ch, struct msi_msg *msg)
@@ -346,6 +348,7 @@ static int __init hpet_setup_msi_irq(str
     }
 
     __hpet_setup_msi_irq(desc);
+    desc->msi_desc = &ch->msi;
 
     return 0;
 }
@@ -390,6 +393,17 @@ static void __init hpet_fsb_cap_lookup(v
         if ( !(cfg & HPET_TN_FSB_CAP) )
             continue;
 
+        /* hpet_setup(), via hpet_resume(), attempted to clear HPET_TN_FSB, so
+         * if it is still set... */
+        if ( !(cfg & HPET_TN_FSB) )
+            ch->msi.msi_attrib.maskbit = 1;
+        else
+        {
+            ch->msi.msi_attrib.maskbit = 0;
+            printk(XENLOG_WARNING "HPET: channel %u is not maskable (%04x)\n",
+                   i, cfg);
+        }
+
         if ( !zalloc_cpumask_var(&ch->cpumask) )
         {
             if ( !num_hpets_used )
@@ -573,6 +587,8 @@ void __init hpet_broadcast_init(void)
         spin_lock_init(&hpet_events[i].lock);
         wmb();
         hpet_events[i].event_handler = handle_hpet_broadcast;
+
+        hpet_events[i].msi.msi_attrib.pos = MSI_TYPE_HPET;
     }
 
     if ( !num_hpets_used )
@@ -795,7 +811,10 @@ void hpet_resume(u32 *boot_cfg)
     {
         cfg = hpet_read32(HPET_Tn_CFG(i));
         if ( boot_cfg )
+        {
             boot_cfg[i + 1] = cfg;
+            cfg &= ~HPET_TN_FSB;
+        }
         cfg &= ~HPET_TN_ENABLE;
         if ( cfg & HPET_TN_RESERVED )
         {
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -1119,17 +1119,17 @@ static void dump_msi(unsigned char key)
 {
     unsigned int irq;
 
-    printk("PCI-MSI interrupt information:\n");
+    printk("MSI information:\n");
 
     for ( irq = 0; irq < nr_irqs; irq++ )
     {
         struct irq_desc *desc = irq_to_desc(irq);
         const struct msi_desc *entry;
         u32 addr, data, dest32;
-        int mask;
+        char mask;
         struct msi_attrib attr;
         unsigned long flags;
-        char type;
+        const char *type = "???";
 
         if ( !irq_desc_initialized(desc) )
             continue;
@@ -1145,21 +1145,30 @@ static void dump_msi(unsigned char key)
 
         switch ( entry->msi_attrib.type )
         {
-        case PCI_CAP_ID_MSI: type = ' '; break;
-        case PCI_CAP_ID_MSIX: type = 'X'; break;
-        default: type = '?'; break;
+        case PCI_CAP_ID_MSI: type = "MSI"; break;
+        case PCI_CAP_ID_MSIX: type = "MSI-X"; break;
+        case 0:
+            switch ( entry->msi_attrib.pos )
+            {
+            case MSI_TYPE_HPET: type = "HPET"; break;
+            case MSI_TYPE_IOMMU: type = "IOMMU"; break;
+            }
+            break;
         }
 
         data = entry->msg.data;
         addr = entry->msg.address_lo;
         dest32 = entry->msg.dest32;
         attr = entry->msi_attrib;
-        mask = msi_get_mask_bit(entry);
+        if ( entry->msi_attrib.type )
+            mask = msi_get_mask_bit(entry) ? '1' : '0';
+        else
+            mask = '?';
 
         spin_unlock_irqrestore(&desc->lock, flags);
 
-        printk(" MSI%c %4u vec=%02x%7s%6s%3sassert%5s%7s"
-               " dest=%08x mask=%d/%d/%d\n",
+        printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
+               " dest=%08x mask=%d/%d/%c\n",
                type, irq,
                (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
                data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -109,6 +109,14 @@ struct msi_desc {
 	int remap_index;		/* index in interrupt remapping table */
 };
 
+/*
+ * Values stored into msi_desc.msi_attrib.pos for non-PCI devices
+ * (msi_desc.msi_attrib.type is zero):
+ */
+#define MSI_TYPE_UNKNOWN 0
+#define MSI_TYPE_HPET    1
+#define MSI_TYPE_IOMMU   2
+
 int msi_maskable_irq(const struct msi_desc *);
 int msi_free_irq(struct msi_desc *entry);
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/5] VT-d: include IOMMU interrupt information in 'M' debug key output
  2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
  2012-11-21 10:16 ` [PATCH 1/5] x86/HPET: include FSB interrupt information in 'M' debug key output Jan Beulich
@ 2012-11-21 10:17 ` Jan Beulich
  2012-11-21 10:18 ` [PATCH 3/5] AMD IOMMU: " Jan Beulich
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 10:17 UTC (permalink / raw)
  To: xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

[-- Attachment #1: Type: text/plain, Size: 4372 bytes --]

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1008,6 +1008,7 @@ static void dma_msi_unmask(struct irq_de
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
+    iommu->msi.msi_attrib.masked = 0;
 }
 
 static void dma_msi_mask(struct irq_desc *desc)
@@ -1019,6 +1020,7 @@ static void dma_msi_mask(struct irq_desc
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
+    iommu->msi.msi_attrib.masked = 1;
 }
 
 static unsigned int dma_msi_startup(struct irq_desc *desc)
@@ -1059,6 +1061,7 @@ static void dma_msi_set_affinity(struct 
         msg.address_hi = dest & 0xFFFFFF00;
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
+    iommu->msi.msg = msg;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg.data);
@@ -1082,6 +1085,8 @@ static int __init iommu_set_interrupt(st
 {
     int irq, ret;
     struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+    struct iommu *iommu = drhd->iommu;
+    struct irq_desc *desc;
 
     irq = create_irq(rhsa ? pxm_to_node(rhsa->proximity_domain)
                           : NUMA_NO_NODE);
@@ -1091,17 +1096,24 @@ static int __init iommu_set_interrupt(st
         return -EINVAL;
     }
 
-    irq_desc[irq].handler = &dma_msi_type;
-    ret = request_irq(irq, iommu_page_fault, 0, "dmar", drhd->iommu);
+    desc = irq_to_desc(irq);
+    desc->handler = &dma_msi_type;
+    ret = request_irq(irq, iommu_page_fault, 0, "dmar", iommu);
     if ( ret )
     {
-        irq_desc[irq].handler = &no_irq_type;
+        desc->handler = &no_irq_type;
         destroy_irq(irq);
         dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
         return ret;
     }
 
-    return irq;
+    iommu->msi.irq = irq;
+    iommu->msi.msi_attrib.pos = MSI_TYPE_IOMMU;
+    iommu->msi.msi_attrib.maskbit = 1;
+    iommu->msi.msi_attrib.is_64 = 1;
+    desc->msi_desc = &iommu->msi;
+
+    return 0;
 }
 
 int __init iommu_alloc(struct acpi_drhd_unit *drhd)
@@ -1121,7 +1133,7 @@ int __init iommu_alloc(struct acpi_drhd_
     if ( iommu == NULL )
         return -ENOMEM;
 
-    iommu->irq = -1; /* No irq assigned yet. */
+    iommu->msi.irq = -1; /* No irq assigned yet. */
 
     iommu->intel = alloc_intel_iommu();
     if ( iommu->intel == NULL )
@@ -1218,8 +1230,8 @@ void __init iommu_free(struct acpi_drhd_
     xfree(iommu->domid_map);
 
     free_intel_iommu(iommu->intel);
-    if ( iommu->irq >= 0 )
-        destroy_irq(iommu->irq);
+    if ( iommu->msi.irq >= 0 )
+        destroy_irq(iommu->msi.irq);
     xfree(iommu);
 }
 
@@ -1976,7 +1988,7 @@ static int init_vtd_hw(void)
 
         iommu = drhd->iommu;
 
-        desc = irq_to_desc(iommu->irq);
+        desc = irq_to_desc(iommu->msi.irq);
         dma_msi_set_affinity(desc, desc->arch.cpu_mask);
 
         clear_fault_bits(iommu);
@@ -2122,12 +2134,11 @@ int __init intel_vtd_setup(void)
             iommu_hap_pt_share = 0;
 
         ret = iommu_set_interrupt(drhd);
-        if ( ret < 0 )
+        if ( ret )
         {
             dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
             goto error;
         }
-        iommu->irq = ret;
     }
 
     softirq_tasklet_init(&vtd_fault_tasklet, do_iommu_page_fault, 0);
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -21,6 +21,7 @@
 #define _INTEL_IOMMU_H_
 
 #include <xen/iommu.h>
+#include <asm/msi.h>
 
 /*
  * Intel IOMMU register specification per version 1.0 public spec.
@@ -520,7 +521,7 @@ struct iommu {
     spinlock_t lock; /* protect context, domain ids */
     spinlock_t register_lock; /* protect iommu register handling */
     u64 root_maddr; /* root entry machine address */
-    int irq;
+    struct msi_desc msi;
     struct intel_iommu *intel;
     unsigned long *domid_bitmap;  /* domain id bitmap */
     u16 *domid_map;               /* domain id mapping array */



[-- Attachment #2: VT-d-MSI-dump.patch --]
[-- Type: text/plain, Size: 4437 bytes --]

VT-d: include IOMMU interrupt information in 'M' debug key output

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1008,6 +1008,7 @@ static void dma_msi_unmask(struct irq_de
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writel(iommu->reg, DMAR_FECTL_REG, 0);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
+    iommu->msi.msi_attrib.masked = 0;
 }
 
 static void dma_msi_mask(struct irq_desc *desc)
@@ -1019,6 +1020,7 @@ static void dma_msi_mask(struct irq_desc
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writel(iommu->reg, DMAR_FECTL_REG, DMA_FECTL_IM);
     spin_unlock_irqrestore(&iommu->register_lock, flags);
+    iommu->msi.msi_attrib.masked = 1;
 }
 
 static unsigned int dma_msi_startup(struct irq_desc *desc)
@@ -1059,6 +1061,7 @@ static void dma_msi_set_affinity(struct 
         msg.address_hi = dest & 0xFFFFFF00;
     msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
     msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
+    iommu->msi.msg = msg;
 
     spin_lock_irqsave(&iommu->register_lock, flags);
     dmar_writel(iommu->reg, DMAR_FEDATA_REG, msg.data);
@@ -1082,6 +1085,8 @@ static int __init iommu_set_interrupt(st
 {
     int irq, ret;
     struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+    struct iommu *iommu = drhd->iommu;
+    struct irq_desc *desc;
 
     irq = create_irq(rhsa ? pxm_to_node(rhsa->proximity_domain)
                           : NUMA_NO_NODE);
@@ -1091,17 +1096,24 @@ static int __init iommu_set_interrupt(st
         return -EINVAL;
     }
 
-    irq_desc[irq].handler = &dma_msi_type;
-    ret = request_irq(irq, iommu_page_fault, 0, "dmar", drhd->iommu);
+    desc = irq_to_desc(irq);
+    desc->handler = &dma_msi_type;
+    ret = request_irq(irq, iommu_page_fault, 0, "dmar", iommu);
     if ( ret )
     {
-        irq_desc[irq].handler = &no_irq_type;
+        desc->handler = &no_irq_type;
         destroy_irq(irq);
         dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
         return ret;
     }
 
-    return irq;
+    iommu->msi.irq = irq;
+    iommu->msi.msi_attrib.pos = MSI_TYPE_IOMMU;
+    iommu->msi.msi_attrib.maskbit = 1;
+    iommu->msi.msi_attrib.is_64 = 1;
+    desc->msi_desc = &iommu->msi;
+
+    return 0;
 }
 
 int __init iommu_alloc(struct acpi_drhd_unit *drhd)
@@ -1121,7 +1133,7 @@ int __init iommu_alloc(struct acpi_drhd_
     if ( iommu == NULL )
         return -ENOMEM;
 
-    iommu->irq = -1; /* No irq assigned yet. */
+    iommu->msi.irq = -1; /* No irq assigned yet. */
 
     iommu->intel = alloc_intel_iommu();
     if ( iommu->intel == NULL )
@@ -1218,8 +1230,8 @@ void __init iommu_free(struct acpi_drhd_
     xfree(iommu->domid_map);
 
     free_intel_iommu(iommu->intel);
-    if ( iommu->irq >= 0 )
-        destroy_irq(iommu->irq);
+    if ( iommu->msi.irq >= 0 )
+        destroy_irq(iommu->msi.irq);
     xfree(iommu);
 }
 
@@ -1976,7 +1988,7 @@ static int init_vtd_hw(void)
 
         iommu = drhd->iommu;
 
-        desc = irq_to_desc(iommu->irq);
+        desc = irq_to_desc(iommu->msi.irq);
         dma_msi_set_affinity(desc, desc->arch.cpu_mask);
 
         clear_fault_bits(iommu);
@@ -2122,12 +2134,11 @@ int __init intel_vtd_setup(void)
             iommu_hap_pt_share = 0;
 
         ret = iommu_set_interrupt(drhd);
-        if ( ret < 0 )
+        if ( ret )
         {
             dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
             goto error;
         }
-        iommu->irq = ret;
     }
 
     softirq_tasklet_init(&vtd_fault_tasklet, do_iommu_page_fault, 0);
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -21,6 +21,7 @@
 #define _INTEL_IOMMU_H_
 
 #include <xen/iommu.h>
+#include <asm/msi.h>
 
 /*
  * Intel IOMMU register specification per version 1.0 public spec.
@@ -520,7 +521,7 @@ struct iommu {
     spinlock_t lock; /* protect context, domain ids */
     spinlock_t register_lock; /* protect iommu register handling */
     u64 root_maddr; /* root entry machine address */
-    int irq;
+    struct msi_desc msi;
     struct intel_iommu *intel;
     unsigned long *domid_bitmap;  /* domain id bitmap */
     u16 *domid_map;               /* domain id mapping array */

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/5] AMD IOMMU: include IOMMU interrupt information in 'M' debug key output
  2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
  2012-11-21 10:16 ` [PATCH 1/5] x86/HPET: include FSB interrupt information in 'M' debug key output Jan Beulich
  2012-11-21 10:17 ` [PATCH 2/5] VT-d: include IOMMU " Jan Beulich
@ 2012-11-21 10:18 ` Jan Beulich
  2012-11-21 10:19 ` [PATCH 4/5] x86/HPET: fix FSB interrupt masking Jan Beulich
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 10:18 UTC (permalink / raw)
  To: xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

[-- Attachment #1: Type: text/plain, Size: 7892 bytes --]

Note that this also adds a few pieces missing from c/s
25903:5e4a00b4114c (relevant only when the PCI MSI mask bit is
supported by an IOMMU, which apparently isn't the case for existing
implementations).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -263,7 +263,7 @@ static void write_msi_msg(struct msi_des
     }
 }
 
-static void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
+void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
 {
     struct msi_msg msg;
     unsigned int dest;
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -39,7 +39,9 @@ static int __init get_iommu_msi_capabili
 
     AMD_IOMMU_DEBUG("Found MSI capability block at %#x\n", pos);
 
-    iommu->msi_cap = pos;
+    iommu->msi.msi_attrib.type = PCI_CAP_ID_MSI;
+    iommu->msi.msi_attrib.pos = pos;
+    iommu->msi.msi_attrib.is_64 = 1;
     return 0;
 }
 
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -449,42 +449,10 @@ static void iommu_reset_log(struct amd_i
     ctrl_func(iommu, IOMMU_CONTROL_ENABLED);
 }
 
-static void iommu_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
-{
-    struct msi_msg msg;
-    unsigned int dest;
-    struct amd_iommu *iommu = desc->action->dev_id;
-    u16 seg = iommu->seg;
-    u8 bus = PCI_BUS(iommu->bdf);
-    u8 dev = PCI_SLOT(iommu->bdf);
-    u8 func = PCI_FUNC(iommu->bdf);
-
-    dest = set_desc_affinity(desc, mask);
-
-    if ( dest == BAD_APICID )
-    {
-        dprintk(XENLOG_ERR, "Set iommu interrupt affinity error!\n");
-        return;
-    }
-
-    msi_compose_msg(desc, &msg);
-    /* Is this override really needed? */
-    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-    msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
-
-    pci_conf_write32(seg, bus, dev, func,
-        iommu->msi_cap + PCI_MSI_DATA_64, msg.data);
-    pci_conf_write32(seg, bus, dev, func,
-        iommu->msi_cap + PCI_MSI_ADDRESS_LO, msg.address_lo);
-    pci_conf_write32(seg, bus, dev, func,
-        iommu->msi_cap + PCI_MSI_ADDRESS_HI, msg.address_hi);
-    
-}
-
 static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
 {
     __msi_set_enable(iommu->seg, PCI_BUS(iommu->bdf), PCI_SLOT(iommu->bdf),
-                     PCI_FUNC(iommu->bdf), iommu->msi_cap, flag);
+                     PCI_FUNC(iommu->bdf), iommu->msi.msi_attrib.pos, flag);
 }
 
 static void iommu_msi_unmask(struct irq_desc *desc)
@@ -495,6 +463,7 @@ static void iommu_msi_unmask(struct irq_
     spin_lock_irqsave(&iommu->lock, flags);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
     spin_unlock_irqrestore(&iommu->lock, flags);
+    iommu->msi.msi_attrib.masked = 0;
 }
 
 static void iommu_msi_mask(struct irq_desc *desc)
@@ -507,6 +476,7 @@ static void iommu_msi_mask(struct irq_de
     spin_lock_irqsave(&iommu->lock, flags);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
     spin_unlock_irqrestore(&iommu->lock, flags);
+    iommu->msi.msi_attrib.masked = 1;
 }
 
 static unsigned int iommu_msi_startup(struct irq_desc *desc)
@@ -530,7 +500,7 @@ static hw_irq_controller iommu_msi_type 
     .disable = iommu_msi_mask,
     .ack = iommu_msi_mask,
     .end = iommu_msi_end,
-    .set_affinity = iommu_msi_set_affinity,
+    .set_affinity = set_msi_affinity,
 };
 
 static unsigned int iommu_maskable_msi_startup(struct irq_desc *desc)
@@ -561,7 +531,7 @@ static hw_irq_controller iommu_maskable_
     .disable = mask_msi_irq,
     .ack = iommu_maskable_msi_ack,
     .end = iommu_maskable_msi_end,
-    .set_affinity = iommu_msi_set_affinity,
+    .set_affinity = set_msi_affinity,
 };
 
 static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
@@ -775,9 +745,11 @@ static void iommu_interrupt_handler(int 
     tasklet_schedule(&amd_iommu_irq_tasklet);
 }
 
-static int __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
+static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int irq, ret;
+    struct irq_desc *desc;
+    unsigned long flags;
     u16 control;
 
     irq = create_irq(NUMA_NO_NODE);
@@ -786,23 +758,38 @@ static int __init set_iommu_interrupt_ha
         dprintk(XENLOG_ERR, "IOMMU: no irqs\n");
         return 0;
     }
-    
+
+    desc = irq_to_desc(irq);
+    spin_lock_irqsave(&pcidevs_lock, flags);
+    iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf),
+                                  PCI_DEVFN2(iommu->bdf));
+    spin_unlock_irqrestore(&pcidevs_lock, flags);
+    if ( !iommu->msi.dev )
+    {
+        AMD_IOMMU_DEBUG("IOMMU: no pdev for %04x:%02x:%02x.%u\n",
+                        iommu->seg, PCI_BUS(iommu->bdf),
+                        PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
+        return 0;
+    }
+    desc->msi_desc = &iommu->msi;
     control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf),
                               PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
-                              iommu->msi_cap + PCI_MSI_FLAGS);
-    irq_desc[irq].handler = control & PCI_MSI_FLAGS_MASKBIT ?
-                            &iommu_maskable_msi_type : &iommu_msi_type;
+                              iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
+    iommu->msi.msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT);
+    desc->handler = control & PCI_MSI_FLAGS_MASKBIT ?
+                    &iommu_maskable_msi_type : &iommu_msi_type;
     ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
     if ( ret )
     {
-        irq_desc[irq].handler = &no_irq_type;
+        desc->handler = &no_irq_type;
         destroy_irq(irq);
         AMD_IOMMU_DEBUG("can't request irq\n");
         return 0;
     }
 
-    iommu->irq = irq;
-    return irq;
+    iommu->msi.irq = irq;
+
+    return 1;
 }
 
 static void enable_iommu(struct amd_iommu *iommu)
@@ -825,7 +812,7 @@ static void enable_iommu(struct amd_iomm
     if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
         register_iommu_ppr_log_in_mmio_space(iommu);
 
-    iommu_msi_set_affinity(irq_to_desc(iommu->irq), &cpu_online_map);
+    set_msi_affinity(irq_to_desc(iommu->msi.irq), &cpu_online_map);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 
     set_iommu_ht_flags(iommu);
@@ -947,7 +934,7 @@ static int __init amd_iommu_init_one(str
         if ( allocate_ppr_log(iommu) == NULL )
             goto error_out;
 
-    if ( set_iommu_interrupt_handler(iommu) == 0 )
+    if ( !set_iommu_interrupt_handler(iommu) )
         goto error_out;
 
     /* To make sure that device_table.buffer has been successfully allocated */
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -25,6 +25,7 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <xen/tasklet.h>
+#include <asm/msi.h>
 #include <asm/hvm/svm/amd-iommu-defs.h>
 
 #define iommu_found()           (!list_empty(&amd_iommu_head))
@@ -82,8 +83,9 @@ struct amd_iommu {
 
     u16 seg;
     u16 bdf;
+    struct msi_desc msi;
+
     u16 cap_offset;
-    u8 msi_cap;
     iommu_cap_t cap;
 
     u8 ht_flags;
@@ -103,7 +105,6 @@ struct amd_iommu {
     uint64_t exclusion_limit;
 
     int enabled;
-    int irq;
 };
 
 struct ivrs_mappings {
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -214,5 +214,6 @@ void mask_msi_irq(struct irq_desc *);
 void unmask_msi_irq(struct irq_desc *);
 void ack_nonmaskable_msi_irq(struct irq_desc *);
 void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
+void set_msi_affinity(struct irq_desc *, const cpumask_t *);
 
 #endif /* __ASM_MSI_H */



[-- Attachment #2: AMD-IOMMU-MSI-dump.patch --]
[-- Type: text/plain, Size: 7962 bytes --]

AMD IOMMU: include IOMMU interrupt information in 'M' debug key output

Note that this also adds a few pieces missing from c/s
25903:5e4a00b4114c (relevant only when the PCI MSI mask bit is
supported by an IOMMU, which apparently isn't the case for existing
implementations).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -263,7 +263,7 @@ static void write_msi_msg(struct msi_des
     }
 }
 
-static void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
+void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
 {
     struct msi_msg msg;
     unsigned int dest;
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -39,7 +39,9 @@ static int __init get_iommu_msi_capabili
 
     AMD_IOMMU_DEBUG("Found MSI capability block at %#x\n", pos);
 
-    iommu->msi_cap = pos;
+    iommu->msi.msi_attrib.type = PCI_CAP_ID_MSI;
+    iommu->msi.msi_attrib.pos = pos;
+    iommu->msi.msi_attrib.is_64 = 1;
     return 0;
 }
 
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -449,42 +449,10 @@ static void iommu_reset_log(struct amd_i
     ctrl_func(iommu, IOMMU_CONTROL_ENABLED);
 }
 
-static void iommu_msi_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
-{
-    struct msi_msg msg;
-    unsigned int dest;
-    struct amd_iommu *iommu = desc->action->dev_id;
-    u16 seg = iommu->seg;
-    u8 bus = PCI_BUS(iommu->bdf);
-    u8 dev = PCI_SLOT(iommu->bdf);
-    u8 func = PCI_FUNC(iommu->bdf);
-
-    dest = set_desc_affinity(desc, mask);
-
-    if ( dest == BAD_APICID )
-    {
-        dprintk(XENLOG_ERR, "Set iommu interrupt affinity error!\n");
-        return;
-    }
-
-    msi_compose_msg(desc, &msg);
-    /* Is this override really needed? */
-    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-    msg.address_lo |= MSI_ADDR_DEST_ID(dest & 0xff);
-
-    pci_conf_write32(seg, bus, dev, func,
-        iommu->msi_cap + PCI_MSI_DATA_64, msg.data);
-    pci_conf_write32(seg, bus, dev, func,
-        iommu->msi_cap + PCI_MSI_ADDRESS_LO, msg.address_lo);
-    pci_conf_write32(seg, bus, dev, func,
-        iommu->msi_cap + PCI_MSI_ADDRESS_HI, msg.address_hi);
-    
-}
-
 static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
 {
     __msi_set_enable(iommu->seg, PCI_BUS(iommu->bdf), PCI_SLOT(iommu->bdf),
-                     PCI_FUNC(iommu->bdf), iommu->msi_cap, flag);
+                     PCI_FUNC(iommu->bdf), iommu->msi.msi_attrib.pos, flag);
 }
 
 static void iommu_msi_unmask(struct irq_desc *desc)
@@ -495,6 +463,7 @@ static void iommu_msi_unmask(struct irq_
     spin_lock_irqsave(&iommu->lock, flags);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
     spin_unlock_irqrestore(&iommu->lock, flags);
+    iommu->msi.msi_attrib.masked = 0;
 }
 
 static void iommu_msi_mask(struct irq_desc *desc)
@@ -507,6 +476,7 @@ static void iommu_msi_mask(struct irq_de
     spin_lock_irqsave(&iommu->lock, flags);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
     spin_unlock_irqrestore(&iommu->lock, flags);
+    iommu->msi.msi_attrib.masked = 1;
 }
 
 static unsigned int iommu_msi_startup(struct irq_desc *desc)
@@ -530,7 +500,7 @@ static hw_irq_controller iommu_msi_type 
     .disable = iommu_msi_mask,
     .ack = iommu_msi_mask,
     .end = iommu_msi_end,
-    .set_affinity = iommu_msi_set_affinity,
+    .set_affinity = set_msi_affinity,
 };
 
 static unsigned int iommu_maskable_msi_startup(struct irq_desc *desc)
@@ -561,7 +531,7 @@ static hw_irq_controller iommu_maskable_
     .disable = mask_msi_irq,
     .ack = iommu_maskable_msi_ack,
     .end = iommu_maskable_msi_end,
-    .set_affinity = iommu_msi_set_affinity,
+    .set_affinity = set_msi_affinity,
 };
 
 static void parse_event_log_entry(struct amd_iommu *iommu, u32 entry[])
@@ -775,9 +745,11 @@ static void iommu_interrupt_handler(int 
     tasklet_schedule(&amd_iommu_irq_tasklet);
 }
 
-static int __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
+static bool_t __init set_iommu_interrupt_handler(struct amd_iommu *iommu)
 {
     int irq, ret;
+    struct irq_desc *desc;
+    unsigned long flags;
     u16 control;
 
     irq = create_irq(NUMA_NO_NODE);
@@ -786,23 +758,38 @@ static int __init set_iommu_interrupt_ha
         dprintk(XENLOG_ERR, "IOMMU: no irqs\n");
         return 0;
     }
-    
+
+    desc = irq_to_desc(irq);
+    spin_lock_irqsave(&pcidevs_lock, flags);
+    iommu->msi.dev = pci_get_pdev(iommu->seg, PCI_BUS(iommu->bdf),
+                                  PCI_DEVFN2(iommu->bdf));
+    spin_unlock_irqrestore(&pcidevs_lock, flags);
+    if ( !iommu->msi.dev )
+    {
+        AMD_IOMMU_DEBUG("IOMMU: no pdev for %04x:%02x:%02x.%u\n",
+                        iommu->seg, PCI_BUS(iommu->bdf),
+                        PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf));
+        return 0;
+    }
+    desc->msi_desc = &iommu->msi;
     control = pci_conf_read16(iommu->seg, PCI_BUS(iommu->bdf),
                               PCI_SLOT(iommu->bdf), PCI_FUNC(iommu->bdf),
-                              iommu->msi_cap + PCI_MSI_FLAGS);
-    irq_desc[irq].handler = control & PCI_MSI_FLAGS_MASKBIT ?
-                            &iommu_maskable_msi_type : &iommu_msi_type;
+                              iommu->msi.msi_attrib.pos + PCI_MSI_FLAGS);
+    iommu->msi.msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT);
+    desc->handler = control & PCI_MSI_FLAGS_MASKBIT ?
+                    &iommu_maskable_msi_type : &iommu_msi_type;
     ret = request_irq(irq, iommu_interrupt_handler, 0, "amd_iommu", iommu);
     if ( ret )
     {
-        irq_desc[irq].handler = &no_irq_type;
+        desc->handler = &no_irq_type;
         destroy_irq(irq);
         AMD_IOMMU_DEBUG("can't request irq\n");
         return 0;
     }
 
-    iommu->irq = irq;
-    return irq;
+    iommu->msi.irq = irq;
+
+    return 1;
 }
 
 static void enable_iommu(struct amd_iommu *iommu)
@@ -825,7 +812,7 @@ static void enable_iommu(struct amd_iomm
     if ( iommu_has_feature(iommu, IOMMU_EXT_FEATURE_PPRSUP_SHIFT) )
         register_iommu_ppr_log_in_mmio_space(iommu);
 
-    iommu_msi_set_affinity(irq_to_desc(iommu->irq), &cpu_online_map);
+    set_msi_affinity(irq_to_desc(iommu->msi.irq), &cpu_online_map);
     amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 
     set_iommu_ht_flags(iommu);
@@ -947,7 +934,7 @@ static int __init amd_iommu_init_one(str
         if ( allocate_ppr_log(iommu) == NULL )
             goto error_out;
 
-    if ( set_iommu_interrupt_handler(iommu) == 0 )
+    if ( !set_iommu_interrupt_handler(iommu) )
         goto error_out;
 
     /* To make sure that device_table.buffer has been successfully allocated */
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -25,6 +25,7 @@
 #include <xen/list.h>
 #include <xen/spinlock.h>
 #include <xen/tasklet.h>
+#include <asm/msi.h>
 #include <asm/hvm/svm/amd-iommu-defs.h>
 
 #define iommu_found()           (!list_empty(&amd_iommu_head))
@@ -82,8 +83,9 @@ struct amd_iommu {
 
     u16 seg;
     u16 bdf;
+    struct msi_desc msi;
+
     u16 cap_offset;
-    u8 msi_cap;
     iommu_cap_t cap;
 
     u8 ht_flags;
@@ -103,7 +105,6 @@ struct amd_iommu {
     uint64_t exclusion_limit;
 
     int enabled;
-    int irq;
 };
 
 struct ivrs_mappings {
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -214,5 +214,6 @@ void mask_msi_irq(struct irq_desc *);
 void unmask_msi_irq(struct irq_desc *);
 void ack_nonmaskable_msi_irq(struct irq_desc *);
 void end_nonmaskable_msi_irq(struct irq_desc *, u8 vector);
+void set_msi_affinity(struct irq_desc *, const cpumask_t *);
 
 #endif /* __ASM_MSI_H */

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 4/5] x86/HPET: fix FSB interrupt masking
  2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
                   ` (2 preceding siblings ...)
  2012-11-21 10:18 ` [PATCH 3/5] AMD IOMMU: " Jan Beulich
@ 2012-11-21 10:19 ` Jan Beulich
  2012-11-21 10:19 ` [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online Jan Beulich
  2012-11-21 11:11 ` [PATCH 0/5] HPET and IOMMU adjustments Keir Fraser
  5 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 10:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

[-- Attachment #1: Type: text/plain, Size: 4578 bytes --]

HPET_TN_FSB is not really suitable for masking interrupts - it merely
switches between the two delivery methods. The right way of masking is
through the HPET_TN_ENABLE bit (which really is an interrupt enable,
not a counter enable or some such). This is even more so with certain
chip sets not even allowing HPET_TN_FSB to be cleared on some of the
channels.

Further, all the setup of the channel should happen before actually
enabling the interrupt, which requires splitting legacy and FSB logic.

Finally this also fixes an S3 resume problem (HPET_TN_FSB did not get
set in hpet_broadcast_resume(), and hpet_msi_unmask() doesn't get
called from the general resume code either afaict).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -237,7 +237,7 @@ static void hpet_msi_unmask(struct irq_d
     struct hpet_event_channel *ch = desc->action->dev_id;
 
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
-    cfg |= HPET_TN_FSB;
+    cfg |= HPET_TN_ENABLE;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
     ch->msi.msi_attrib.masked = 0;
 }
@@ -248,7 +248,7 @@ static void hpet_msi_mask(struct irq_des
     struct hpet_event_channel *ch = desc->action->dev_id;
 
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
-    cfg &= ~HPET_TN_FSB;
+    cfg &= ~HPET_TN_ENABLE;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
     ch->msi.msi_attrib.masked = 1;
 }
@@ -328,6 +328,7 @@ static void __hpet_setup_msi_irq(struct 
 static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
 {
     int ret;
+    u32 cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     irq_desc_t *desc = irq_to_desc(ch->msi.irq);
 
     if ( iommu_intremap )
@@ -338,6 +339,11 @@ static int __init hpet_setup_msi_irq(str
             return ret;
     }
 
+    /* set HPET Tn as oneshot */
+    cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+    cfg |= HPET_TN_FSB | HPET_TN_32BIT;
+    hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+
     desc->handler = &hpet_msi_type;
     ret = request_irq(ch->msi.irq, hpet_interrupt_handler, 0, "HPET", ch);
     if ( ret < 0 )
@@ -393,17 +399,6 @@ static void __init hpet_fsb_cap_lookup(v
         if ( !(cfg & HPET_TN_FSB_CAP) )
             continue;
 
-        /* hpet_setup(), via hpet_resume(), attempted to clear HPET_TN_FSB, so
-         * if it is still set... */
-        if ( !(cfg & HPET_TN_FSB) )
-            ch->msi.msi_attrib.maskbit = 1;
-        else
-        {
-            ch->msi.msi_attrib.maskbit = 0;
-            printk(XENLOG_WARNING "HPET: channel %u is not maskable (%04x)\n",
-                   i, cfg);
-        }
-
         if ( !zalloc_cpumask_var(&ch->cpumask) )
         {
             if ( !num_hpets_used )
@@ -570,11 +565,14 @@ void __init hpet_broadcast_init(void)
 
     for ( i = 0; i < n; i++ )
     {
-        /* set HPET Tn as oneshot */
-        cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
-        cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
-        cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-        hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
+        if ( i == 0 && (cfg & HPET_CFG_LEGACY) )
+        {
+            /* set HPET T0 as oneshot */
+            cfg = hpet_read32(HPET_Tn_CFG(0));
+            cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+            cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+            hpet_write32(cfg, HPET_Tn_CFG(0));
+        }
 
         /*
          * The period is a femto seconds value. We need to calculate the scaled
@@ -588,6 +586,7 @@ void __init hpet_broadcast_init(void)
         wmb();
         hpet_events[i].event_handler = handle_hpet_broadcast;
 
+        hpet_events[i].msi.msi_attrib.maskbit = 1;
         hpet_events[i].msi.msi_attrib.pos = MSI_TYPE_HPET;
     }
 
@@ -633,6 +632,8 @@ void hpet_broadcast_resume(void)
         cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
         cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
         cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+        if ( !(hpet_events[i].flags & HPET_EVT_LEGACY) )
+            cfg |= HPET_TN_FSB;
         hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
 
         hpet_events[i].next_event = STIME_MAX;
@@ -811,10 +812,7 @@ void hpet_resume(u32 *boot_cfg)
     {
         cfg = hpet_read32(HPET_Tn_CFG(i));
         if ( boot_cfg )
-        {
             boot_cfg[i + 1] = cfg;
-            cfg &= ~HPET_TN_FSB;
-        }
         cfg &= ~HPET_TN_ENABLE;
         if ( cfg & HPET_TN_RESERVED )
         {



[-- Attachment #2: x86-HPET-masking.patch --]
[-- Type: text/plain, Size: 4613 bytes --]

x86/HPET: fix FSB interrupt masking

HPET_TN_FSB is not really suitable for masking interrupts - it merely
switches between the two delivery methods. The right way of masking is
through the HPET_TN_ENABLE bit (which really is an interrupt enable,
not a counter enable or some such). This is even more so with certain
chip sets not even allowing HPET_TN_FSB to be cleared on some of the
channels.

Further, all the setup of the channel should happen before actually
enabling the interrupt, which requires splitting legacy and FSB logic.

Finally this also fixes an S3 resume problem (HPET_TN_FSB did not get
set in hpet_broadcast_resume(), and hpet_msi_unmask() doesn't get
called from the general resume code either afaict).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/hpet.c
+++ b/xen/arch/x86/hpet.c
@@ -237,7 +237,7 @@ static void hpet_msi_unmask(struct irq_d
     struct hpet_event_channel *ch = desc->action->dev_id;
 
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
-    cfg |= HPET_TN_FSB;
+    cfg |= HPET_TN_ENABLE;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
     ch->msi.msi_attrib.masked = 0;
 }
@@ -248,7 +248,7 @@ static void hpet_msi_mask(struct irq_des
     struct hpet_event_channel *ch = desc->action->dev_id;
 
     cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
-    cfg &= ~HPET_TN_FSB;
+    cfg &= ~HPET_TN_ENABLE;
     hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
     ch->msi.msi_attrib.masked = 1;
 }
@@ -328,6 +328,7 @@ static void __hpet_setup_msi_irq(struct 
 static int __init hpet_setup_msi_irq(struct hpet_event_channel *ch)
 {
     int ret;
+    u32 cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
     irq_desc_t *desc = irq_to_desc(ch->msi.irq);
 
     if ( iommu_intremap )
@@ -338,6 +339,11 @@ static int __init hpet_setup_msi_irq(str
             return ret;
     }
 
+    /* set HPET Tn as oneshot */
+    cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+    cfg |= HPET_TN_FSB | HPET_TN_32BIT;
+    hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+
     desc->handler = &hpet_msi_type;
     ret = request_irq(ch->msi.irq, hpet_interrupt_handler, 0, "HPET", ch);
     if ( ret < 0 )
@@ -393,17 +399,6 @@ static void __init hpet_fsb_cap_lookup(v
         if ( !(cfg & HPET_TN_FSB_CAP) )
             continue;
 
-        /* hpet_setup(), via hpet_resume(), attempted to clear HPET_TN_FSB, so
-         * if it is still set... */
-        if ( !(cfg & HPET_TN_FSB) )
-            ch->msi.msi_attrib.maskbit = 1;
-        else
-        {
-            ch->msi.msi_attrib.maskbit = 0;
-            printk(XENLOG_WARNING "HPET: channel %u is not maskable (%04x)\n",
-                   i, cfg);
-        }
-
         if ( !zalloc_cpumask_var(&ch->cpumask) )
         {
             if ( !num_hpets_used )
@@ -570,11 +565,14 @@ void __init hpet_broadcast_init(void)
 
     for ( i = 0; i < n; i++ )
     {
-        /* set HPET Tn as oneshot */
-        cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
-        cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
-        cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-        hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
+        if ( i == 0 && (cfg & HPET_CFG_LEGACY) )
+        {
+            /* set HPET T0 as oneshot */
+            cfg = hpet_read32(HPET_Tn_CFG(0));
+            cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
+            cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+            hpet_write32(cfg, HPET_Tn_CFG(0));
+        }
 
         /*
          * The period is a femto seconds value. We need to calculate the scaled
@@ -588,6 +586,7 @@ void __init hpet_broadcast_init(void)
         wmb();
         hpet_events[i].event_handler = handle_hpet_broadcast;
 
+        hpet_events[i].msi.msi_attrib.maskbit = 1;
         hpet_events[i].msi.msi_attrib.pos = MSI_TYPE_HPET;
     }
 
@@ -633,6 +632,8 @@ void hpet_broadcast_resume(void)
         cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
         cfg &= ~(HPET_TN_LEVEL | HPET_TN_PERIODIC);
         cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+        if ( !(hpet_events[i].flags & HPET_EVT_LEGACY) )
+            cfg |= HPET_TN_FSB;
         hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
 
         hpet_events[i].next_event = STIME_MAX;
@@ -811,10 +812,7 @@ void hpet_resume(u32 *boot_cfg)
     {
         cfg = hpet_read32(HPET_Tn_CFG(i));
         if ( boot_cfg )
-        {
             boot_cfg[i + 1] = cfg;
-            cfg &= ~HPET_TN_FSB;
-        }
         cfg &= ~HPET_TN_ENABLE;
         if ( cfg & HPET_TN_RESERVED )
         {

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
                   ` (3 preceding siblings ...)
  2012-11-21 10:19 ` [PATCH 4/5] x86/HPET: fix FSB interrupt masking Jan Beulich
@ 2012-11-21 10:19 ` Jan Beulich
  2012-11-21 11:10   ` Keir Fraser
  2012-11-21 11:11 ` [PATCH 0/5] HPET and IOMMU adjustments Keir Fraser
  5 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 10:19 UTC (permalink / raw)
  To: xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

[-- Attachment #1: Type: text/plain, Size: 2703 bytes --]

Since these interrupts get setup before APs get brought online, their
affinities naturally could only ever point to CPU 0 alone so far.
Adjust this to include potentially multiple CPUs in the target mask
(when running in one of the cluster modes), and take into account NUMA
information (to handle the interrupts on a CPU on the node where the
respective IOMMU is).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -839,6 +839,7 @@ void acpi_dmar_reinstate(void)
 
 void acpi_dmar_zap(void)
 {
+    adjust_vtd_irq_affinities();
     if ( dmar_table == NULL )
         return;
     dmar_table->signature[0] = 'X';
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -52,6 +52,7 @@ int invalidate_sync(struct iommu *iommu)
 int iommu_flush_iec_global(struct iommu *iommu);
 int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
 void clear_fault_bits(struct iommu *iommu);
+int adjust_vtd_irq_affinities(void);
 
 struct iommu * ioapic_to_iommu(unsigned int apic_id);
 struct iommu * hpet_to_iommu(unsigned int hpet_id);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1971,6 +1971,33 @@ void clear_fault_bits(struct iommu *iomm
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+static void adjust_irq_affinity(struct acpi_drhd_unit *drhd)
+{
+    const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+    unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
+                             : NUMA_NO_NODE;
+    const cpumask_t *cpumask = &cpu_online_map;
+
+    if ( node < MAX_NUMNODES && node_online(node) &&
+         cpumask_intersects(&node_to_cpumask(node), cpumask) )
+        cpumask = &node_to_cpumask(node);
+    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+}
+
+int adjust_vtd_irq_affinities(void)
+{
+    struct acpi_drhd_unit *drhd;
+
+    if ( !iommu_enabled )
+        return 0;
+
+    for_each_drhd_unit ( drhd )
+        adjust_irq_affinity(drhd);
+
+    return 0;
+}
+__initcall(adjust_vtd_irq_affinities);
+
 static int init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
@@ -1984,13 +2011,10 @@ static int init_vtd_hw(void)
      */
     for_each_drhd_unit ( drhd )
     {
-        struct irq_desc *desc;
+        adjust_irq_affinity(drhd);
 
         iommu = drhd->iommu;
 
-        desc = irq_to_desc(iommu->msi.irq);
-        dma_msi_set_affinity(desc, desc->arch.cpu_mask);
-
         clear_fault_bits(iommu);
 
         spin_lock_irqsave(&iommu->register_lock, flags);




[-- Attachment #2: VT-d-interrupt-affinity.patch --]
[-- Type: text/plain, Size: 2765 bytes --]

VT-d: adjust IOMMU interrupt affinities when all CPUs are online

Since these interrupts get setup before APs get brought online, their
affinities naturally could only ever point to CPU 0 alone so far.
Adjust this to include potentially multiple CPUs in the target mask
(when running in one of the cluster modes), and take into account NUMA
information (to handle the interrupts on a CPU on the node where the
respective IOMMU is).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -839,6 +839,7 @@ void acpi_dmar_reinstate(void)
 
 void acpi_dmar_zap(void)
 {
+    adjust_vtd_irq_affinities();
     if ( dmar_table == NULL )
         return;
     dmar_table->signature[0] = 'X';
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -52,6 +52,7 @@ int invalidate_sync(struct iommu *iommu)
 int iommu_flush_iec_global(struct iommu *iommu);
 int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
 void clear_fault_bits(struct iommu *iommu);
+int adjust_vtd_irq_affinities(void);
 
 struct iommu * ioapic_to_iommu(unsigned int apic_id);
 struct iommu * hpet_to_iommu(unsigned int hpet_id);
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1971,6 +1971,33 @@ void clear_fault_bits(struct iommu *iomm
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+static void adjust_irq_affinity(struct acpi_drhd_unit *drhd)
+{
+    const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+    unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
+                             : NUMA_NO_NODE;
+    const cpumask_t *cpumask = &cpu_online_map;
+
+    if ( node < MAX_NUMNODES && node_online(node) &&
+         cpumask_intersects(&node_to_cpumask(node), cpumask) )
+        cpumask = &node_to_cpumask(node);
+    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+}
+
+int adjust_vtd_irq_affinities(void)
+{
+    struct acpi_drhd_unit *drhd;
+
+    if ( !iommu_enabled )
+        return 0;
+
+    for_each_drhd_unit ( drhd )
+        adjust_irq_affinity(drhd);
+
+    return 0;
+}
+__initcall(adjust_vtd_irq_affinities);
+
 static int init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
@@ -1984,13 +2011,10 @@ static int init_vtd_hw(void)
      */
     for_each_drhd_unit ( drhd )
     {
-        struct irq_desc *desc;
+        adjust_irq_affinity(drhd);
 
         iommu = drhd->iommu;
 
-        desc = irq_to_desc(iommu->msi.irq);
-        dma_msi_set_affinity(desc, desc->arch.cpu_mask);
-
         clear_fault_bits(iommu);
 
         spin_lock_irqsave(&iommu->register_lock, flags);

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 10:19 ` [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online Jan Beulich
@ 2012-11-21 11:10   ` Keir Fraser
  2012-11-21 11:17     ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Keir Fraser @ 2012-11-21 11:10 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

On 21/11/2012 10:19, "Jan Beulich" <JBeulich@suse.com> wrote:

> Since these interrupts get setup before APs get brought online, their
> affinities naturally could only ever point to CPU 0 alone so far.
> Adjust this to include potentially multiple CPUs in the target mask
> (when running in one of the cluster modes), and take into account NUMA
> information (to handle the interrupts on a CPU on the node where the
> respective IOMMU is).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> 
> --- a/xen/drivers/passthrough/vtd/dmar.c
> +++ b/xen/drivers/passthrough/vtd/dmar.c
> @@ -839,6 +839,7 @@ void acpi_dmar_reinstate(void)
>  
>  void acpi_dmar_zap(void)
>  {
> +    adjust_vtd_irq_affinities();

Is this just a handy place to hook? Does it logically make sense?

 -- Keir

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/5] HPET and IOMMU adjustments
  2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
                   ` (4 preceding siblings ...)
  2012-11-21 10:19 ` [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online Jan Beulich
@ 2012-11-21 11:11 ` Keir Fraser
  5 siblings, 0 replies; 13+ messages in thread
From: Keir Fraser @ 2012-11-21 11:11 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

On 21/11/2012 10:06, "Jan Beulich" <JBeulich@suse.com> wrote:

> The first three patches complete the 'M' debug key output, to
> also include MSIs from HPET and IOMMUs.
> 
> The remaining two patched are logically unrelated, but apply only
> on top of the earlier ones.
> 
> 1: x86/HPET: include FSB interrupt information in 'M' debug key output
> 2: VT-d: include IOMMU interrupt information in 'M' debug key output
> 3: AMD IOMMU: include IOMMU interrupt information in 'M' debug key output
> 4: x86/HPET: fix FSB interrupt masking
> 5: VT-d: adjust IOMMU interrupt affinities when all CPUs are online
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

I had a minor comment on 5/5, but apart from that, these all look pretty
sensible.

Acked-by: Keir Fraser <keir@xen.org>

> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 11:10   ` Keir Fraser
@ 2012-11-21 11:17     ` Jan Beulich
  2012-11-21 11:29       ` Keir Fraser
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 11:17 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Wei Huang, xen-devel, Wei Wang, xiantao.zhang, Gang Wei

>>> On 21.11.12 at 12:10, Keir Fraser <keir.xen@gmail.com> wrote:
> On 21/11/2012 10:19, "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>> Since these interrupts get setup before APs get brought online, their
>> affinities naturally could only ever point to CPU 0 alone so far.
>> Adjust this to include potentially multiple CPUs in the target mask
>> (when running in one of the cluster modes), and take into account NUMA
>> information (to handle the interrupts on a CPU on the node where the
>> respective IOMMU is).
>> 
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> 
>> --- a/xen/drivers/passthrough/vtd/dmar.c
>> +++ b/xen/drivers/passthrough/vtd/dmar.c
>> @@ -839,6 +839,7 @@ void acpi_dmar_reinstate(void)
>>  
>>  void acpi_dmar_zap(void)
>>  {
>> +    adjust_vtd_irq_affinities();
> 
> Is this just a handy place to hook?

Yes.

> Does it logically make sense?

No. Just needed to put it somewhere where it would get run at
the right point in time, and the place here ensures this for both
boot and resume.

Shall I add a comment to this effect?

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 11:17     ` Jan Beulich
@ 2012-11-21 11:29       ` Keir Fraser
  2012-11-21 11:58         ` Jan Beulich
  2012-11-21 12:13         ` [PATCH 5/5 v2] " Jan Beulich
  0 siblings, 2 replies; 13+ messages in thread
From: Keir Fraser @ 2012-11-21 11:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Wei Huang, xen-devel, Wei Wang, xiantao.zhang, Gang Wei

On 21/11/2012 11:17, "Jan Beulich" <JBeulich@suse.com> wrote:

>> Is this just a handy place to hook?
> 
> Yes.
> 
>> Does it logically make sense?
> 
> No. Just needed to put it somewhere where it would get run at
> the right point in time, and the place here ensures this for both
> boot and resume.

Yuk! And it doesn't work anyway. acpi_dmar_zap() isn't usually called during
boot -- go see it open coded at the end of acpi_parse_dmar(). It's only
called during boot when running tboot.

> Shall I add a comment to this effect?

I would rather have this added as another call to
acpi/power.c:enter_state(). It doesn't logically belong with
acpi_dmar_zap(), nor even with all its callers (e.g.,
tboot_parse_dmar_table).

 -- Keir

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 11:29       ` Keir Fraser
@ 2012-11-21 11:58         ` Jan Beulich
  2012-11-21 12:13         ` [PATCH 5/5 v2] " Jan Beulich
  1 sibling, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 11:58 UTC (permalink / raw)
  To: Keir Fraser; +Cc: Wei Huang, xen-devel, Wei Wang, xiantao.zhang, Gang Wei

>>> On 21.11.12 at 12:29, Keir Fraser <keir.xen@gmail.com> wrote:
> On 21/11/2012 11:17, "Jan Beulich" <JBeulich@suse.com> wrote:
> 
>>> Is this just a handy place to hook?
>> 
>> Yes.
>> 
>>> Does it logically make sense?
>> 
>> No. Just needed to put it somewhere where it would get run at
>> the right point in time, and the place here ensures this for both
>> boot and resume.
> 
> Yuk! And it doesn't work anyway. acpi_dmar_zap() isn't usually called during
> boot -- go see it open coded at the end of acpi_parse_dmar(). It's only
> called during boot when running tboot.

Oh, right, I should have checked the patch before replying: It's
there _only_ for the resume case, the boot time case gets
handled via an initcall.

>> Shall I add a comment to this effect?
> 
> I would rather have this added as another call to
> acpi/power.c:enter_state(). It doesn't logically belong with
> acpi_dmar_zap(), nor even with all its callers (e.g.,
> tboot_parse_dmar_table).

Okay, will change this accordingly and re-submit just that one
patch.

Jan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 5/5 v2] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 11:29       ` Keir Fraser
  2012-11-21 11:58         ` Jan Beulich
@ 2012-11-21 12:13         ` Jan Beulich
  2012-11-21 12:45           ` Keir Fraser
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2012-11-21 12:13 UTC (permalink / raw)
  To: Keir Fraser, xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

[-- Attachment #1: Type: text/plain, Size: 2903 bytes --]

Since these interrupts get setup before APs get brought online, their
affinities naturally could only ever point to CPU 0 alone so far.
Adjust this to include potentially multiple CPUs in the target mask
(when running in one of the cluster modes), and take into account NUMA
information (to handle the interrupts on a CPU on the node where the
respective IOMMU is).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Call adjust_vtd_irq_affinities() explicitly from enter_state()
    rather than through acpi_dmar_zap().

--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -219,6 +219,7 @@ static int enter_state(u32 state)
     mtrr_aps_sync_begin();
     enable_nonboot_cpus();
     mtrr_aps_sync_end();
+    adjust_vtd_irq_affinities();
     acpi_dmar_zap();
     thaw_domains();
     system_state = SYS_STATE_active;
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1971,6 +1971,33 @@ void clear_fault_bits(struct iommu *iomm
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+static void adjust_irq_affinity(struct acpi_drhd_unit *drhd)
+{
+    const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+    unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
+                             : NUMA_NO_NODE;
+    const cpumask_t *cpumask = &cpu_online_map;
+
+    if ( node < MAX_NUMNODES && node_online(node) &&
+         cpumask_intersects(&node_to_cpumask(node), cpumask) )
+        cpumask = &node_to_cpumask(node);
+    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+}
+
+int adjust_vtd_irq_affinities(void)
+{
+    struct acpi_drhd_unit *drhd;
+
+    if ( !iommu_enabled )
+        return 0;
+
+    for_each_drhd_unit ( drhd )
+        adjust_irq_affinity(drhd);
+
+    return 0;
+}
+__initcall(adjust_vtd_irq_affinities);
+
 static int init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
@@ -1984,13 +2011,10 @@ static int init_vtd_hw(void)
      */
     for_each_drhd_unit ( drhd )
     {
-        struct irq_desc *desc;
+        adjust_irq_affinity(drhd);
 
         iommu = drhd->iommu;
 
-        desc = irq_to_desc(iommu->msi.irq);
-        dma_msi_set_affinity(desc, desc->arch.cpu_mask);
-
         clear_fault_bits(iommu);
 
         spin_lock_irqsave(&iommu->register_lock, flags);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -137,6 +137,9 @@ int iommu_do_domctl(struct xen_domctl *,
 void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count);
 void iommu_iotlb_flush_all(struct domain *d);
 
+/* While VT-d specific, this must get declared in a generic header. */
+int adjust_vtd_irq_affinities(void);
+
 /*
  * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
  * avoid unecessary iotlb_flush in the low level IOMMU code.




[-- Attachment #2: VT-d-interrupt-affinity.patch --]
[-- Type: text/plain, Size: 2965 bytes --]

VT-d: adjust IOMMU interrupt affinities when all CPUs are online

Since these interrupts get setup before APs get brought online, their
affinities naturally could only ever point to CPU 0 alone so far.
Adjust this to include potentially multiple CPUs in the target mask
(when running in one of the cluster modes), and take into account NUMA
information (to handle the interrupts on a CPU on the node where the
respective IOMMU is).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: Call adjust_vtd_irq_affinities() explicitly from enter_state()
    rather than through acpi_dmar_zap().

--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -219,6 +219,7 @@ static int enter_state(u32 state)
     mtrr_aps_sync_begin();
     enable_nonboot_cpus();
     mtrr_aps_sync_end();
+    adjust_vtd_irq_affinities();
     acpi_dmar_zap();
     thaw_domains();
     system_state = SYS_STATE_active;
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1971,6 +1971,33 @@ void clear_fault_bits(struct iommu *iomm
     spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+static void adjust_irq_affinity(struct acpi_drhd_unit *drhd)
+{
+    const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
+    unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
+                             : NUMA_NO_NODE;
+    const cpumask_t *cpumask = &cpu_online_map;
+
+    if ( node < MAX_NUMNODES && node_online(node) &&
+         cpumask_intersects(&node_to_cpumask(node), cpumask) )
+        cpumask = &node_to_cpumask(node);
+    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
+}
+
+int adjust_vtd_irq_affinities(void)
+{
+    struct acpi_drhd_unit *drhd;
+
+    if ( !iommu_enabled )
+        return 0;
+
+    for_each_drhd_unit ( drhd )
+        adjust_irq_affinity(drhd);
+
+    return 0;
+}
+__initcall(adjust_vtd_irq_affinities);
+
 static int init_vtd_hw(void)
 {
     struct acpi_drhd_unit *drhd;
@@ -1984,13 +2011,10 @@ static int init_vtd_hw(void)
      */
     for_each_drhd_unit ( drhd )
     {
-        struct irq_desc *desc;
+        adjust_irq_affinity(drhd);
 
         iommu = drhd->iommu;
 
-        desc = irq_to_desc(iommu->msi.irq);
-        dma_msi_set_affinity(desc, desc->arch.cpu_mask);
-
         clear_fault_bits(iommu);
 
         spin_lock_irqsave(&iommu->register_lock, flags);
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -137,6 +137,9 @@ int iommu_do_domctl(struct xen_domctl *,
 void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int page_count);
 void iommu_iotlb_flush_all(struct domain *d);
 
+/* While VT-d specific, this must get declared in a generic header. */
+int adjust_vtd_irq_affinities(void);
+
 /*
  * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
  * avoid unecessary iotlb_flush in the low level IOMMU code.

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 5/5 v2] VT-d: adjust IOMMU interrupt affinities when all CPUs are online
  2012-11-21 12:13         ` [PATCH 5/5 v2] " Jan Beulich
@ 2012-11-21 12:45           ` Keir Fraser
  0 siblings, 0 replies; 13+ messages in thread
From: Keir Fraser @ 2012-11-21 12:45 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Wei Huang, Wei Wang, xiantao.zhang, Gang Wei

On 21/11/2012 12:13, "Jan Beulich" <JBeulich@suse.com> wrote:

> Since these interrupts get set up before APs get brought online, their
> affinities naturally could only ever point to CPU 0 alone so far.
> Adjust this to include potentially multiple CPUs in the target mask
> (when running in one of the cluster modes), and take into account NUMA
> information (to handle the interrupts on a CPU on the node where the
> respective IOMMU is).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>

> ---
> v2: Call adjust_vtd_irq_affinities() explicitly from enter_state()
>     rather than through acpi_dmar_zap().
> 
> --- a/xen/arch/x86/acpi/power.c
> +++ b/xen/arch/x86/acpi/power.c
> @@ -219,6 +219,7 @@ static int enter_state(u32 state)
>      mtrr_aps_sync_begin();
>      enable_nonboot_cpus();
>      mtrr_aps_sync_end();
> +    adjust_vtd_irq_affinities();
>      acpi_dmar_zap();
>      thaw_domains();
>      system_state = SYS_STATE_active;
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -1971,6 +1971,33 @@ void clear_fault_bits(struct iommu *iomm
>      spin_unlock_irqrestore(&iommu->register_lock, flags);
>  }
>  
> +static void adjust_irq_affinity(struct acpi_drhd_unit *drhd)
> +{
> +    const struct acpi_rhsa_unit *rhsa = drhd_to_rhsa(drhd);
> +    unsigned int node = rhsa ? pxm_to_node(rhsa->proximity_domain)
> +                             : NUMA_NO_NODE;
> +    const cpumask_t *cpumask = &cpu_online_map;
> +
> +    if ( node < MAX_NUMNODES && node_online(node) &&
> +         cpumask_intersects(&node_to_cpumask(node), cpumask) )
> +        cpumask = &node_to_cpumask(node);
> +    dma_msi_set_affinity(irq_to_desc(drhd->iommu->msi.irq), cpumask);
> +}
> +
> +int adjust_vtd_irq_affinities(void)
> +{
> +    struct acpi_drhd_unit *drhd;
> +
> +    if ( !iommu_enabled )
> +        return 0;
> +
> +    for_each_drhd_unit ( drhd )
> +        adjust_irq_affinity(drhd);
> +
> +    return 0;
> +}
> +__initcall(adjust_vtd_irq_affinities);
> +
>  static int init_vtd_hw(void)
>  {
>      struct acpi_drhd_unit *drhd;
> @@ -1984,13 +2011,10 @@ static int init_vtd_hw(void)
>       */
>      for_each_drhd_unit ( drhd )
>      {
> -        struct irq_desc *desc;
> +        adjust_irq_affinity(drhd);
>  
>          iommu = drhd->iommu;
>  
> -        desc = irq_to_desc(iommu->msi.irq);
> -        dma_msi_set_affinity(desc, desc->arch.cpu_mask);
> -
>          clear_fault_bits(iommu);
>  
>          spin_lock_irqsave(&iommu->register_lock, flags);
> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -137,6 +137,9 @@ int iommu_do_domctl(struct xen_domctl *,
>  void iommu_iotlb_flush(struct domain *d, unsigned long gfn, unsigned int
> page_count);
>  void iommu_iotlb_flush_all(struct domain *d);
>  
> +/* While VT-d specific, this must get declared in a generic header. */
> +int adjust_vtd_irq_affinities(void);
> +
>  /*
>   * The purpose of the iommu_dont_flush_iotlb optional cpu flag is to
>   * avoid unecessary iotlb_flush in the low level IOMMU code.
> 
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-11-21 12:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-11-21 10:06 [PATCH 0/5] HPET and IOMMU adjustments Jan Beulich
2012-11-21 10:16 ` [PATCH 1/5] x86/HPET: include FSB interrupt information in 'M' debug key output Jan Beulich
2012-11-21 10:17 ` [PATCH 2/5] VT-d: include IOMMU " Jan Beulich
2012-11-21 10:18 ` [PATCH 3/5] AMD IOMMU: " Jan Beulich
2012-11-21 10:19 ` [PATCH 4/5] x86/HPET: fix FSB interrupt masking Jan Beulich
2012-11-21 10:19 ` [PATCH 5/5] VT-d: adjust IOMMU interrupt affinities when all CPUs are online Jan Beulich
2012-11-21 11:10   ` Keir Fraser
2012-11-21 11:17     ` Jan Beulich
2012-11-21 11:29       ` Keir Fraser
2012-11-21 11:58         ` Jan Beulich
2012-11-21 12:13         ` [PATCH 5/5 v2] " Jan Beulich
2012-11-21 12:45           ` Keir Fraser
2012-11-21 11:11 ` [PATCH 0/5] HPET and IOMMU adjustments Keir Fraser

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).