From: "Zhai, Edwin" <edwin.zhai@intel.com>
To: Keir Fraser <keir.fraser@eu.citrix.com>
Cc: xen-devel@lists.xensource.com, "Zhai,
Edwin" <edwin.zhai@intel.com>, Jan Beulich <JBeulich@novell.com>
Subject: [PATCH] [IOMMU] clean interrupt remapping and queued invalidation
Date: Fri, 16 Oct 2009 16:58:27 +0800 [thread overview]
Message-ID: <4AD835B3.6030306@intel.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 271 bytes --]
This patch enlarges the interrupt remapping table to fix the out-of-range
table access that occurs when many multi-function PCI devices are in use.
The invalidation queue is also expanded.
Signed-Off-By: Zhai Edwin <edwin.zhai@intel.com>
Signed-Off-By: Cui Dexuan <dexuan.cui@intel.com>
[-- Attachment #2: vtd_table_fix_v2.patch --]
[-- Type: application/octet-stream, Size: 16585 bytes --]
Index: xen-dev/xen/drivers/passthrough/vtd/intremap.c
===================================================================
--- xen-dev.orig/xen/drivers/passthrough/vtd/intremap.c
+++ xen-dev/xen/drivers/passthrough/vtd/intremap.c
@@ -146,6 +146,7 @@ static int remap_entry_to_ioapic_rte(
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
if ( ir_ctrl == NULL )
{
@@ -164,9 +165,11 @@ static int remap_entry_to_ioapic_rte(
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
old_rte->vector = iremap_entry->lo.vector;
old_rte->delivery_mode = iremap_entry->lo.dlm;
@@ -192,6 +195,7 @@ static int ioapic_rte_to_remap_entry(str
int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -208,15 +212,17 @@ static int ioapic_rte_to_remap_entry(str
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: intremap index (%d) is larger than"
- " the maximum index (%ld)!\n",
+ " the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
@@ -425,6 +431,7 @@ static int remap_entry_to_msi_msg(
int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
if ( ir_ctrl == NULL )
{
@@ -447,9 +454,11 @@ static int remap_entry_to_msi_msg(
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
@@ -485,6 +494,7 @@ static int msi_msg_to_remap_entry(
int index;
unsigned long flags;
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+ u64 entry_base;
remap_rte = (struct msi_msg_remap_entry *) msg;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
@@ -502,16 +512,18 @@ static int msi_msg_to_remap_entry(
{
dprintk(XENLOG_ERR VTDPREFIX,
"%s: intremap index (%d) is larger than"
- " the maximum index (%ld)!\n",
+ " the maximum index (%d)!\n",
__func__, index, IREMAP_ENTRY_NR - 1);
msi_desc->remap_index = -1;
spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
return -EFAULT;
}
+ entry_base = ir_ctrl->iremap_maddr +
+ (( index >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
- iremap_entry = &iremap_entries[index];
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ iremap_entry = &iremap_entries[index % (1 << IREMAP_ENTRY_ORDER)];
memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
/* Set interrupt remapping table entry */
@@ -617,7 +629,7 @@ int enable_intremap(struct iommu *iommu)
if ( ir_ctrl->iremap_maddr == 0 )
{
drhd = iommu_to_drhd(iommu);
- ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, 1);
+ ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR );
if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
Index: xen-dev/xen/drivers/passthrough/vtd/iommu.h
===================================================================
--- xen-dev.orig/xen/drivers/passthrough/vtd/iommu.h
+++ xen-dev/xen/drivers/passthrough/vtd/iommu.h
@@ -302,7 +302,23 @@ struct iremap_entry {
}hi;
};
};
-#define IREMAP_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct iremap_entry))
+
+/* Max intr remapping table page order is 8, as max number of IRTEs is 64K */
+#define IREMAP_PAGE_ORDER 8
+
+/*
+ * VTd engine handles 4K page, while CPU may have different page size on
+ * different arch. E.g. 16K on IPF.
+ */
+#define IREMAP_ARCH_PAGE_ORDER (IREMAP_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define IREMAP_ARCH_PAGE_NR ( IREMAP_ARCH_PAGE_ORDER < 0 ? \
+ 1 : \
+ 1 << IREMAP_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per 4K page */
+#define IREMAP_ENTRY_ORDER ( PAGE_SHIFT - 4 )
+#define IREMAP_ENTRY_NR ( 1 << ( IREMAP_PAGE_ORDER + 8 ) )
+
#define iremap_present(v) ((v).lo & 1)
#define iremap_fault_disable(v) (((v).lo >> 1) & 1)
@@ -392,12 +408,17 @@ struct qinval_entry {
}q;
};
-/* Order of queue invalidation pages */
-#define IQA_REG_QS 0
-#define NUM_QINVAL_PAGES (1 << IQA_REG_QS)
+/* Order of queue invalidation pages(max is 8) */
+#define QINVAL_PAGE_ORDER 2
-/* Each entry is 16 byte */
-#define QINVAL_ENTRY_NR (1 << (IQA_REG_QS + 8))
+#define QINVAL_ARCH_PAGE_ORDER (QINVAL_PAGE_ORDER + PAGE_SHIFT_4K - PAGE_SHIFT)
+#define QINVAL_ARCH_PAGE_NR ( QINVAL_ARCH_PAGE_ORDER < 0 ? \
+ 1 : \
+ 1 << QINVAL_ARCH_PAGE_ORDER )
+
+/* Each entry is 16 bytes, so 2^8 entries per page */
+#define QINVAL_ENTRY_ORDER ( PAGE_SHIFT - 4 )
+#define QINVAL_ENTRY_NR (1 << (QINVAL_PAGE_ORDER + 8))
/* Status data flag */
#define QINVAL_STAT_INIT 0
@@ -429,9 +450,9 @@ struct qinval_entry {
#define IEC_GLOBAL_INVL 0
#define IEC_INDEX_INVL 1
#define IRTA_REG_EIME_SHIFT 11
-#define IRTA_REG_TABLE_SIZE 7 // 4k page = 256 * 16 byte entries
- // 2^^(IRTA_REG_TABLE_SIZE + 1) = 256
- // IRTA_REG_TABLE_SIZE = 7
+
+/* 2^(IRTA_REG_TABLE_SIZE + 1) = IREMAP_ENTRY_NR */
+#define IRTA_REG_TABLE_SIZE ( IREMAP_PAGE_ORDER + 7 )
#define VTD_PAGE_TABLE_LEVEL_3 3
#define VTD_PAGE_TABLE_LEVEL_4 4
Index: xen-dev/xen/drivers/passthrough/vtd/iommu.c
===================================================================
--- xen-dev.orig/xen/drivers/passthrough/vtd/iommu.c
+++ xen-dev/xen/drivers/passthrough/vtd/iommu.c
@@ -135,16 +135,16 @@ void iommu_flush_cache_entry(void *addr)
void iommu_flush_cache_page(void *addr, unsigned long npages)
{
- __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
+ __iommu_flush_cache(addr, PAGE_SIZE * npages);
}
/* Allocate page table, return its machine address */
u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
{
struct acpi_rhsa_unit *rhsa;
- struct page_info *pg;
+ struct page_info *pg, *cur_pg;
u64 *vaddr;
- int node = -1;
+ int node = -1, i;
rhsa = drhd_to_rhsa(drhd);
if ( rhsa )
@@ -154,11 +154,17 @@ u64 alloc_pgtable_maddr(struct acpi_drhd
(node == -1 ) ? 0 : MEMF_node(node));
if ( !pg )
return 0;
- vaddr = __map_domain_page(pg);
- memset(vaddr, 0, PAGE_SIZE * npages);
- iommu_flush_cache_page(vaddr, npages);
- unmap_domain_page(vaddr);
+ cur_pg = pg;
+ for ( i = 0; i < npages; i++ )
+ {
+ vaddr = __map_domain_page(cur_pg);
+ memset(vaddr, 0, PAGE_SIZE);
+
+ iommu_flush_cache_page(vaddr, 1);
+ unmap_domain_page(vaddr);
+ cur_pg++;
+ }
return page_to_maddr(pg);
}
Index: xen-dev/xen/drivers/passthrough/vtd/qinval.c
===================================================================
--- xen-dev.orig/xen/drivers/passthrough/vtd/qinval.c
+++ xen-dev/xen/drivers/passthrough/vtd/qinval.c
@@ -45,17 +45,15 @@ static void print_qi_regs(struct iommu *
static int qinval_next_index(struct iommu *iommu)
{
- u64 tail, head;
+ u64 tail;
tail = dmar_readq(iommu->reg, DMAR_IQT_REG);
tail >>= QINVAL_INDEX_SHIFT;
- head = dmar_readq(iommu->reg, DMAR_IQH_REG);
- head >>= QINVAL_INDEX_SHIFT;
-
- /* round wrap check */
- if ( ( tail + 1 ) % QINVAL_ENTRY_NR == head )
- return -1;
+ /* (tail+1 == head) indicates a full queue, wait for HW */
+ while ( ( tail + 1 ) % QINVAL_ENTRY_NR ==
+ ( dmar_readq(iommu->reg, DMAR_IQH_REG) >> QINVAL_INDEX_SHIFT ) )
+ cpu_relax();
return tail;
}
@@ -77,11 +75,13 @@ static int gen_cc_inv_dsc(struct iommu *
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.cc_inv_dsc.lo.type = TYPE_INVAL_CONTEXT;
qinval_entry->q.cc_inv_dsc.lo.granu = granu;
qinval_entry->q.cc_inv_dsc.lo.res_1 = 0;
@@ -121,14 +121,14 @@ static int gen_iotlb_inv_dsc(struct iomm
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
- if ( index == -1 )
- return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.iotlb_inv_dsc.lo.type = TYPE_INVAL_IOTLB;
qinval_entry->q.iotlb_inv_dsc.lo.granu = granu;
qinval_entry->q.iotlb_inv_dsc.lo.dr = dr;
@@ -172,13 +172,13 @@ static int gen_wait_dsc(struct iommu *io
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
- if ( index == -1 )
- return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.inv_wait_dsc.lo.type = TYPE_INVAL_WAIT;
qinval_entry->q.inv_wait_dsc.lo.iflag = iflag;
qinval_entry->q.inv_wait_dsc.lo.sw = sw;
@@ -247,14 +247,14 @@ static int gen_dev_iotlb_inv_dsc(struct
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
- if ( index == -1 )
- return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.dev_iotlb_inv_dsc.lo.type = TYPE_INVAL_DEVICE_IOTLB;
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_1 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.lo.max_invs_pend = max_invs_pend;
@@ -295,14 +295,14 @@ static int gen_iec_inv_dsc(struct iommu
unsigned long flags;
struct qinval_entry *qinval_entry = NULL, *qinval_entries;
struct qi_ctrl *qi_ctrl = iommu_qi_ctrl(iommu);
+ u64 entry_base = qi_ctrl->qinval_maddr +
+ (( index >> QINVAL_ENTRY_ORDER ) << PAGE_SHIFT );
- if ( index == -1 )
- return -1;
spin_lock_irqsave(&qi_ctrl->qinval_lock, flags);
qinval_entries =
- (struct qinval_entry *)map_vtd_domain_page(qi_ctrl->qinval_maddr);
- qinval_entry = &qinval_entries[index];
+ (struct qinval_entry *)map_vtd_domain_page(entry_base);
+ qinval_entry = &qinval_entries[index % (1 << QINVAL_ENTRY_ORDER)];
qinval_entry->q.iec_inv_dsc.lo.type = TYPE_INVAL_IEC;
qinval_entry->q.iec_inv_dsc.lo.granu = granu;
qinval_entry->q.iec_inv_dsc.lo.res_1 = 0;
@@ -445,7 +445,7 @@ int enable_qinval(struct iommu *iommu)
if ( qi_ctrl->qinval_maddr == 0 )
{
drhd = iommu_to_drhd(iommu);
- qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, NUM_QINVAL_PAGES);
+ qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, QINVAL_ARCH_PAGE_NR);
if ( qi_ctrl->qinval_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
@@ -464,7 +464,7 @@ int enable_qinval(struct iommu *iommu)
* registers are automatically reset to 0 with write
* to IQA register.
*/
- qi_ctrl->qinval_maddr |= IQA_REG_QS;
+ qi_ctrl->qinval_maddr |= QINVAL_PAGE_ORDER;
spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
Index: xen-dev/xen/drivers/passthrough/vtd/utils.c
===================================================================
--- xen-dev.orig/xen/drivers/passthrough/vtd/utils.c
+++ xen-dev/xen/drivers/passthrough/vtd/utils.c
@@ -226,8 +226,7 @@ static void dump_iommu_info(unsigned cha
/* Dump interrupt remapping table. */
u64 iremap_maddr = dmar_readq(iommu->reg, DMAR_IRTA_REG);
int nr_entry = 1 << ((iremap_maddr & 0xF) + 1);
- struct iremap_entry *iremap_entries =
- (struct iremap_entry *)map_vtd_domain_page(iremap_maddr);
+ struct iremap_entry *iremap_entries = NULL;
printk(" Interrupt remapping table (nr_entry=0x%x. "
"Only dump P=1 entries here):\n", nr_entry);
@@ -235,7 +234,18 @@ static void dump_iommu_info(unsigned cha
"FPD P\n");
for ( i = 0; i < nr_entry; i++ )
{
- struct iremap_entry *p = iremap_entries + i;
+ struct iremap_entry *p;
+ if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
+ {
+ /* First entry of a new page: switch the mapping to that page */
+ u64 entry_base = iremap_maddr +
+ (( i >> IREMAP_ENTRY_ORDER ) << PAGE_SHIFT );
+ if ( iremap_entries )
+ unmap_vtd_domain_page(iremap_entries);
+ iremap_entries =
+ (struct iremap_entry *)map_vtd_domain_page(entry_base);
+ }
+ p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];
if ( !p->lo.p )
continue;
@@ -246,8 +256,9 @@ static void dump_iommu_info(unsigned cha
(u32)p->lo.dlm, (u32)p->lo.tm, (u32)p->lo.rh,
(u32)p->lo.dm, (u32)p->lo.fpd, (u32)p->lo.p);
}
+ if ( iremap_entries )
+ unmap_vtd_domain_page(iremap_entries);
- unmap_vtd_domain_page(iremap_entries);
}
}
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
reply other threads:[~2009-10-16 8:58 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4AD835B3.6030306@intel.com \
--to=edwin.zhai@intel.com \
--cc=JBeulich@novell.com \
--cc=keir.fraser@eu.citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.