All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] amd iommu: Automatic page coalescing
@ 2011-08-12 12:25 Wei Wang2
  0 siblings, 0 replies; only message in thread
From: Wei Wang2 @ 2011-08-12 12:25 UTC (permalink / raw)
  To: xen-devel@lists.xensource.com; +Cc: Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 631 bytes --]

Hi,
This patch implements automatic page coalescing when a separate IO page table
is used. It uses the ignored bits in the IOMMU PDE to cache how many entries
in the next lower page-table level are suitable for coalescing, and then
builds a super page entry once all lower entries are contiguous.  This patch
has been tested successfully for weeks, mainly with graphics devices and
3DMark Vantage.
Thanks
Wei

Signed-off-by: Wei Wang <wei.wang2@amd.com>
--
Advanced Micro Devices GmbH
Sitz: Dornach, Gemeinde Aschheim, 
Landkreis München Registergericht München, 
HRB Nr. 43632 WEEE Registrierungsnummer 129 19551
Geschäftsführer:
Alberto Bozzo

[-- Attachment #2: iommu_spage.patch --]
[-- Type: text/x-diff, Size: 22434 bytes --]

diff -r 0f36c2eec2e1 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c	Thu Jul 28 15:40:54 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_map.c	Fri Aug 12 14:03:29 2011 +0200
@@ -168,98 +168,59 @@ void flush_command_buffer(struct amd_iom
     AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
 }
 
-static void clear_iommu_l1e_present(u64 l2e, unsigned long gfn)
-{
-    u32 *l1e;
-    int offset;
-    void *l1_table;
-
-    l1_table = map_domain_page(l2e >> PAGE_SHIFT);
-
-    offset = gfn & (~PTE_PER_TABLE_MASK);
-    l1e = (u32*)(l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
-
-    /* clear l1 entry */
-    l1e[0] = l1e[1] = 0;
-
-    unmap_domain_page(l1_table);
-}
-
-static int set_iommu_l1e_present(u64 l2e, unsigned long gfn,
-                                 u64 maddr, int iw, int ir)
-{
-    u64 addr_lo, addr_hi, maddr_old;
+/* Given pfn and page table level, return pde index */
+static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
+{
+    unsigned int idx;
+
+    idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
+    idx &= ~PTE_PER_TABLE_MASK;
+    return idx;
+}
+
+void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
+{
+    u64 *table, *pte;
+
+    table = map_domain_page(l1_mfn);
+    pte = table + pfn_to_pde_idx(gfn, IOMMU_PAGING_MODE_LEVEL_1);
+    *pte = 0;
+    unmap_domain_page(table);
+}
+
+static bool_t set_iommu_pde_present(u32 *pde, unsigned long next_mfn, 
+                                    unsigned int next_level,
+                                    bool_t iw, bool_t ir)
+{
+    u64 addr_lo, addr_hi, maddr_old, maddr_next;
     u32 entry;
-    void *l1_table;
-    int offset;
-    u32 *l1e;
-    int need_flush = 0;
-
-    l1_table = map_domain_page(l2e >> PAGE_SHIFT);
-
-    offset = gfn & (~PTE_PER_TABLE_MASK);
-    l1e = (u32*)((u8*)l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
-
-    addr_hi = get_field_from_reg_u32(l1e[1],
+    bool_t need_flush = 0;
+
+    maddr_next = (u64)next_mfn << PAGE_SHIFT;
+
+    addr_hi = get_field_from_reg_u32(pde[1],
                                      IOMMU_PTE_ADDR_HIGH_MASK,
                                      IOMMU_PTE_ADDR_HIGH_SHIFT);
-    addr_lo = get_field_from_reg_u32(l1e[0],
+    addr_lo = get_field_from_reg_u32(pde[0],
                                      IOMMU_PTE_ADDR_LOW_MASK,
                                      IOMMU_PTE_ADDR_LOW_SHIFT);
 
-    maddr_old = ((addr_hi << 32) | addr_lo) << PAGE_SHIFT;
-
-    if ( maddr_old && (maddr_old != maddr) )
+    maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
+
+    if ( maddr_old != maddr_next )
         need_flush = 1;
 
-    addr_lo = maddr & DMA_32BIT_MASK;
-    addr_hi = maddr >> 32;
-
-    set_field_in_reg_u32((u32)addr_hi, 0,
-                         IOMMU_PTE_ADDR_HIGH_MASK,
-                         IOMMU_PTE_ADDR_HIGH_SHIFT, &entry);
-    set_field_in_reg_u32(iw ? IOMMU_CONTROL_ENABLED :
-                         IOMMU_CONTROL_DISABLED, entry,
-                         IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
-                         IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT, &entry);
-    set_field_in_reg_u32(ir ? IOMMU_CONTROL_ENABLED :
-                         IOMMU_CONTROL_DISABLED, entry,
-                         IOMMU_PTE_IO_READ_PERMISSION_MASK,
-                         IOMMU_PTE_IO_READ_PERMISSION_SHIFT, &entry);
-    l1e[1] = entry;
-
-    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
-                         IOMMU_PTE_ADDR_LOW_MASK,
-                         IOMMU_PTE_ADDR_LOW_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_PAGING_MODE_LEVEL_0, entry,
-                         IOMMU_PTE_NEXT_LEVEL_MASK,
-                         IOMMU_PTE_NEXT_LEVEL_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                         IOMMU_PTE_PRESENT_MASK,
-                         IOMMU_PTE_PRESENT_SHIFT, &entry);
-    l1e[0] = entry;
-
-    unmap_domain_page(l1_table);
-    return need_flush;
-}
-
-static void amd_iommu_set_page_directory_entry(u32 *pde, 
-                                               u64 next_ptr, u8 next_level)
-{
-    u64 addr_lo, addr_hi;
-    u32 entry;
-
-    addr_lo = next_ptr & DMA_32BIT_MASK;
-    addr_hi = next_ptr >> 32;
+    addr_lo = maddr_next & DMA_32BIT_MASK;
+    addr_hi = maddr_next >> 32;
 
     /* enable read/write permissions,which will be enforced at the PTE */
     set_field_in_reg_u32((u32)addr_hi, 0,
                          IOMMU_PDE_ADDR_HIGH_MASK,
                          IOMMU_PDE_ADDR_HIGH_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+    set_field_in_reg_u32(iw, entry,
                          IOMMU_PDE_IO_WRITE_PERMISSION_MASK,
                          IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+    set_field_in_reg_u32(ir, entry,
                          IOMMU_PDE_IO_READ_PERMISSION_MASK,
                          IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);
     pde[1] = entry;
@@ -275,6 +236,26 @@ static void amd_iommu_set_page_directory
                          IOMMU_PDE_PRESENT_MASK,
                          IOMMU_PDE_PRESENT_SHIFT, &entry);
     pde[0] = entry;
+
+    return need_flush;
+}
+
+static bool_t set_iommu_pte_present(unsigned long pt_mfn, unsigned long gfn, 
+                                    unsigned long next_mfn, int pde_level, 
+                                    bool_t iw, bool_t ir)
+{
+    u64 *table;
+    u32 *pde;
+    bool_t need_flush = 0;
+
+    table = map_domain_page(pt_mfn);
+
+    pde = (u32*)(table + pfn_to_pde_idx(gfn, pde_level));
+
+    need_flush = set_iommu_pde_present(pde, next_mfn, 
+                                       IOMMU_PAGING_MODE_LEVEL_0, iw, ir);
+    unmap_domain_page(table);
+    return need_flush;
 }
 
 void amd_iommu_set_root_page_table(
@@ -413,11 +394,18 @@ u64 amd_iommu_get_next_table_from_pte(u3
     return ptr;
 }
 
+static unsigned int iommu_next_level(u32 *entry)
+{
+    return get_field_from_reg_u32(entry[0],
+                                  IOMMU_PDE_NEXT_LEVEL_MASK,
+                                  IOMMU_PDE_NEXT_LEVEL_SHIFT);
+}
+
 static int amd_iommu_is_pte_present(u32 *entry)
 {
-    return (get_field_from_reg_u32(entry[0],
-                                   IOMMU_PDE_PRESENT_MASK,
-                                   IOMMU_PDE_PRESENT_SHIFT));
+    return get_field_from_reg_u32(entry[0],
+                                  IOMMU_PDE_PRESENT_MASK,
+                                  IOMMU_PDE_PRESENT_SHIFT);
 }
 
 void invalidate_dev_table_entry(struct amd_iommu *iommu,
@@ -439,54 +427,241 @@ void invalidate_dev_table_entry(struct a
     send_iommu_command(iommu, cmd);
 }
 
-static u64 iommu_l2e_from_pfn(struct page_info *table, int level,
-                              unsigned long io_pfn)
-{
-    unsigned long offset;
-    void *pde = NULL;
-    void *table_vaddr;
-    u64 next_table_maddr = 0;
-    unsigned int lowest = 1;
-
-    BUG_ON( table == NULL || level < lowest );
-
-    if ( level == lowest )
-        return page_to_maddr(table);
-
-    while ( level > lowest )
-    {
-        offset = io_pfn >> ((PTE_PER_TABLE_SHIFT *
-                             (level - IOMMU_PAGING_MODE_LEVEL_1)));
-        offset &= ~PTE_PER_TABLE_MASK;
-
-        table_vaddr = __map_domain_page(table);
-        pde = table_vaddr + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE);
-        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
-
-        if ( !amd_iommu_is_pte_present(pde) )
+/* For each pde, we use ignored bits (bit 1 - bit 8 and bit 63)
+ * to save pde count; pde count = 511 is a candidate for page coalescing.
+ */
+static unsigned int get_pde_count(u64 pde)
+{
+    unsigned int count;
+    u64 upper_mask = 1ULL << 63 ;
+    u64 lower_mask = 0xFF << 1;
+
+    count = ((pde & upper_mask) >> 55) | ((pde & lower_mask) >> 1);
+    return count;
+}
+
+/* Convert pde count into iommu pte ignored bits */
+static void set_pde_count(u64 *pde, unsigned int count)
+{
+    u64 upper_mask = 1ULL << 8 ;
+    u64 lower_mask = 0xFF;
+    u64 pte_mask = (~(1ULL << 63)) & (~(0xFF << 1));
+
+    *pde &= pte_mask;
+    *pde |= ((count & upper_mask ) << 55) | ((count & lower_mask ) << 1);
+}
+
+/* Return 1 if pages are suitable for merging at merge_level;
+ * otherwise increase pde count if mfn is contiguous with mfn - 1.
+ */
+static int iommu_update_pde_count(struct domain *d, unsigned long pt_mfn,
+                                  unsigned long gfn, unsigned long mfn,
+                                  unsigned int merge_level)
+{
+    unsigned int pde_count, next_level;
+    unsigned long first_mfn;
+    u64 *table, *pde, *ntable;
+    u64 ntable_maddr, mask;
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    bool_t ok = 0;
+
+    ASSERT( spin_is_locked(&hd->mapping_lock) && pt_mfn );
+
+    next_level = merge_level - 1;
+
+    /* get pde at merge level */
+    table = map_domain_page(pt_mfn);
+    pde = table + pfn_to_pde_idx(gfn, merge_level);
+
+    /* get page table of next level */
+    ntable_maddr = amd_iommu_get_next_table_from_pte((u32*)pde);
+    ntable = map_domain_page(ntable_maddr >> PAGE_SHIFT);
+
+    /* get the first mfn of next level */
+    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;
+
+    if ( first_mfn == 0 )
+        goto out;
+
+    mask = (1ULL<< (PTE_PER_TABLE_SHIFT * next_level)) - 1;
+
+    if ( ((first_mfn & mask) == 0) &&
+         (((gfn & mask) | first_mfn) == mfn) )
+    {
+        pde_count = get_pde_count(*pde);
+
+        if ( pde_count == (PTE_PER_TABLE_SIZE - 1) )
+            ok = 1;
+        else if ( pde_count < (PTE_PER_TABLE_SIZE - 1))
         {
-            if ( next_table_maddr == 0 )
+            pde_count++;
+            set_pde_count(pde, pde_count);
+        }
+    }
+
+    else
+        /* non-contiguous mapping */
+        set_pde_count(pde, 0);
+
+out:
+    unmap_domain_page(ntable);
+    unmap_domain_page(table);
+
+    return ok;
+}
+
+static int iommu_merge_pages(struct domain *d, unsigned long pt_mfn,
+                             unsigned long gfn, unsigned int flags,
+                             unsigned int merge_level)
+{
+    u64 *table, *pde, *ntable;
+    u64 ntable_mfn;
+    unsigned long first_mfn;
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+    ASSERT( spin_is_locked(&hd->mapping_lock) && pt_mfn );
+
+    table = map_domain_page(pt_mfn);
+    pde = table + pfn_to_pde_idx(gfn, merge_level);
+
+    /* get first mfn */
+    ntable_mfn = amd_iommu_get_next_table_from_pte((u32*)pde) >> PAGE_SHIFT;
+
+    if ( ntable_mfn == 0 )
+    {
+        unmap_domain_page(table);
+        return 1;
+    }
+
+    ntable = map_domain_page(ntable_mfn);
+    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;
+
+    if ( first_mfn == 0 )
+    {
+        unmap_domain_page(ntable);
+        unmap_domain_page(table);
+        return 1;
+    }
+
+    /* setup super page mapping, next level = 0 */
+    set_iommu_pde_present((u32*)pde, first_mfn,
+                          IOMMU_PAGING_MODE_LEVEL_0,
+                          !!(flags & IOMMUF_writable),
+                          !!(flags & IOMMUF_readable));
+
+    amd_iommu_flush_all_pages(d);
+
+    unmap_domain_page(ntable);
+    unmap_domain_page(table);
+    return 0;
+}
+
+/* Walk io page tables and build lower-level page tables if necessary.
+ * {Re, un}mapping super page frames causes re-allocation of io
+ * page tables.
+ */
+static int iommu_pde_from_gfn(struct domain *d, unsigned long pfn, 
+                              unsigned long pt_mfn[])
+{
+    u64 *pde, *next_table_vaddr;
+    unsigned long  next_table_mfn;
+    unsigned int level;
+    struct page_info *table;
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+    table = hd->root_table;
+    level = hd->paging_mode;
+
+    BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 || 
+            level > IOMMU_PAGING_MODE_LEVEL_6 );
+
+    next_table_mfn = page_to_mfn(table);
+
+    if ( level == IOMMU_PAGING_MODE_LEVEL_1 )
+    {
+        pt_mfn[level] = next_table_mfn;
+        return 0;
+    }
+
+    while ( level > IOMMU_PAGING_MODE_LEVEL_1 )
+    {
+        unsigned int next_level = level - 1;
+        pt_mfn[level] = next_table_mfn;
+
+        next_table_vaddr = map_domain_page(next_table_mfn);
+        pde = next_table_vaddr + pfn_to_pde_idx(pfn, level);
+
+        /* Here might be a super page frame */
+        next_table_mfn = amd_iommu_get_next_table_from_pte((uint32_t*)pde) 
+                         >> PAGE_SHIFT;
+
+        /* Split super page frame into smaller pieces.*/
+        if ( amd_iommu_is_pte_present((u32*)pde) &&
+             (iommu_next_level((u32*)pde) == 0) &&
+             next_table_mfn != 0 )
+        {
+            int i;
+            unsigned long mfn, gfn;
+            unsigned int page_sz;
+
+            page_sz = 1 << (PTE_PER_TABLE_SHIFT * (next_level - 1));
+            gfn =  pfn & ~((1 << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
+            mfn = next_table_mfn;
+
+            /* allocate lower level page table */
+            table = alloc_amd_iommu_pgtable();
+            if ( table == NULL )
+            {
+                AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
+                unmap_domain_page(next_table_vaddr);
+                return 1;
+            }
+
+            next_table_mfn = page_to_mfn(table);
+            set_iommu_pde_present((u32*)pde, next_table_mfn, next_level, 
+                                  !!IOMMUF_writable, !!IOMMUF_readable);
+
+            for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
+            {
+                set_iommu_pte_present(next_table_mfn, gfn, mfn, next_level,
+                                      !!IOMMUF_writable, !!IOMMUF_readable);
+                mfn += page_sz;
+                gfn += page_sz;
+             }
+
+            amd_iommu_flush_all_pages(d);
+        }
+
+        /* Install lower level page table for non-present entries */
+        else if ( !amd_iommu_is_pte_present((u32*)pde) )
+        {
+            if ( next_table_mfn == 0 )
             {
                 table = alloc_amd_iommu_pgtable();
                 if ( table == NULL )
                 {
-                    printk("AMD-Vi: Cannot allocate I/O page table\n");
-                    return 0;
+                    AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
+                    unmap_domain_page(next_table_vaddr);
+                    return 1;
                 }
-                next_table_maddr = page_to_maddr(table);
-                amd_iommu_set_page_directory_entry(
-                    (u32 *)pde, next_table_maddr, level - 1);
+                next_table_mfn = page_to_mfn(table);
+                set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
+                                      !!IOMMUF_writable, !!IOMMUF_readable);
             }
             else /* should never reach here */
-                return 0;
+            {
+                unmap_domain_page(next_table_vaddr);
+                return 1;
+            }
         }
 
-        unmap_domain_page(table_vaddr);
-        table = maddr_to_page(next_table_maddr);
+        unmap_domain_page(next_table_vaddr);
         level--;
     }
 
-    return next_table_maddr;
+    /* mfn of level 1 page table */
+    pt_mfn[level] = next_table_mfn;
+    return 0;
 }
 
 static int update_paging_mode(struct domain *d, unsigned long gfn)
@@ -500,7 +675,7 @@ static int update_paging_mode(struct dom
     struct page_info *new_root = NULL;
     struct page_info *old_root = NULL;
     void *new_root_vaddr;
-    u64 old_root_maddr;
+    unsigned long old_root_mfn;
     struct hvm_iommu *hd = domain_hvm_iommu(d);
 
     level = hd->paging_mode;
@@ -522,12 +697,13 @@ static int update_paging_mode(struct dom
         }
 
         new_root_vaddr = __map_domain_page(new_root);
-        old_root_maddr = page_to_maddr(old_root);
-        amd_iommu_set_page_directory_entry((u32 *)new_root_vaddr,
-                                           old_root_maddr, level);
+        old_root_mfn = page_to_mfn(old_root);
+        set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
+                              !!IOMMUF_writable, !!IOMMUF_readable);
         level++;
         old_root = new_root;
         offset >>= PTE_PER_TABLE_SHIFT;
+        unmap_domain_page(new_root_vaddr);
     }
 
     if ( new_root != NULL )
@@ -575,14 +751,17 @@ int amd_iommu_map_page(struct domain *d,
 int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                        unsigned int flags)
 {
-    u64 iommu_l2e;
-    int need_flush = 0;
+    bool_t need_flush = 0;
     struct hvm_iommu *hd = domain_hvm_iommu(d);
+    unsigned long pt_mfn[7];
+    unsigned int merge_level;
 
     BUG_ON( !hd->root_table );
 
     if ( iommu_hap_pt_share && is_hvm_domain(d) )
         return 0;
+
+    memset(pt_mfn, 0, sizeof(pt_mfn));
 
     spin_lock(&hd->mapping_lock);
 
@@ -592,14 +771,14 @@ int amd_iommu_map_page(struct domain *d,
     {
         if ( update_paging_mode(d, gfn) )
         {
+            spin_unlock(&hd->mapping_lock);
             AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
             domain_crash(d);
             return -EFAULT;
         }
     }
 
-    iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
-    if ( iommu_l2e == 0 )
+    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
     {
         spin_unlock(&hd->mapping_lock);
         AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
@@ -607,25 +786,56 @@ int amd_iommu_map_page(struct domain *d,
         return -EFAULT;
     }
 
-    need_flush = set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT,
+    /* Install 4k mapping first */
+    need_flush = set_iommu_pte_present(pt_mfn[1], gfn, mfn, 
+                                       IOMMU_PAGING_MODE_LEVEL_1,
                                        !!(flags & IOMMUF_writable),
                                        !!(flags & IOMMUF_readable));
-    if ( need_flush )
-        amd_iommu_flush_pages(d, gfn, 0);
-
+
+    /* Do not increase pde count if io mapping has not been changed */
+    if ( !need_flush )
+        goto out;
+
+    amd_iommu_flush_pages(d, gfn, 0);
+
+    for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+          merge_level <= hd->paging_mode; merge_level++ )
+    {
+        if ( pt_mfn[merge_level] == 0 )
+            break;
+        if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
+                                     gfn, mfn, merge_level) )
+            break;
+        /* Deallocate lower level page table */
+        free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
+
+        if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn, 
+                               flags, merge_level) )
+        {
+            spin_unlock(&hd->mapping_lock);
+            AMD_IOMMU_DEBUG("Merge iommu page failed at level %d, "
+                            "gfn = %lx mfn = %lx\n", merge_level, gfn, mfn);
+            domain_crash(d);
+            return -EFAULT;
+        }
+    }
+
+out:
     spin_unlock(&hd->mapping_lock);
     return 0;
 }
 
 int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
 {
-    u64 iommu_l2e;
+    unsigned long pt_mfn[7];
     struct hvm_iommu *hd = domain_hvm_iommu(d);
 
     BUG_ON( !hd->root_table );
 
     if ( iommu_hap_pt_share && is_hvm_domain(d) )
         return 0;
+
+    memset(pt_mfn, 0, sizeof(pt_mfn));
 
     spin_lock(&hd->mapping_lock);
 
@@ -635,15 +845,14 @@ int amd_iommu_unmap_page(struct domain *
     {
         if ( update_paging_mode(d, gfn) )
         {
+            spin_unlock(&hd->mapping_lock);
             AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
             domain_crash(d);
             return -EFAULT;
         }
     }
 
-    iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
-
-    if ( iommu_l2e == 0 )
+    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
     {
         spin_unlock(&hd->mapping_lock);
         AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
@@ -652,7 +861,7 @@ int amd_iommu_unmap_page(struct domain *
     }
 
     /* mark PTE as 'page not present' */
-    clear_iommu_l1e_present(iommu_l2e, gfn);
+    clear_iommu_pte_present(pt_mfn[1], gfn);
     spin_unlock(&hd->mapping_lock);
 
     amd_iommu_flush_pages(d, gfn, 0);
diff -r 0f36c2eec2e1 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Jul 28 15:40:54 2011 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Aug 12 14:03:29 2011 +0200
@@ -237,7 +237,9 @@ static void __init amd_iommu_dom0_init(s
              * XXX Should we really map all non-RAM (above 4G)? Minimally
              * a pfn_valid() check would seem desirable here.
              */
-            amd_iommu_map_page(d, pfn, pfn, IOMMUF_readable|IOMMUF_writable);
+            if ( mfn_valid(pfn) )
+                amd_iommu_map_page(d, pfn, pfn, 
+                                   IOMMUF_readable|IOMMUF_writable);
         }
     }
 
@@ -333,7 +335,8 @@ static void deallocate_next_page_table(s
 {
     void *table_vaddr, *pde;
     u64 next_table_maddr;
-    int index;
+    int index, next_level, present;
+    u32 *entry;
 
     table_vaddr = __map_domain_page(pg);
 
@@ -343,7 +346,18 @@ static void deallocate_next_page_table(s
         {
             pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
             next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
-            if ( next_table_maddr != 0 )
+            entry = (u32*)pde;
+
+            next_level = get_field_from_reg_u32(entry[0],
+                                                IOMMU_PDE_NEXT_LEVEL_MASK,
+                                                IOMMU_PDE_NEXT_LEVEL_SHIFT);
+
+            present = get_field_from_reg_u32(entry[0],
+                                             IOMMU_PDE_PRESENT_MASK,
+                                             IOMMU_PDE_PRESENT_SHIFT);
+
+            if ( (next_table_maddr != 0) && (next_level != 0)
+                && present )
             {
                 deallocate_next_page_table(
                     maddr_to_page(next_table_maddr), level - 1);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-08-12 12:25 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-08-12 12:25 [PATCH] amd iommu: Automatic page coalescing Wei Wang2

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.