From: "Michael S. Tsirkin" <mst@redhat.com>
To: Jason Wang <jasowang@redhat.com>
Cc: qemu-devel@nongnu.org, pbonzini@redhat.com, ehabkost@redhat.com,
rth@twiddle.net
Subject: Re: [Qemu-devel] [PATCH] intel_iommu: large page support
Date: Thu, 14 Jan 2016 11:28:10 +0200
Message-ID: <20160114112703-mutt-send-email-mst@redhat.com>
In-Reply-To: <1452750444-17750-1-git-send-email-jasowang@redhat.com>
On Thu, Jan 14, 2016 at 12:47:24AM -0500, Jason Wang wrote:
> The current intel_iommu only supports 4K pages, which may not be
> sufficient to cover the guest working set. This patch enables 2M and
> 1G mappings in intel_iommu. This is also useful for a future device
> IOTLB implementation, which would get a better hit rate.
>
> The major work is adding a page mask field to the IOTLB entry so that
> it can represent large pages, and using the slpte level as part of the
> IOTLB lookup key. MAMV is increased to 18 to support direct
> invalidation of 1G mappings.
>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Cc: Eduardo Habkost <ehabkost@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>
Looks good, thanks!
I was going to comment that guest-visible changes such as MAMV would
have to be versioned, but then I noticed that this device is
unmigratable ATM. So no issue for now, but we do need to fix migration
for it.
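For the record, "fix migration" would mean giving the device a
VMStateDescription; a guest-visible capability change like MAMV would
then be handled with the usual field-versioning pattern. A rough sketch
only (intel-iommu has no vmstate today, so everything below is
illustrative, not the actual device state layout):

    static const VMStateDescription vmstate_vtd = {
        .name = "intel-iommu",
        .version_id = 2,
        .minimum_version_id = 1,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(cap, IntelIOMMUState),
            /* only sent/expected from version 2 on */
            VMSTATE_UINT64_V(ecap, IntelIOMMUState, 2),
            VMSTATE_END_OF_LIST()
        }
    };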
> ---
> Tested with virtio-net-pmd/vfio using 2M and 1G mappings in the guest.
> ---
> hw/i386/intel_iommu.c | 76 ++++++++++++++++++++++++++++++------------
> hw/i386/intel_iommu_internal.h | 6 ++--
> include/hw/i386/intel_iommu.h | 1 +
> 3 files changed, 59 insertions(+), 24 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 3fe27fa..68940a0 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -152,14 +152,27 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
> return entry->domain_id == domain_id;
> }
>
> +/* The shift of an addr for a certain level of paging structure */
> +static inline uint32_t vtd_slpt_level_shift(uint32_t level)
> +{
> + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
> +}
> +
> +static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
> +{
> + return ~((1ULL << vtd_slpt_level_shift(level)) - 1);
> +}
> +
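
To spell out the arithmetic for readers (assuming VTD_PAGE_SHIFT_4K is
12 and VTD_SL_LEVEL_BITS is 9, as defined in intel_iommu_internal.h):

    level 1 (4K):  shift = 12, mask = ~0xfffULL
    level 2 (2M):  shift = 21, mask = ~0x1fffffULL
    level 3 (1G):  shift = 30, mask = ~0x3fffffffULL
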
> static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
> gpointer user_data)
> {
> VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
> VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
> - uint64_t gfn = info->gfn & info->mask;
> + uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
> + uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
> return (entry->domain_id == info->domain_id) &&
> - ((entry->gfn & info->mask) == gfn);
> + (((entry->gfn & info->mask) == gfn) ||
> + (entry->gfn == gfn_tlb));
> }
>
> /* Reset all the gen of VTDAddressSpace to zero and set the gen of
> @@ -193,24 +206,46 @@ static void vtd_reset_iotlb(IntelIOMMUState *s)
> g_hash_table_remove_all(s->iotlb);
> }
>
> +static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id,
> + uint32_t level)
> +{
> + return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
> + ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
> +}
> +
> +static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
> +{
> + return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
> +}
> +
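
So the cache key now packs three values into one 64-bit word,
schematically:

    key = gfn
        | (uint64_t)source_id << VTD_IOTLB_SID_SHIFT  /* 36 */
        | (uint64_t)level << VTD_IOTLB_LVL_SHIFT;     /* 44 */

which means a 4K translation and a 2M/1G one for the same (sid, addr)
live under different keys -- hence the per-level probing in the lookup
below.
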
> static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
> hwaddr addr)
> {
> + VTDIOTLBEntry *entry;
> uint64_t key;
> + int level;
> +
> + for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
> + key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
> + source_id, level);
> + entry = g_hash_table_lookup(s->iotlb, &key);
> + if (entry) {
> + goto out;
> + }
> + }
>
> - key = (addr >> VTD_PAGE_SHIFT_4K) |
> - ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
> - return g_hash_table_lookup(s->iotlb, &key);
> -
> +out:
> + return entry;
> }
>
> static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
> uint16_t domain_id, hwaddr addr, uint64_t slpte,
> - bool read_flags, bool write_flags)
> + bool read_flags, bool write_flags,
> + uint32_t level)
> {
> VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
> uint64_t *key = g_malloc(sizeof(*key));
> - uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K;
> + uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
>
> VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
> " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
> @@ -225,7 +260,8 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
> entry->slpte = slpte;
> entry->read_flags = read_flags;
> entry->write_flags = write_flags;
> - *key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
> + entry->mask = vtd_slpt_level_page_mask(level);
> + *key = vtd_get_iotlb_key(gfn, source_id, level);
> g_hash_table_replace(s->iotlb, key, entry);
> }
>
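
Worth noting: because vtd_get_iotlb_gfn() masks before shifting, every
4K address inside one large page hashes to the same entry; e.g. for a
2M mapping (hypothetical addresses):

    addr 0x40123000, level 2 -> gfn = (0x40123000 & ~0x1fffffULL) >> 12 = 0x40000
    addr 0x401ff000, level 2 -> same gfn, so it hits the same cache entry
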
> @@ -500,12 +536,6 @@ static inline dma_addr_t vtd_get_slpt_base_from_context(VTDContextEntry *ce)
> return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
> }
>
> -/* The shift of an addr for a certain level of paging structure */
> -static inline uint32_t vtd_slpt_level_shift(uint32_t level)
> -{
> - return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
> -}
> -
> static inline uint64_t vtd_get_slpte_addr(uint64_t slpte)
> {
> return slpte & VTD_SL_PT_BASE_ADDR_MASK;
> @@ -761,7 +791,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> VTDContextEntry ce;
> uint8_t bus_num = pci_bus_num(bus);
> VTDContextCacheEntry *cc_entry = &vtd_as->context_cache_entry;
> - uint64_t slpte;
> + uint64_t slpte, page_mask;
> uint32_t level;
> uint16_t source_id = vtd_make_source_id(bus_num, devfn);
> int ret_fr;
> @@ -801,6 +831,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> slpte = iotlb_entry->slpte;
> reads = iotlb_entry->read_flags;
> writes = iotlb_entry->write_flags;
> + page_mask = iotlb_entry->mask;
> goto out;
> }
> /* Try to fetch context-entry from cache first */
> @@ -847,12 +878,13 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> return;
> }
>
> + page_mask = vtd_slpt_level_page_mask(level);
> vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
> - reads, writes);
> + reads, writes, level);
> out:
> - entry->iova = addr & VTD_PAGE_MASK_4K;
> - entry->translated_addr = vtd_get_slpte_addr(slpte) & VTD_PAGE_MASK_4K;
> - entry->addr_mask = ~VTD_PAGE_MASK_4K;
> + entry->iova = addr & page_mask;
> + entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask;
> + entry->addr_mask = ~page_mask;
> entry->perm = (writes ? 2 : 0) + (reads ? 1 : 0);
> }
>
> @@ -990,7 +1022,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>
> assert(am <= VTD_MAMV);
> info.domain_id = domain_id;
> - info.gfn = addr >> VTD_PAGE_SHIFT_4K;
> + info.addr = addr;
> info.mask = ~((1 << am) - 1);
> g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> }
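
And this is where the MAMV bump matters: am is the number of low gfn
bits masked off, so one page-selective invalidation covers 2^am 4K
pages. Checking the limits:

    am = 9  -> 2^9  * 4K = 2M  (the old MAMV of 9 was enough for 2M)
    am = 18 -> 2^18 * 4K = 1G  (hence MAMV = 18)
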
> @@ -1916,7 +1948,7 @@ static void vtd_init(IntelIOMMUState *s)
> s->iq_last_desc_type = VTD_INV_DESC_NONE;
> s->next_frcd_reg = 0;
> s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
> - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI;
> + VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
> s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
>
> vtd_reset_context_cache(s);
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index ba288ab..e5f514c 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -113,6 +113,7 @@
>
> /* The shift of source_id in the key of IOTLB hash table */
> #define VTD_IOTLB_SID_SHIFT 36
> +#define VTD_IOTLB_LVL_SHIFT 44
> #define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */
>
> /* IOTLB_REG */
> @@ -185,9 +186,10 @@
> #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
> #define VTD_MGAW 39 /* Maximum Guest Address Width */
> #define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16)
> -#define VTD_MAMV 9ULL
> +#define VTD_MAMV 18ULL
> #define VTD_CAP_MAMV (VTD_MAMV << 48)
> #define VTD_CAP_PSI (1ULL << 39)
> +#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35))
>
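(If I read the VT-d spec right, SLLPS is CAP[37:34], with bit 34
advertising 2M second-level pages and bit 35 advertising 1G -- exactly
the two sizes this patch implements.)
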
> /* Supported Adjusted Guest Address Widths */
> #define VTD_CAP_SAGAW_SHIFT 8
> @@ -320,7 +322,7 @@ typedef struct VTDInvDesc VTDInvDesc;
> /* Information about page-selective IOTLB invalidate */
> struct VTDIOTLBPageInvInfo {
> uint16_t domain_id;
> - uint64_t gfn;
> + uint64_t addr;
> uint8_t mask;
> };
> typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 5dbadb7..b024ffa 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -83,6 +83,7 @@ struct VTDIOTLBEntry {
> uint64_t gfn;
> uint16_t domain_id;
> uint64_t slpte;
> + uint64_t mask;
> bool read_flags;
> bool write_flags;
> };
> --
> 1.8.3.1