qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Yu Zhang <yu.c.zhang@linux.intel.com>
To: qemu-devel@nongnu.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Peter Xu <peterx@redhat.com>
Subject: [Qemu-devel] [PATCH v1 2/3] intel-iommu: extend VTD emulation to allow 57-bit IOVA address width.
Date: Fri,  9 Nov 2018 19:49:46 +0800	[thread overview]
Message-ID: <1541764187-10732-3-git-send-email-yu.c.zhang@linux.intel.com> (raw)
In-Reply-To: <1541764187-10732-1-git-send-email-yu.c.zhang@linux.intel.com>

A 5-level paging capable VM may choose to use 57-bit IOVA address width.
E.g. guest applications like DPDK prefer to use its VA as IOVA when
performing VFIO map/unmap operations, to avoid the burden of managing the
IOVA space.

This patch extends the current vIOMMU logic to cover the extended address
width. When creating a VM with 5-level paging feature, one can choose to
create a virtual VTD with 5-level paging capability, with configurations
like "-device intel-iommu,x-aw-bits=57".

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
---
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c          | 54 ++++++++++++++++++++++++++++++++----------
 hw/i386/intel_iommu_internal.h |  6 +++++
 include/hw/i386/intel_iommu.h  |  1 +
 3 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e772fca..9cdf755 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -664,16 +664,16 @@ static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
 
 /*
  * Rsvd field masks for spte:
- *     Index [1] to [4] 4k pages
- *     Index [5] to [8] large pages
+ *     Index [1] to [5] 4k pages
+ *     Index [6] to [10] large pages
  */
-static uint64_t vtd_paging_entry_rsvd_field[9];
+static uint64_t vtd_paging_entry_rsvd_field[11];
 
 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
 {
     if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
         /* Maybe large page */
-        return slpte & vtd_paging_entry_rsvd_field[level + 4];
+        return slpte & vtd_paging_entry_rsvd_field[level + 5];
     } else {
         return slpte & vtd_paging_entry_rsvd_field[level];
     }
@@ -3125,6 +3125,9 @@ static void vtd_init(IntelIOMMUState *s)
     if (s->aw_bits == VTD_AW_48BIT) {
         s->cap |= VTD_CAP_SAGAW_48bit;
     }
+    else if (s->aw_bits == VTD_AW_57BIT) {
+        s->cap |= VTD_CAP_SAGAW_57bit | VTD_CAP_SAGAW_48bit;
+    }
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
     s->haw_bits = cpu->phys_bits;
 
@@ -3136,10 +3139,12 @@ static void vtd_init(IntelIOMMUState *s)
     vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->haw_bits);
     vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->haw_bits);
     vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_PAGE_L5_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[9] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[10] = VTD_SPTE_LPAGE_L5_RSVD_MASK(s->haw_bits);
 
     if (x86_iommu->intr_supported) {
         s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
@@ -3238,6 +3243,23 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &vtd_as->as;
 }
 
+static bool host_has_la57(void)
+{
+    uint32_t ecx, unused;
+
+    host_cpuid(7, 0, &unused, &unused, &ecx, &unused);
+    return ecx & CPUID_7_0_ECX_LA57;
+}
+
+static bool guest_has_la57(void)
+{
+    CPUState *cs = first_cpu;
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    return env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57;
+}
+
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
 {
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -3264,11 +3286,19 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
         }
     }
 
-    /* Currently only address widths supported are 39 and 48 bits */
+    /* Currently address widths supported are 39, 48, and 57 bits */
     if ((s->aw_bits != VTD_AW_39BIT) &&
-        (s->aw_bits != VTD_AW_48BIT)) {
-        error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
-                   VTD_AW_39BIT, VTD_AW_48BIT);
+        (s->aw_bits != VTD_AW_48BIT) &&
+        (s->aw_bits != VTD_AW_57BIT)) {
+        error_setg(errp, "Supported values for x-aw-bits are: %d, %d, %d",
+                   VTD_AW_39BIT, VTD_AW_48BIT, VTD_AW_57BIT);
+        return false;
+    }
+
+    if ((s->aw_bits == VTD_AW_57BIT) &&
+        !(host_has_la57() && guest_has_la57())) {
+        error_setg(errp, "Do not support 57-bit DMA address, unless both "
+                         "host and guest are capable of 5-level paging.\n");
         return false;
     }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d084099..a7ef24b 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -212,6 +212,8 @@
 #define VTD_CAP_SAGAW_39bit         (0x2ULL << VTD_CAP_SAGAW_SHIFT)
  /* 48-bit AGAW, 4-level page-table */
 #define VTD_CAP_SAGAW_48bit         (0x4ULL << VTD_CAP_SAGAW_SHIFT)
+ /* 57-bit AGAW, 5-level page-table */
+#define VTD_CAP_SAGAW_57bit         (0x8ULL << VTD_CAP_SAGAW_SHIFT)
 
 /* IQT_REG */
 #define VTD_IQT_QT(val)             (((val) >> 4) & 0x7fffULL)
@@ -379,6 +381,8 @@ typedef union VTDInvDesc VTDInvDesc;
         (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \
         (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L5_RSVD_MASK(aw) \
+        (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \
         (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \
@@ -387,6 +391,8 @@ typedef union VTDInvDesc VTDInvDesc;
         (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \
         (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L5_RSVD_MASK(aw) \
+        (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 820451c..7474c4f 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -49,6 +49,7 @@
 #define DMAR_REG_SIZE               0x230
 #define VTD_AW_39BIT                39
 #define VTD_AW_48BIT                48
+#define VTD_AW_57BIT                57
 #define VTD_ADDRESS_WIDTH           VTD_AW_39BIT
 #define VTD_HAW_MASK(aw)            ((1ULL << (aw)) - 1)
 
-- 
1.9.1

  parent reply	other threads:[~2018-11-09 11:51 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-09 11:49 [Qemu-devel] [PATCH v1 0/3] intel-iommu: add support for 5-level virtual IOMMU Yu Zhang
2018-11-09 11:49 ` [Qemu-devel] [PATCH v1 1/3] intel-iommu: differentiate host address width from IOVA address width Yu Zhang
2018-11-12  8:15   ` Peter Xu
2018-11-12  9:28     ` Yu Zhang
2018-11-09 11:49 ` Yu Zhang [this message]
2018-11-12  8:36   ` [Qemu-devel] [PATCH v1 2/3] intel-iommu: extend VTD emulation to allow 57-bit " Peter Xu
2018-11-12  9:42     ` Yu Zhang
2018-11-13  3:37       ` Peter Xu
2018-11-13  5:04         ` Peter Xu
2018-11-13  5:45           ` Yu Zhang
2018-11-13  6:12             ` Peter Xu
2018-11-13  6:59               ` Yu Zhang
2018-11-13  5:41         ` Yu Zhang
2018-11-09 11:49 ` [Qemu-devel] [PATCH v1 3/3] intel-iommu: search iotlb for levels supported by the " Yu Zhang
2018-11-12  8:51   ` Peter Xu
2018-11-12  9:25     ` Yu Zhang
2018-11-12  9:36       ` Peter Xu
2018-11-12 12:38         ` Yu Zhang
2018-11-13  5:18           ` Peter Xu
2018-11-13  5:53             ` Yu Zhang
2018-11-09 22:32 ` [Qemu-devel] [PATCH v1 0/3] intel-iommu: add support for 5-level virtual IOMMU no-reply
2018-11-12  8:53 ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1541764187-10732-3-git-send-email-yu.c.zhang@linux.intel.com \
    --to=yu.c.zhang@linux.intel.com \
    --cc=ehabkost@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).