All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yu Zhang <yu.c.zhang@linux.intel.com>
To: qemu-devel@nongnu.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Peter Xu <peterx@redhat.com>
Subject: [Qemu-devel] [PATCH v1 2/3] intel-iommu: extend VTD emulation to allow 57-bit IOVA address width.
Date: Fri,  9 Nov 2018 19:49:46 +0800	[thread overview]
Message-ID: <1541764187-10732-3-git-send-email-yu.c.zhang@linux.intel.com> (raw)
In-Reply-To: <1541764187-10732-1-git-send-email-yu.c.zhang@linux.intel.com>

A VM capable of 5-level paging may choose to use a 57-bit IOVA address
width. E.g. guest applications like DPDK prefer to use their VAs as IOVAs
when performing VFIO map/unmap operations, to avoid the burden of managing
the IOVA space.

This patch extends the current vIOMMU logic to cover the extended address
width. When creating a VM with the 5-level paging feature, one can choose
to create a virtual VTD with 5-level paging capability, using a
configuration like "-device intel-iommu,x-aw-bits=57".

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
---
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
---
 hw/i386/intel_iommu.c          | 54 ++++++++++++++++++++++++++++++++----------
 hw/i386/intel_iommu_internal.h |  6 +++++
 include/hw/i386/intel_iommu.h  |  1 +
 3 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index e772fca..9cdf755 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -664,16 +664,16 @@ static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
 
 /*
  * Rsvd field masks for spte:
- *     Index [1] to [4] 4k pages
- *     Index [5] to [8] large pages
+ *     Index [1] to [5] 4k pages
+ *     Index [6] to [10] large pages
  */
-static uint64_t vtd_paging_entry_rsvd_field[9];
+static uint64_t vtd_paging_entry_rsvd_field[11];
 
 static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
 {
     if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
         /* Maybe large page */
-        return slpte & vtd_paging_entry_rsvd_field[level + 4];
+        return slpte & vtd_paging_entry_rsvd_field[level + 5];
     } else {
         return slpte & vtd_paging_entry_rsvd_field[level];
     }
@@ -3125,6 +3125,9 @@ static void vtd_init(IntelIOMMUState *s)
     if (s->aw_bits == VTD_AW_48BIT) {
         s->cap |= VTD_CAP_SAGAW_48bit;
     }
+    else if (s->aw_bits == VTD_AW_57BIT) {
+        s->cap |= VTD_CAP_SAGAW_57bit | VTD_CAP_SAGAW_48bit;
+    }
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
     s->haw_bits = cpu->phys_bits;
 
@@ -3136,10 +3139,12 @@ static void vtd_init(IntelIOMMUState *s)
     vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->haw_bits);
     vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->haw_bits);
     vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->haw_bits);
-    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_PAGE_L5_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[9] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[10] = VTD_SPTE_LPAGE_L5_RSVD_MASK(s->haw_bits);
 
     if (x86_iommu->intr_supported) {
         s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
@@ -3238,6 +3243,23 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &vtd_as->as;
 }
 
+static bool host_has_la57(void)
+{
+    uint32_t ecx, unused;
+
+    host_cpuid(7, 0, &unused, &unused, &ecx, &unused);
+    return ecx & CPUID_7_0_ECX_LA57;
+}
+
+static bool guest_has_la57(void)
+{
+    CPUState *cs = first_cpu;
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    return env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57;
+}
+
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
 {
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
@@ -3264,11 +3286,19 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
         }
     }
 
-    /* Currently only address widths supported are 39 and 48 bits */
+    /* Currently address widths supported are 39, 48, and 57 bits */
     if ((s->aw_bits != VTD_AW_39BIT) &&
-        (s->aw_bits != VTD_AW_48BIT)) {
-        error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
-                   VTD_AW_39BIT, VTD_AW_48BIT);
+        (s->aw_bits != VTD_AW_48BIT) &&
+        (s->aw_bits != VTD_AW_57BIT)) {
+        error_setg(errp, "Supported values for x-aw-bits are: %d, %d, %d",
+                   VTD_AW_39BIT, VTD_AW_48BIT, VTD_AW_57BIT);
+        return false;
+    }
+
+    if ((s->aw_bits == VTD_AW_57BIT) &&
+        !(host_has_la57() && guest_has_la57())) {
+        error_setg(errp, "Do not support 57-bit DMA address, unless both "
+                         "host and guest are capable of 5-level paging.\n");
         return false;
     }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index d084099..a7ef24b 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -212,6 +212,8 @@
 #define VTD_CAP_SAGAW_39bit         (0x2ULL << VTD_CAP_SAGAW_SHIFT)
  /* 48-bit AGAW, 4-level page-table */
 #define VTD_CAP_SAGAW_48bit         (0x4ULL << VTD_CAP_SAGAW_SHIFT)
+ /* 57-bit AGAW, 5-level page-table */
+#define VTD_CAP_SAGAW_57bit         (0x8ULL << VTD_CAP_SAGAW_SHIFT)
 
 /* IQT_REG */
 #define VTD_IQT_QT(val)             (((val) >> 4) & 0x7fffULL)
@@ -379,6 +381,8 @@ typedef union VTDInvDesc VTDInvDesc;
         (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \
         (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L5_RSVD_MASK(aw) \
+        (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \
         (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \
@@ -387,6 +391,8 @@ typedef union VTDInvDesc VTDInvDesc;
         (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 #define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \
         (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L5_RSVD_MASK(aw) \
+        (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
 
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 820451c..7474c4f 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -49,6 +49,7 @@
 #define DMAR_REG_SIZE               0x230
 #define VTD_AW_39BIT                39
 #define VTD_AW_48BIT                48
+#define VTD_AW_57BIT                57
 #define VTD_ADDRESS_WIDTH           VTD_AW_39BIT
 #define VTD_HAW_MASK(aw)            ((1ULL << (aw)) - 1)
 
-- 
1.9.1

  parent reply	other threads:[~2018-11-09 11:51 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-09 11:49 [Qemu-devel] [PATCH v1 0/3] intel-iommu: add support for 5-level virtual IOMMU Yu Zhang
2018-11-09 11:49 ` [Qemu-devel] [PATCH v1 1/3] intel-iommu: differentiate host address width from IOVA address width Yu Zhang
2018-11-12  8:15   ` Peter Xu
2018-11-12  9:28     ` Yu Zhang
2018-11-09 11:49 ` Yu Zhang [this message]
2018-11-12  8:36   ` [Qemu-devel] [PATCH v1 2/3] intel-iommu: extend VTD emulation to allow 57-bit " Peter Xu
2018-11-12  9:42     ` Yu Zhang
2018-11-13  3:37       ` Peter Xu
2018-11-13  5:04         ` Peter Xu
2018-11-13  5:45           ` Yu Zhang
2018-11-13  6:12             ` Peter Xu
2018-11-13  6:59               ` Yu Zhang
2018-11-13  5:41         ` Yu Zhang
2018-11-09 11:49 ` [Qemu-devel] [PATCH v1 3/3] intel-iommu: search iotlb for levels supported by the " Yu Zhang
2018-11-12  8:51   ` Peter Xu
2018-11-12  9:25     ` Yu Zhang
2018-11-12  9:36       ` Peter Xu
2018-11-12 12:38         ` Yu Zhang
2018-11-13  5:18           ` Peter Xu
2018-11-13  5:53             ` Yu Zhang
2018-11-09 22:32 ` [Qemu-devel] [PATCH v1 0/3] intel-iommu: add support for 5-level virtual IOMMU no-reply
2018-11-12  8:53 ` Peter Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1541764187-10732-3-git-send-email-yu.c.zhang@linux.intel.com \
    --to=yu.c.zhang@linux.intel.com \
    --cc=ehabkost@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.