qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Yu Zhang <yu.c.zhang@linux.intel.com>
To: qemu-devel@nongnu.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
	Igor Mammedov <imammedo@redhat.com>,
	Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Peter Xu <peterx@redhat.com>
Subject: [Qemu-devel] [PATCH v3 1/2] intel-iommu: differentiate host address width from IOVA address width.
Date: Wed, 12 Dec 2018 21:05:38 +0800	[thread overview]
Message-ID: <1544619939-18102-2-git-send-email-yu.c.zhang@linux.intel.com> (raw)
In-Reply-To: <1544619939-18102-1-git-send-email-yu.c.zhang@linux.intel.com>

Currently, vIOMMU is using the value of the IOVA address width, instead of
the host address width (HAW), to calculate the number of reserved bits in
data structures such as root entries, context entries, and entries of
DMA paging structures etc.

However, the values of the IOVA address width and of the HAW may not be
equal. For example, a 48-bit IOVA can only be mapped to host addresses no
wider than 46 bits. Using 48, instead of 46, to calculate the reserved bits
may result in an invalid IOVA being accepted.

To fix this, a new field - haw_bits - is introduced in struct IntelIOMMUState,
whose value is initialized based on the maximum physical address set for the
guest CPU. Also, definitions such as VTD_HOST_AW_39/48BIT etc. are renamed
to clarify.

Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
---
Cc: "Michael S. Tsirkin" <mst@redhat.com> 
Cc: Igor Mammedov <imammedo@redhat.com> 
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com> 
Cc: Richard Henderson <rth@twiddle.net> 
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
---
 hw/i386/acpi-build.c          |  2 +-
 hw/i386/intel_iommu.c         | 55 ++++++++++++++++++++++++-------------------
 include/hw/i386/intel_iommu.h |  9 +++----
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 236a20e..b989523 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2431,7 +2431,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
     }
 
     dmar = acpi_data_push(table_data, sizeof(*dmar));
-    dmar->host_address_width = intel_iommu->aw_bits - 1;
+    dmar->host_address_width = intel_iommu->haw_bits - 1;
     dmar->flags = dmar_flags;
 
     /* DMAR Remapping Hardware Unit Definition structure */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d97bcbc..0e88c63 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -707,7 +707,8 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
  */
 static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
                              uint64_t *slptep, uint32_t *slpte_level,
-                             bool *reads, bool *writes, uint8_t aw_bits)
+                             bool *reads, bool *writes, uint8_t aw_bits,
+                             uint8_t haw_bits)
 {
     dma_addr_t addr = vtd_ce_get_slpt_base(ce);
     uint32_t level = vtd_ce_get_level(ce);
@@ -760,7 +761,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
             *slpte_level = level;
             return 0;
         }
-        addr = vtd_get_slpte_addr(slpte, aw_bits);
+        addr = vtd_get_slpte_addr(slpte, haw_bits);
         level--;
     }
 }
@@ -783,6 +784,7 @@ typedef struct {
     void *private;
     bool notify_unmap;
     uint8_t aw;
+    uint8_t haw;
     uint16_t domain_id;
 } vtd_page_walk_info;
 
@@ -925,7 +927,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
              * This is a valid PDE (or even bigger than PDE).  We need
              * to walk one further level.
              */
-            ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
+            ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->haw),
                                       iova, MIN(iova_next, end), level - 1,
                                       read_cur, write_cur, info);
         } else {
@@ -942,7 +944,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
             entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
             entry.addr_mask = ~subpage_mask;
             /* NOTE: this is only meaningful if entry_valid == true */
-            entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
+            entry.translated_addr = vtd_get_slpte_addr(slpte, info->haw);
             ret = vtd_page_walk_one(&entry, info);
         }
 
@@ -1002,7 +1004,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
         return -VTD_FR_ROOT_ENTRY_P;
     }
 
-    if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
+    if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->haw_bits))) {
         trace_vtd_re_invalid(re.rsvd, re.val);
         return -VTD_FR_ROOT_ENTRY_RSVD;
     }
@@ -1019,7 +1021,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
     }
 
     if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
-               (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
+               (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->haw_bits))) {
         trace_vtd_ce_invalid(ce->hi, ce->lo);
         return -VTD_FR_CONTEXT_ENTRY_RSVD;
     }
@@ -1056,6 +1058,7 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
         .private = (void *)&vtd_as->iommu,
         .notify_unmap = true,
         .aw = s->aw_bits,
+        .haw = s->haw_bits,
         .as = vtd_as,
         .domain_id = VTD_CONTEXT_ENTRY_DID(ce->hi),
     };
@@ -1360,7 +1363,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
     }
 
     ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
-                               &reads, &writes, s->aw_bits);
+                               &reads, &writes, s->aw_bits, s->haw_bits);
     if (ret_fr) {
         ret_fr = -ret_fr;
         if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
@@ -1378,7 +1381,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
 out:
     vtd_iommu_unlock(s);
     entry->iova = addr & page_mask;
-    entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
+    entry->translated_addr = vtd_get_slpte_addr(slpte, s->haw_bits) & page_mask;
     entry->addr_mask = ~page_mask;
     entry->perm = access_flags;
     return true;
@@ -1396,7 +1399,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s)
 {
     s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
     s->root_extended = s->root & VTD_RTADDR_RTT;
-    s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits);
+    s->root &= VTD_RTADDR_ADDR_MASK(s->haw_bits);
 
     trace_vtd_reg_dmar_root(s->root, s->root_extended);
 }
@@ -1412,7 +1415,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
     uint64_t value = 0;
     value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
     s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
-    s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits);
+    s->intr_root = value & VTD_IRTA_ADDR_MASK(s->haw_bits);
     s->intr_eime = value & VTD_IRTA_EIME;
 
     /* Notify global invalidation */
@@ -1689,7 +1692,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
     trace_vtd_inv_qi_enable(en);
 
     if (en) {
-        s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits);
+        s->iq = iqa_val & VTD_IQA_IQA_MASK(s->haw_bits);
         /* 2^(x+8) entries */
         s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
         s->qi_enabled = true;
@@ -2629,7 +2632,7 @@ static Property vtd_properties[] = {
                             ON_OFF_AUTO_AUTO),
     DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
     DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
-                      VTD_HOST_ADDRESS_WIDTH),
+                      VTD_ADDRESS_WIDTH),
     DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -3080,6 +3083,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
                 .private = (void *)n,
                 .notify_unmap = false,
                 .aw = s->aw_bits,
+                .haw = s->haw_bits,
                 .as = vtd_as,
                 .domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi),
             };
@@ -3100,6 +3104,8 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
 static void vtd_init(IntelIOMMUState *s)
 {
     X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+    CPUState *cs = first_cpu;
+    X86CPU *cpu = X86_CPU(cs);
 
     memset(s->csr, 0, DMAR_REG_SIZE);
     memset(s->wmask, 0, DMAR_REG_SIZE);
@@ -3119,23 +3125,24 @@ static void vtd_init(IntelIOMMUState *s)
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
              VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
              VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
-    if (s->aw_bits == VTD_HOST_AW_48BIT) {
+    if (s->aw_bits == VTD_AW_48BIT) {
         s->cap |= VTD_CAP_SAGAW_48bit;
     }
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
+    s->haw_bits = cpu->phys_bits;
 
     /*
      * Rsvd field masks for spte
      */
     vtd_paging_entry_rsvd_field[0] = ~0ULL;
-    vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
-    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);
+    vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->haw_bits);
+    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->haw_bits);
 
     if (x86_iommu->intr_supported) {
         s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
@@ -3261,10 +3268,10 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
     }
 
     /* Currently only address widths supported are 39 and 48 bits */
-    if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
-        (s->aw_bits != VTD_HOST_AW_48BIT)) {
+    if ((s->aw_bits != VTD_AW_39BIT) &&
+        (s->aw_bits != VTD_AW_48BIT)) {
         error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
-                   VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
+                   VTD_AW_39BIT, VTD_AW_48BIT);
         return false;
     }
 
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index ed4e758..820451c 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -47,9 +47,9 @@
 #define VTD_SID_TO_DEVFN(sid)       ((sid) & 0xff)
 
 #define DMAR_REG_SIZE               0x230
-#define VTD_HOST_AW_39BIT           39
-#define VTD_HOST_AW_48BIT           48
-#define VTD_HOST_ADDRESS_WIDTH      VTD_HOST_AW_39BIT
+#define VTD_AW_39BIT                39
+#define VTD_AW_48BIT                48
+#define VTD_ADDRESS_WIDTH           VTD_AW_39BIT
 #define VTD_HAW_MASK(aw)            ((1ULL << (aw)) - 1)
 
 #define DMAR_REPORT_F_INTR          (1)
@@ -244,7 +244,8 @@ struct IntelIOMMUState {
     bool intr_eime;                 /* Extended interrupt mode enabled */
     OnOffAuto intr_eim;             /* Toggle for EIM cabability */
     bool buggy_eim;                 /* Force buggy EIM unless eim=off */
-    uint8_t aw_bits;                /* Host/IOVA address width (in bits) */
+    uint8_t aw_bits;                /* IOVA address width (in bits) */
+    uint8_t haw_bits;               /* Hardware address width (in bits) */
 
     /*
      * Protects IOMMU states in general.  Currently it protects the
-- 
1.9.1

  reply	other threads:[~2018-12-12 13:08 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-12 13:05 [Qemu-devel] [PATCH v3 0/2] intel-iommu: add support for 5-level virtual IOMMU Yu Zhang
2018-12-12 13:05 ` Yu Zhang [this message]
2018-12-17 13:17   ` [Qemu-devel] [PATCH v3 1/2] intel-iommu: differentiate host address width from IOVA address width Igor Mammedov
2018-12-18  9:27     ` Yu Zhang
2018-12-18 14:23       ` Michael S. Tsirkin
2018-12-18 14:55       ` Igor Mammedov
2018-12-18 14:58         ` Michael S. Tsirkin
2018-12-19  3:03           ` Yu Zhang
2018-12-19  3:12             ` Michael S. Tsirkin
2018-12-19  6:28               ` Yu Zhang
2018-12-19 15:30                 ` Michael S. Tsirkin
2018-12-19  2:57         ` Yu Zhang
2018-12-19 10:40           ` Igor Mammedov
2018-12-19 16:47             ` Michael S. Tsirkin
2018-12-20  5:59               ` Yu Zhang
2018-12-20 21:18             ` Eduardo Habkost
2018-12-21 14:13               ` Igor Mammedov
2018-12-21 16:09                 ` Yu Zhang
2018-12-21 17:04                   ` Michael S. Tsirkin
2018-12-21 17:37                     ` Yu Zhang
2018-12-21 19:02                       ` Michael S. Tsirkin
2018-12-21 20:01                         ` Eduardo Habkost
2018-12-22  1:11                         ` Yu Zhang
2018-12-25 16:56                           ` Michael S. Tsirkin
2018-12-26  5:30                             ` Yu Zhang
2018-12-27 15:14                               ` Eduardo Habkost
2018-12-28  2:32                                 ` Yu Zhang
2018-12-29  1:29                                   ` Eduardo Habkost
2019-01-15  7:13                                     ` Yu Zhang
2019-01-18  7:10                                       ` Yu Zhang
2018-12-27 14:54                 ` Eduardo Habkost
2018-12-28 11:42                   ` Igor Mammedov
2018-12-20 20:58       ` Eduardo Habkost
2018-12-12 13:05 ` [Qemu-devel] [PATCH v3 2/2] intel-iommu: extend VTD emulation to allow 57-bit " Yu Zhang
2018-12-17 13:29   ` Igor Mammedov
2018-12-18  9:47     ` Yu Zhang
2018-12-18 10:01       ` Yu Zhang
2018-12-18 12:43         ` Michael S. Tsirkin
2018-12-18 13:45           ` Yu Zhang
2018-12-18 14:49             ` Michael S. Tsirkin
2018-12-19  3:40               ` Yu Zhang
2018-12-19  4:35                 ` Michael S. Tsirkin
2018-12-19  5:57                   ` Yu Zhang
2018-12-19 15:23                     ` Michael S. Tsirkin
2018-12-20  5:49                       ` Yu Zhang
2018-12-20 18:28                         ` Michael S. Tsirkin
2018-12-21 16:19                           ` Yu Zhang
2018-12-21 17:15                             ` Michael S. Tsirkin
2018-12-21 17:34                               ` Yu Zhang
2018-12-21 18:10                                 ` Michael S. Tsirkin
2018-12-22  0:41                                   ` Yu Zhang
2018-12-25 17:00                                     ` Michael S. Tsirkin
2018-12-26  5:58                                       ` Yu Zhang
2018-12-25  1:59                                 ` Tian, Kevin
2018-12-14  9:17 ` [Qemu-devel] [PATCH v3 0/2] intel-iommu: add support for 5-level virtual IOMMU Yu Zhang
2019-01-15  4:02 ` Michael S. Tsirkin
2019-01-15  7:27   ` Yu Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1544619939-18102-2-git-send-email-yu.c.zhang@linux.intel.com \
    --to=yu.c.zhang@linux.intel.com \
    --cc=ehabkost@redhat.com \
    --cc=imammedo@redhat.com \
    --cc=marcel.apfelbaum@gmail.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).