All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Auger <eric.auger@redhat.com>
To: Zhenzhong Duan <zhenzhong.duan@intel.com>, qemu-devel@nongnu.org
Cc: alex@shazbot.org, clg@redhat.com, mst@redhat.com,
	jasowang@redhat.com, peterx@redhat.com, ddutile@redhat.com,
	jgg@nvidia.com, nicolinc@nvidia.com, skolothumtho@nvidia.com,
	joao.m.martins@oracle.com, clement.mathieu--drif@eviden.com,
	kevin.tian@intel.com, yi.l.liu@intel.com, chao.p.peng@intel.com,
	Yi Sun <yi.y.sun@linux.intel.com>
Subject: Re: [PATCH v10 13/19] intel_iommu_accel: Bind/unbind guest page table to host
Date: Tue, 6 Jan 2026 08:46:14 +0100	[thread overview]
Message-ID: <47e8dccf-ae2c-40ba-b6ce-96fde1da7610@redhat.com> (raw)
In-Reply-To: <20260106061304.314546-14-zhenzhong.duan@intel.com>



On 1/6/26 7:12 AM, Zhenzhong Duan wrote:
> This captures the guest PASID table entry modifications and propagates
> the changes to host to attach a hwpt with type determined per guest IOMMU
> PGTT configuration.
>
> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
> coming from VFIO.
>
> Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>

Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric


> ---
>  hw/i386/intel_iommu_accel.h   |   7 +++
>  include/hw/i386/intel_iommu.h |   2 +
>  hw/i386/intel_iommu.c         |  22 ++++++-
>  hw/i386/intel_iommu_accel.c   | 114 ++++++++++++++++++++++++++++++++++
>  hw/i386/trace-events          |   3 +
>  5 files changed, 145 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index d049cab3e1..82821ec0ef 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -16,6 +16,7 @@
>  bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
>                            Error **errp);
>  VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as);
> +bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>  #else
>  static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>                                          VTDHostIOMMUDevice *vtd_hiod,
> @@ -30,5 +31,11 @@ static inline VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
>  {
>      return NULL;
>  }
> +
> +static inline bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as,
> +                                             Error **errp)
> +{
> +    return true;
> +}
>  #endif
>  #endif
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 401322665a..6c61fd39c7 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -154,6 +154,8 @@ struct VTDAddressSpace {
>       * with the guest IOMMU pgtables for a device.
>       */
>      IOVATree *iova_tree;
> +
> +    uint32_t fs_hwpt_id;
>  };
>  
>  struct VTDIOTLBEntry {
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 9edd625b1a..f9b80e3257 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -87,7 +87,11 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>      g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>      while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>          VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> -        pc_entry->valid = false;
> +        if (pc_entry->valid) {
> +            pc_entry->valid = false;
> +            /* It's fatal to get failure during reset */
> +            vtd_propagate_guest_pasid(vtd_as, &error_fatal);
> +        }
>      }
>  }
>  
> @@ -3073,6 +3077,8 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>      VTDPASIDEntry pe;
>      IOMMUNotifier *n;
>      uint16_t did;
> +    const char *err_prefix = "Attaching to HWPT failed: ";
> +    Error *local_err = NULL;
>  
>      if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>          if (!pc_entry->valid) {
> @@ -3093,7 +3099,9 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>              vtd_address_space_unmap(vtd_as, n);
>          }
>          vtd_switch_address_space(vtd_as);
> -        return;
> +
> +        err_prefix = "Detaching from HWPT failed: ";
> +        goto do_bind_unbind;
>      }
>  
>      /*
> @@ -3121,12 +3129,20 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>      if (!pc_entry->valid) {
>          pc_entry->pasid_entry = pe;
>          pc_entry->valid = true;
> -    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Replacing HWPT attachment failed: ";
> +    } else {
>          return;
>      }
>  
>      vtd_switch_address_space(vtd_as);
>      vtd_address_space_sync(vtd_as);
> +
> +do_bind_unbind:
> +    /* TODO: Fault event injection into guest, report error to QEMU for now */
> +    if (!vtd_propagate_guest_pasid(vtd_as, &local_err)) {
> +        error_reportf_err(local_err, "%s", err_prefix);
> +    }
>  }
>  
>  static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index ebfc503d64..748a6c7e1c 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -13,6 +13,7 @@
>  #include "intel_iommu_internal.h"
>  #include "intel_iommu_accel.h"
>  #include "hw/pci/pci_bus.h"
> +#include "trace.h"
>  
>  bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
>                            Error **errp)
> @@ -68,3 +69,116 @@ VTDHostIOMMUDevice *vtd_find_hiod_iommufd(VTDAddressSpace *as)
>      }
>      return NULL;
>  }
> +
> +static bool vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                               VTDPASIDEntry *pe, uint32_t *fs_hwpt_id,
> +                               Error **errp)
> +{
> +    struct iommu_hwpt_vtd_s1 vtd = {};
> +
> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE(pe) ? IOMMU_VTD_S1_SRE : 0) |
> +                (VTD_SM_PASID_ENTRY_WPE(pe) ? IOMMU_VTD_S1_WPE : 0) |
> +                (VTD_SM_PASID_ENTRY_EAFE(pe) ? IOMMU_VTD_S1_EAFE : 0);
> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
> +
> +    return iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, idev->hwpt_id,
> +                                      0, IOMMU_HWPT_DATA_VTD_S1, sizeof(vtd),
> +                                      &vtd, fs_hwpt_id, errp);
> +}
> +
> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                                    VTDAddressSpace *vtd_as)
> +{
> +    if (!vtd_as->fs_hwpt_id) {
> +        return;
> +    }
> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt_id);
> +    vtd_as->fs_hwpt_id = 0;
> +}
> +
> +static bool vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                      VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
> +    uint32_t hwpt_id = idev->hwpt_id;
> +    bool ret;
> +
> +    /*
> +     * We can get here only if flts=on, the supported PGTT is FST or PT.
> +     * Catch invalid PGTT when processing invalidation request to avoid
> +     * attaching to wrong hwpt.
> +     */
> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
> +        error_setg(errp, "Invalid PGTT type %d",
> +                   (uint8_t)VTD_SM_PASID_ENTRY_PGTT(pe));
> +        return false;
> +    }
> +
> +    if (vtd_pe_pgtt_is_fst(pe)) {
> +        if (!vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
> +            return false;
> +        }
> +    }
> +
> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, ret);
> +    if (ret) {
> +        /* Destroy old fs_hwpt if it's a replacement */
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +        if (vtd_pe_pgtt_is_fst(pe)) {
> +            vtd_as->fs_hwpt_id = hwpt_id;
> +        }
> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
> +    }
> +
> +    return ret;
> +}
> +
> +static bool vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                      VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    uint32_t pasid = vtd_as->pasid;
> +    bool ret;
> +
> +    if (s->dmar_enabled && s->root_scalable) {
> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, ret);
> +    } else {
> +        /*
> +         * If DMAR remapping is disabled or guest switches to legacy mode,
> +         * we fallback to the default HWPT which contains shadow page table.
> +         * So guest DMA could still work.
> +         */
> +        ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
> +                                           ret);
> +    }
> +
> +    if (ret) {
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +    }
> +
> +    return ret;
> +}
> +
> +bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> +    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
> +
> +    /* Ignore emulated device or legacy VFIO backed device */
> +    if (!vtd_as->iommu_state->fsts || !vtd_hiod) {
> +        return true;
> +    }
> +
> +    if (pc_entry->valid) {
> +        return vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
> +    }
> +
> +    return vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
> +}
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index b704f4f90c..5a3ee1cf64 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -73,6 +73,9 @@ vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>  vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
>  vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
>  vtd_reset_exit(void) ""
> +vtd_device_attach_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
> +vtd_device_detach_hwpt(uint32_t dev_id, uint32_t pasid, int ret) "dev_id %d pasid %d ret: %d"
> +vtd_device_reattach_def_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
>  
>  # amd_iommu.c
>  amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32



  reply	other threads:[~2026-01-06  7:46 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-06  6:12 [PATCH v10 00/19] intel_iommu: Enable first stage translation for passthrough device Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 01/19] intel_iommu: Rename vtd_ce_get_rid2pasid_entry to vtd_ce_get_pasid_entry Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 02/19] intel_iommu: Delete RPS capability related supporting code Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 03/19] intel_iommu: Update terminology to match VTD spec Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 04/19] hw/pci: Export pci_device_get_iommu_bus_devfn() and return bool Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 05/19] hw/pci: Introduce pci_device_get_viommu_flags() Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 06/19] intel_iommu: Implement get_viommu_flags() callback Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 07/19] intel_iommu: Introduce a new structure VTDHostIOMMUDevice Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 08/19] vfio/iommufd: Force creating nesting parent HWPT Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 09/19] intel_iommu_accel: Check for compatibility with IOMMUFD backed device when x-flts=on Zhenzhong Duan
2026-01-13  3:12   ` Yi Liu
2026-01-06  6:12 ` [PATCH v10 10/19] intel_iommu_accel: Fail passthrough device under PCI bridge if x-flts=on Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 11/19] intel_iommu_accel: Stick to system MR for IOMMUFD backed host device when x-flts=on Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 12/19] intel_iommu: Add some macros and inline functions Zhenzhong Duan
2026-01-06  7:38   ` Eric Auger
2026-01-06  6:12 ` [PATCH v10 13/19] intel_iommu_accel: Bind/unbind guest page table to host Zhenzhong Duan
2026-01-06  7:46   ` Eric Auger [this message]
2026-01-06  6:12 ` [PATCH v10 14/19] intel_iommu_accel: Propagate PASID-based iotlb invalidation " Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 15/19] intel_iommu: Replay all pasid bindings when either SRTP or TE bit is changed Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 16/19] intel_iommu: Replay pasid bindings after context cache invalidation Zhenzhong Duan
2026-01-06  6:12 ` [PATCH v10 17/19] intel_iommu: Add migration support with x-flts=on Zhenzhong Duan
2026-01-06  6:13 ` [PATCH v10 18/19] intel_iommu: Enable host device when x-flts=on in scalable mode Zhenzhong Duan
2026-01-06  6:13 ` [PATCH v10 19/19] docs/devel: Add IOMMUFD nesting documentation Zhenzhong Duan
2026-01-09 14:34 ` [PATCH v10 00/19] intel_iommu: Enable first stage translation for passthrough device Cédric Le Goater
2026-01-09 14:50   ` Cédric Le Goater
2026-01-12 22:04   ` Michael S. Tsirkin
2026-01-13  3:12   ` Yi Liu
2026-01-13  7:35     ` Cédric Le Goater

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47e8dccf-ae2c-40ba-b6ce-96fde1da7610@redhat.com \
    --to=eric.auger@redhat.com \
    --cc=alex@shazbot.org \
    --cc=chao.p.peng@intel.com \
    --cc=clement.mathieu--drif@eviden.com \
    --cc=clg@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=mst@redhat.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=skolothumtho@nvidia.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.