From: Jason Gunthorpe <jgg@ziepe.ca>
To: Yu Zhang <zhangyu1@linux.microsoft.com>
Cc: linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org,
iommu@lists.linux.dev, linux-pci@vger.kernel.org,
linux-arch@vger.kernel.org, wei.liu@kernel.org,
kys@microsoft.com, haiyangz@microsoft.com, decui@microsoft.com,
longli@microsoft.com, joro@8bytes.org, will@kernel.org,
robin.murphy@arm.com, bhelgaas@google.com,
kwilczynski@kernel.org, lpieralisi@kernel.org, mani@kernel.org,
robh@kernel.org, arnd@arndb.de, mhklinux@outlook.com,
jacob.pan@linux.microsoft.com, tgopinath@linux.microsoft.com,
easwar.hariharan@linux.microsoft.com
Subject: Re: [PATCH v1 3/4] iommu/hyperv: Add para-virtualized IOMMU support for Hyper-V guest
Date: Fri, 15 May 2026 19:31:39 -0300
Message-ID: <20260515223139.GK7702@ziepe.ca>
In-Reply-To: <20260511162408.1180069-4-zhangyu1@linux.microsoft.com>
On Tue, May 12, 2026 at 12:24:07AM +0800, Yu Zhang wrote:
> +/*
> + * Identity and blocking domains are static singletons: identity is a 1:1
> + * passthrough with no page table, blocking rejects all DMA. Neither holds
> + * per-IOMMU state, so one instance suffices even with multiple vIOMMUs.
> + */
> +static struct hv_iommu_domain hv_identity_domain;
> +static struct hv_iommu_domain hv_blocking_domain;
> +static const struct iommu_domain_ops hv_iommu_identity_domain_ops;
> +static const struct iommu_domain_ops hv_iommu_blocking_domain_ops;
Please follow the format of other drivers and statically initialize
these here instead of filling them in at runtime in C code.
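For example, something along these lines (a sketch only: the struct layouts and constant values below are stand-ins, the real ones come from <linux/iommu.h> and the driver's own headers):

```c
#include <assert.h>
#include <stddef.h>

/* Stand-in definitions; the real types come from <linux/iommu.h>. */
struct iommu_domain_ops { int (*attach_dev)(void); };
struct iommu_domain {
	unsigned int type;
	const struct iommu_domain_ops *ops;
};
struct hv_iommu_domain { struct iommu_domain domain; };

#define IOMMU_DOMAIN_IDENTITY 1	/* stand-in value */
#define IOMMU_DOMAIN_BLOCKED  2	/* stand-in value */

static int hv_iommu_attach_dev(void) { return 0; }

/* Ops first, so the domain singletons can point at them statically. */
static const struct iommu_domain_ops hv_iommu_identity_domain_ops = {
	.attach_dev = hv_iommu_attach_dev,
};

static const struct iommu_domain_ops hv_iommu_blocking_domain_ops = {
	.attach_dev = hv_iommu_attach_dev,
};

/*
 * type and ops are compile-time constants, so they can be set here with
 * designated initializers instead of in hv_initialize_static_domains().
 */
static struct hv_iommu_domain hv_identity_domain = {
	.domain = {
		.type = IOMMU_DOMAIN_IDENTITY,
		.ops = &hv_iommu_identity_domain_ops,
	},
};

static struct hv_iommu_domain hv_blocking_domain = {
	.domain = {
		.type = IOMMU_DOMAIN_BLOCKED,
		.ops = &hv_iommu_blocking_domain_ops,
	},
};
```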
> +static struct iommu_ops hv_iommu_ops;
> +static LIST_HEAD(hv_iommu_pci_bus_list);
> +static DEFINE_SPINLOCK(hv_iommu_pci_bus_lock);
> +
> +#define hv_iommu_present(iommu_cap) (iommu_cap & HV_IOMMU_CAP_PRESENT)
> +#define hv_iommu_s1_domain_supported(iommu_cap) (iommu_cap & HV_IOMMU_CAP_S1)
> +#define hv_iommu_5lvl_supported(iommu_cap) (iommu_cap & HV_IOMMU_CAP_S1_5LVL)
> +#define hv_iommu_ats_supported(iommu_cap) (iommu_cap & HV_IOMMU_CAP_ATS)
Prefer static inlines over macros for these.
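i.e. something like this (the capability bit values here are invented for illustration; the real ones come from the Hyper-V headers):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical bit assignments, for illustration only. */
#define HV_IOMMU_CAP_PRESENT (1ULL << 0)
#define HV_IOMMU_CAP_S1      (1ULL << 1)
#define HV_IOMMU_CAP_S1_5LVL (1ULL << 2)
#define HV_IOMMU_CAP_ATS     (1ULL << 3)

/*
 * Unlike the macro versions, static inlines type-check their argument,
 * evaluate it exactly once, and return a proper bool.
 */
static inline bool hv_iommu_present(uint64_t cap)
{
	return cap & HV_IOMMU_CAP_PRESENT;
}

static inline bool hv_iommu_s1_domain_supported(uint64_t cap)
{
	return cap & HV_IOMMU_CAP_S1;
}

static inline bool hv_iommu_5lvl_supported(uint64_t cap)
{
	return cap & HV_IOMMU_CAP_S1_5LVL;
}

static inline bool hv_iommu_ats_supported(uint64_t cap)
{
	return cap & HV_IOMMU_CAP_ATS;
}
```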
> +static void hv_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
> +{
> + u64 status;
> + unsigned long flags;
> + struct hv_input_detach_device_domain *input;
> + struct pci_dev *pdev;
> + struct hv_iommu_domain *hv_domain = to_hv_iommu_domain(domain);
> + struct hv_iommu_endpoint *vdev = dev_iommu_priv_get(dev);
> +
> + /* See the attach function, only PCI devices for now */
> + if (!dev_is_pci(dev) || vdev->hv_domain != hv_domain)
> + return;
> +
> + pdev = to_pci_dev(dev);
> +
> + dev_dbg(dev, "detaching from domain %d\n", hv_domain->device_domain.domain_id.id);
> +
> + local_irq_save(flags);
> +
> + input = *this_cpu_ptr(hyperv_pcpu_input_arg);
> + memset(input, 0, sizeof(*input));
> + input->partition_id = HV_PARTITION_ID_SELF;
> + if (hv_iommu_lookup_logical_dev_id(pdev, &input->device_id.as_uint64)) {
> + local_irq_restore(flags);
> + dev_warn(&pdev->dev, "no IOMMU registration for vPCI bus on detach\n");
> + return;
> + }
> + status = hv_do_hypercall(HVCALL_DETACH_DEVICE_DOMAIN, input, NULL);
FWIW, the hypervisor cannot implement the Linux attach semantics if all
it offers is detach/attach. It must support a plain 'attach' which
atomically changes the translation. With a separate detach you have a
confusing problem: if an error happens in the middle of the sequence,
the device is left in an unclear state. You should at least document
what state the hypervisor is supposed to leave the translation in when
these failures occur.
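To make the hazard concrete, a toy sketch (the stub "hypercalls" below are invented stand-ins, not the real HVCALL interface) of why a detach-then-attach sequence cannot give the core the semantics it expects, while a single atomic attach can:

```c
#include <assert.h>

/* Stubs standing in for the real attach/detach hypercalls. */
static int stub_detach_ok(int dev) { (void)dev; return 0; }
static int stub_attach_ok(int dev, int dom) { (void)dev; (void)dom; return 0; }
static int stub_attach_fail(int dev, int dom) { (void)dev; (void)dom; return -5; }

/*
 * Two-step sequence: if attach fails after detach already succeeded, the
 * device is left with no translation at all - a state the caller never
 * asked for and the core code cannot reason about.
 */
static int two_step_attach(int dev, int new_dom,
			   int (*detach)(int), int (*attach)(int, int))
{
	int ret = detach(dev);

	if (ret)
		return ret;	/* old translation still live: recoverable */

	return attach(dev, new_dom); /* on error: no translation at all */
}

/*
 * Atomic form: one call switches the translation, so an error simply
 * means "nothing changed" and the old attachment is still in place.
 */
static int atomic_attach(int dev, int new_dom, int (*attach)(int, int))
{
	return attach(dev, new_dom);
}
```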
> +static void hv_iommu_release_device(struct device *dev)
> +{
> + struct hv_iommu_endpoint *vdev = dev_iommu_priv_get(dev);
> + struct pci_dev *pdev = to_pci_dev(dev);
> +
> + if (pdev->ats_enabled)
> + pci_disable_ats(pdev);
> +
> + dev_iommu_priv_set(dev, NULL);
> + set_dma_ops(dev, NULL);
Does the driver really need to mess with set_dma_ops? I'd rather not
see that in new iommu drivers if at all possible :|
> +static int __init hv_initialize_static_domains(void)
> +{
> + int ret;
> + struct hv_iommu_domain *hv_domain;
> +
> + /* Default stage-1 identity domain */
> + hv_domain = &hv_identity_domain;
> +
> + ret = hv_create_device_domain(hv_domain, HV_DEVICE_DOMAIN_TYPE_S1);
> + if (ret)
> + return ret;
> +
> + ret = hv_configure_device_domain(hv_domain, IOMMU_DOMAIN_IDENTITY);
> + if (ret)
> + goto delete_identity_domain;
> +
> + hv_domain->domain.type = IOMMU_DOMAIN_IDENTITY;
> + hv_domain->domain.ops = &hv_iommu_identity_domain_ops;
> + hv_domain->domain.owner = &hv_iommu_ops;
> + hv_domain->domain.geometry = hv_iommu_device->geometry;
> + hv_domain->domain.pgsize_bitmap = hv_iommu_device->pgsize_bitmap;
identity doesn't use geometry or pgsize_bitmap
> + /* Default stage-1 blocked domain */
> + hv_domain = &hv_blocking_domain;
> +
> + ret = hv_create_device_domain(hv_domain, HV_DEVICE_DOMAIN_TYPE_S1);
> + if (ret)
> + goto delete_identity_domain;
> +
> + ret = hv_configure_device_domain(hv_domain, IOMMU_DOMAIN_BLOCKED);
> + if (ret)
> + goto delete_blocked_domain;
> +
> + hv_domain->domain.type = IOMMU_DOMAIN_BLOCKED;
> + hv_domain->domain.ops = &hv_iommu_blocking_domain_ops;
> + hv_domain->domain.owner = &hv_iommu_ops;
> + hv_domain->domain.geometry = hv_iommu_device->geometry;
> + hv_domain->domain.pgsize_bitmap = hv_iommu_device->pgsize_bitmap;
Nor does blocked
> +#define INTERRUPT_RANGE_START (0xfee00000)
> +#define INTERRUPT_RANGE_END (0xfeefffff)
> +static void hv_iommu_get_resv_regions(struct device *dev,
> + struct list_head *head)
> +{
> + struct iommu_resv_region *region;
> +
> + region = iommu_alloc_resv_region(INTERRUPT_RANGE_START,
> + INTERRUPT_RANGE_END - INTERRUPT_RANGE_START + 1,
> + 0, IOMMU_RESV_MSI, GFP_KERNEL);
Surprised these constants are not discovered from the hv?
> +static void hv_iommu_flush_iotlb_all(struct iommu_domain *domain)
> +{
> + hv_flush_device_domain(to_hv_iommu_domain(domain));
> +}
> +
> +static void hv_iommu_iotlb_sync(struct iommu_domain *domain,
> + struct iommu_iotlb_gather *iotlb_gather)
> +{
> + hv_flush_device_domain(to_hv_iommu_domain(domain));
> +
> + iommu_put_pages_list(&iotlb_gather->freelist);
> +}
Full invalidation only huh?
> +static const struct iommu_domain_ops hv_iommu_identity_domain_ops = {
> + .attach_dev = hv_iommu_attach_dev,
> +};
> +
> +static const struct iommu_domain_ops hv_iommu_blocking_domain_ops = {
> + .attach_dev = hv_iommu_attach_dev,
> +};
Usually I would expect these to have their own attach
functions. Blocking in particular must have an attach op that cannot
fail: it is used to recover the device back to a known translation when
other errors cascade.
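Roughly this shape (a sketch with stand-in types and a stubbed hypercall result; the real driver would use WARN_ON_ONCE and the actual hypercall path):

```c
#include <assert.h>
#include <stddef.h>

/* Stand-ins for the kernel types from <linux/iommu.h>. */
struct device;
struct iommu_domain;
struct iommu_domain_ops {
	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
};

static int hypercall_status;	/* result of the (stubbed) attach hypercall */

/* Regular attach: may legitimately fail; the error goes back to the core. */
static int hv_iommu_attach_dev_sketch(struct iommu_domain *domain,
				      struct device *dev)
{
	(void)domain; (void)dev;
	return hypercall_status;
}

/*
 * Blocking attach: the core relies on this to put the device back into a
 * known translation when other errors cascade, so it must not fail. The
 * usual pattern is to pre-allocate and pre-register everything at probe
 * time so only an "impossible" hypercall error remains; that would be
 * WARNed about in the real driver, never propagated.
 */
static int hv_iommu_blocking_attach_sketch(struct iommu_domain *domain,
					   struct device *dev)
{
	(void)domain; (void)dev;
	/* WARN_ON_ONCE(<hypercall failed>) in the real driver, but always
	 * report success - the core has no fallback beyond blocking. */
	return 0;
}

static const struct iommu_domain_ops blocking_ops_sketch = {
	.attach_dev = hv_iommu_blocking_attach_sketch,
};
```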
> +static const struct iommu_domain_ops hv_iommu_paging_domain_ops = {
> + .attach_dev = hv_iommu_attach_dev,
> + IOMMU_PT_DOMAIN_OPS(x86_64),
> + .flush_iotlb_all = hv_iommu_flush_iotlb_all,
> + .iotlb_sync = hv_iommu_iotlb_sync,
> + .free = hv_iommu_paging_domain_free,
> +};
> +
> +static struct iommu_domain *hv_iommu_domain_alloc_paging(struct device *dev)
> +{
> + int ret;
> + struct hv_iommu_domain *hv_domain;
> + struct pt_iommu_x86_64_cfg cfg = {};
> +
> + hv_domain = kzalloc_obj(*hv_domain, GFP_KERNEL);
> + if (!hv_domain)
> + return ERR_PTR(-ENOMEM);
> +
> + ret = hv_create_device_domain(hv_domain, HV_DEVICE_DOMAIN_TYPE_S1);
> + if (ret) {
> + kfree(hv_domain);
> + return ERR_PTR(ret);
> + }
> +
> + hv_domain->domain.geometry = hv_iommu_device->geometry;
geometry shouldn't be set here; it is overridden by
pt_iommu_x86_64_init() with the exact page table configuration
> +static void __init hv_init_iommu_device(struct hv_iommu_dev *hv_iommu,
> + struct hv_output_get_iommu_capabilities *hv_iommu_cap)
> +{
> + ida_init(&hv_iommu->domain_ids);
> +
> + hv_iommu->cap = hv_iommu_cap->iommu_cap;
> + hv_iommu->max_iova_width = hv_iommu_cap->max_iova_width;
> + if (!hv_iommu_5lvl_supported(hv_iommu->cap) &&
> + hv_iommu->max_iova_width > 48) {
> + pr_info("5-level paging not supported, limiting iova width to 48.\n");
> + hv_iommu->max_iova_width = 48;
> + }
> +
> + hv_iommu->geometry = (struct iommu_domain_geometry) {
> + .aperture_start = 0,
> + .aperture_end = (((u64)1) << hv_iommu->max_iova_width) - 1,
> + .force_aperture = true,
> + };
> +
> + hv_iommu->first_domain = HV_DEVICE_DOMAIN_ID_DEFAULT + 1;
> + hv_iommu->last_domain = HV_DEVICE_DOMAIN_ID_NULL - 1;
> + /* Only x86 page sizes (4K/2M/1G) are supported */
> + hv_iommu->pgsize_bitmap = hv_iommu_cap->pgsize_bitmap &
> + (SZ_4K | SZ_2M | SZ_1G);
> + if (hv_iommu->pgsize_bitmap != hv_iommu_cap->pgsize_bitmap)
> + pr_warn("unsupported page sizes masked: 0x%llx -> 0x%llx\n",
> + hv_iommu_cap->pgsize_bitmap, hv_iommu->pgsize_bitmap);
IDK if you need this logic; the way the page table code is used, it
just sort of falls out naturally that other page sizes are ignored.
> +struct hv_iommu_domain {
> + union {
> + struct iommu_domain domain;
> + struct pt_iommu pt_iommu;
> + struct pt_iommu_x86_64 pt_iommu_x86_64;
> + };
You should retain the build assertions here
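Something like the following (struct layouts are stand-ins for the real iommu/pt headers; the point is that the static_asserts turn a layout mistake into a compile error):

```c
#include <assert.h>
#include <stddef.h>

/* Stand-in layouts; the real structs come from the iommu/pt headers. */
struct iommu_domain { unsigned int type; };
struct pt_iommu { struct iommu_domain domain; };
struct pt_iommu_x86_64 { struct pt_iommu iommu; };

struct hv_iommu_domain {
	union {
		struct iommu_domain domain;
		struct pt_iommu pt_iommu;
		struct pt_iommu_x86_64 pt_iommu_x86_64;
	};
};

/*
 * Build assertions: every view of the union must start with the embedded
 * iommu_domain, otherwise the to_hv_iommu_domain()-style casts between
 * the views are silently wrong.
 */
static_assert(offsetof(struct hv_iommu_domain, domain) == 0,
	      "iommu_domain must be at offset 0");
static_assert(offsetof(struct hv_iommu_domain, pt_iommu.domain) == 0,
	      "pt_iommu must begin with iommu_domain");
static_assert(offsetof(struct hv_iommu_domain, pt_iommu_x86_64.iommu.domain) == 0,
	      "pt_iommu_x86_64 must begin with pt_iommu");
```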
Jason