[PATCH 5/5] vfio/iommu_type1: Simplify group attachment

public inbox for linux-tegra@vger.kernel.org
 help / color / mirror / Atom feed

From: Nicolin Chen <nicolinc@nvidia.com>
To: <jgg@nvidia.com>, <joro@8bytes.org>, <will@kernel.org>,
	<marcan@marcan.st>, <sven@svenpeter.dev>, <robin.murphy@arm.com>,
	<robdclark@gmail.com>, <m.szyprowski@samsung.com>,
	<krzysztof.kozlowski@linaro.org>, <baolu.lu@linux.intel.com>,
	<agross@kernel.org>, <bjorn.andersson@linaro.org>,
	<matthias.bgg@gmail.com>, <heiko@sntech.de>,
	<orsonzhai@gmail.com>, <baolin.wang7@gmail.com>,
	<zhang.lyra@gmail.com>, <wens@csie.org>,
	<jernej.skrabec@gmail.com>, <samuel@sholland.org>,
	<jean-philippe@linaro.org>, <alex.williamson@redhat.com>
Cc: <suravee.suthikulpanit@amd.com>, <alyssa@rosenzweig.io>,
	<alim.akhtar@samsung.com>, <dwmw2@infradead.org>,
	<yong.wu@mediatek.com>, <mjrosato@linux.ibm.com>,
	<gerald.schaefer@linux.ibm.com>, <thierry.reding@gmail.com>,
	<vdumpa@nvidia.com>, <jonathanh@nvidia.com>, <cohuck@redhat.com>,
	<iommu@lists.linux-foundation.org>,
	<linux-kernel@vger.kernel.org>,
	<linux-arm-kernel@lists.infradead.org>,
	<linux-arm-msm@vger.kernel.org>,
	<linux-samsung-soc@vger.kernel.org>,
	<linux-mediatek@lists.infradead.org>,
	<linux-rockchip@lists.infradead.org>,
	<linux-s390@vger.kernel.org>, <linux-sunxi@lists.linux.dev>,
	<linux-tegra@vger.kernel.org>,
	<virtualization@lists.linux-foundation.org>,
	<kvm@vger.kernel.org>
Subject: [PATCH 5/5] vfio/iommu_type1: Simplify group attachment
Date: Sun, 5 Jun 2022 23:19:27 -0700	[thread overview]
Message-ID: <20220606061927.26049-6-nicolinc@nvidia.com> (raw)
In-Reply-To: <20220606061927.26049-1-nicolinc@nvidia.com>

Un-inline the domain specific logic from the attach/detach_group ops into
two paired functions vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain() that strictly deal with creating and
destroying struct vfio_domains.

Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows properly detecting an actual attach error, like
-ENOMEM, vs treating all attach errors as an incompatible domain.

Remove the duplicated domain->ops comparison that is taken care of in the
IOMMU core.

Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/vfio/vfio_iommu_type1.c | 306 +++++++++++++++++---------------
 1 file changed, 161 insertions(+), 145 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b45b1cc118ef..c6f937e1d71f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -86,6 +86,7 @@ struct vfio_domain {
 	struct list_head	group_list;
 	bool			fgsp : 1;	/* Fine-grained super pages */
 	bool			enforce_cache_coherency : 1;
+	bool			msi_cookie : 1;
 };
 
 struct vfio_dma {
@@ -2153,12 +2154,161 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
 	list_splice_tail(iova_copy, iova);
 }
 
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+			       struct vfio_iommu_group *group)
+{
+	struct iommu_domain *new_domain;
+	struct vfio_domain *domain;
+	int ret = 0;
+
+	/* Try to match an existing compatible domain */
+	list_for_each_entry (domain, &iommu->domain_list, next) {
+		ret = iommu_attach_group(domain->domain, group->iommu_group);
+		if (ret == -EMEDIUMTYPE)
+			continue;
+		if (ret)
+			return ERR_PTR(ret);
+		list_add(&group->next, &domain->group_list);
+		return domain;
+	}
+
+	new_domain = iommu_domain_alloc(bus);
+	if (!new_domain)
+		return ERR_PTR(-EIO);
+
+	if (iommu->nesting) {
+		ret = iommu_enable_nesting(new_domain);
+		if (ret)
+			goto out_free_iommu_domain;
+	}
+
+	ret = iommu_attach_group(new_domain, group->iommu_group);
+	if (ret)
+		goto out_free_iommu_domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain) {
+		ret = -ENOMEM;
+		goto out_detach;
+	}
+
+	domain->domain = new_domain;
+	vfio_test_domain_fgsp(domain);
+
+	/*
+	 * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+	 * no-snoop set) then VFIO always turns this feature on because on Intel
+	 * platforms it optimizes KVM to disable wbinvd emulation.
+	 */
+	if (new_domain->ops->enforce_cache_coherency)
+		domain->enforce_cache_coherency =
+			new_domain->ops->enforce_cache_coherency(new_domain);
+
+	/* replay mappings on new domains */
+	ret = vfio_iommu_replay(iommu, domain);
+	if (ret)
+		goto out_free_domain;
+
+	/*
+	 * An iommu backed group can dirty memory directly and therefore
+	 * demotes the iommu scope until it declares itself dirty tracking
+	 * capable via the page pinning interface.
+	 */
+	iommu->num_non_pinned_groups++;
+
+	INIT_LIST_HEAD(&domain->group_list);
+	list_add(&group->next, &domain->group_list);
+	list_add(&domain->next, &iommu->domain_list);
+	vfio_update_pgsize_bitmap(iommu);
+	return domain;
+
+out_free_domain:
+	kfree(domain);
+out_detach:
+	iommu_detach_group(domain->domain, group->iommu_group);
+out_free_iommu_domain:
+	iommu_domain_free(new_domain);
+	return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+	struct rb_node *node;
+
+	while ((node = rb_first(&iommu->dma_list)))
+		vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+	struct rb_node *n, *p;
+
+	n = rb_first(&iommu->dma_list);
+	for (; n; n = rb_next(n)) {
+		struct vfio_dma *dma;
+		long locked = 0, unlocked = 0;
+
+		dma = rb_entry(n, struct vfio_dma, node);
+		unlocked += vfio_unmap_unpin(iommu, dma, false);
+		p = rb_first(&dma->pfn_list);
+		for (; p; p = rb_next(p)) {
+			struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+							 node);
+
+			if (!is_invalid_reserved_pfn(vpfn->pfn))
+				locked++;
+		}
+		vfio_lock_acct(dma, locked - unlocked, true);
+	}
+}
+
+static void vfio_iommu_detach_destroy_domain(struct vfio_domain *domain,
+					     struct vfio_iommu *iommu,
+					     struct vfio_iommu_group *group)
+{
+	iommu_detach_group(domain->domain, group->iommu_group);
+	list_del(&group->next);
+	if (!list_empty(&domain->group_list))
+		return;
+
+	/*
+	 * Group ownership provides privilege, if the group list is empty, the
+	 * domain goes away. If it's the last domain with iommu and external
+	 * domain doesn't exist, then all the mappings go away too. If it's the
+	 * last domain with iommu and external domain exist, update accounting
+	 */
+	if (list_is_singular(&iommu->domain_list)) {
+		if (list_empty(&iommu->emulated_iommu_groups)) {
+			WARN_ON(iommu->notifier.head);
+			vfio_iommu_unmap_unpin_all(iommu);
+		} else {
+			vfio_iommu_unmap_unpin_reaccount(iommu);
+		}
+	}
+	iommu_domain_free(domain->domain);
+	list_del(&domain->next);
+	kfree(domain);
+
+	/*
+	 * Removal of a group without dirty tracking may allow the iommu scope
+	 * to be promoted.
+	 */
+	if (!group->pinned_page_dirty_scope) {
+		iommu->num_non_pinned_groups--;
+		if (iommu->dirty_page_tracking)
+			vfio_iommu_populate_bitmap_full(iommu);
+	}
+
+	vfio_update_pgsize_bitmap(iommu);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 		struct iommu_group *iommu_group, enum vfio_group_type type)
 {
 	struct vfio_iommu *iommu = iommu_data;
 	struct vfio_iommu_group *group;
-	struct vfio_domain *domain, *d;
+	struct vfio_domain *domain;
 	struct bus_type *bus = NULL;
 	bool resv_msi, msi_remap;
 	phys_addr_t resv_msi_base = 0;
@@ -2197,26 +2347,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (ret)
 		goto out_free_group;
 
-	ret = -ENOMEM;
-	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
-	if (!domain)
+	domain = vfio_iommu_alloc_attach_domain(bus, iommu, group);
+	if (IS_ERR(domain)) {
+		ret = PTR_ERR(domain);
 		goto out_free_group;
-
-	ret = -EIO;
-	domain->domain = iommu_domain_alloc(bus);
-	if (!domain->domain)
-		goto out_free_domain;
-
-	if (iommu->nesting) {
-		ret = iommu_enable_nesting(domain->domain);
-		if (ret)
-			goto out_domain;
 	}
 
-	ret = iommu_attach_group(domain->domain, group->iommu_group);
-	if (ret)
-		goto out_domain;
-
 	/* Get aperture info */
 	geo = &domain->domain->geometry;
 	if (vfio_iommu_aper_conflict(iommu, geo->aperture_start,
@@ -2254,9 +2390,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 
 	resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
 
-	INIT_LIST_HEAD(&domain->group_list);
-	list_add(&group->next, &domain->group_list);
-
 	msi_remap = irq_domain_check_msi_remap() ||
 		    iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
 
@@ -2267,117 +2400,32 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_detach;
 	}
 
-	/*
-	 * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
-	 * no-snoop set) then VFIO always turns this feature on because on Intel
-	 * platforms it optimizes KVM to disable wbinvd emulation.
-	 */
-	if (domain->domain->ops->enforce_cache_coherency)
-		domain->enforce_cache_coherency =
-			domain->domain->ops->enforce_cache_coherency(
-				domain->domain);
-
-	/*
-	 * Try to match an existing compatible domain.  We don't want to
-	 * preclude an IOMMU driver supporting multiple bus_types and being
-	 * able to include different bus_types in the same IOMMU domain, so
-	 * we test whether the domains use the same iommu_ops rather than
-	 * testing if they're on the same bus_type.
-	 */
-	list_for_each_entry(d, &iommu->domain_list, next) {
-		if (d->domain->ops == domain->domain->ops) {
-			iommu_detach_group(domain->domain, group->iommu_group);
-			if (!iommu_attach_group(d->domain,
-						group->iommu_group)) {
-				list_add(&group->next, &d->group_list);
-				iommu_domain_free(domain->domain);
-				kfree(domain);
-				goto done;
-			}
-
-			ret = iommu_attach_group(domain->domain,
-						 group->iommu_group);
-			if (ret)
-				goto out_domain;
-		}
-	}
-
-	vfio_test_domain_fgsp(domain);
-
-	/* replay mappings on new domains */
-	ret = vfio_iommu_replay(iommu, domain);
-	if (ret)
-		goto out_detach;
-
-	if (resv_msi) {
+	if (resv_msi && !domain->msi_cookie) {
 		ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
 		if (ret && ret != -ENODEV)
 			goto out_detach;
+		domain->msi_cookie = true;
 	}
 
-	list_add(&domain->next, &iommu->domain_list);
-	vfio_update_pgsize_bitmap(iommu);
-done:
 	/* Delete the old one and insert new iova list */
 	vfio_iommu_iova_insert_copy(iommu, &iova_copy);
 
-	/*
-	 * An iommu backed group can dirty memory directly and therefore
-	 * demotes the iommu scope until it declares itself dirty tracking
-	 * capable via the page pinning interface.
-	 */
-	iommu->num_non_pinned_groups++;
 	mutex_unlock(&iommu->lock);
 	vfio_iommu_resv_free(&group_resv_regions);
 
 	return 0;
 
 out_detach:
-	iommu_detach_group(domain->domain, group->iommu_group);
-out_domain:
-	iommu_domain_free(domain->domain);
-	vfio_iommu_iova_free(&iova_copy);
-	vfio_iommu_resv_free(&group_resv_regions);
-out_free_domain:
-	kfree(domain);
+	vfio_iommu_detach_destroy_domain(domain, iommu, group);
 out_free_group:
 	kfree(group);
 out_unlock:
 	mutex_unlock(&iommu->lock);
+	vfio_iommu_iova_free(&iova_copy);
+	vfio_iommu_resv_free(&group_resv_regions);
 	return ret;
 }
 
-static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
-{
-	struct rb_node *node;
-
-	while ((node = rb_first(&iommu->dma_list)))
-		vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
-}
-
-static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
-{
-	struct rb_node *n, *p;
-
-	n = rb_first(&iommu->dma_list);
-	for (; n; n = rb_next(n)) {
-		struct vfio_dma *dma;
-		long locked = 0, unlocked = 0;
-
-		dma = rb_entry(n, struct vfio_dma, node);
-		unlocked += vfio_unmap_unpin(iommu, dma, false);
-		p = rb_first(&dma->pfn_list);
-		for (; p; p = rb_next(p)) {
-			struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
-							 node);
-
-			if (!is_invalid_reserved_pfn(vpfn->pfn))
-				locked++;
-		}
-		vfio_lock_acct(dma, locked - unlocked, true);
-	}
-}
-
 /*
  * Called when a domain is removed in detach. It is possible that
  * the removed domain decided the iova aperture window. Modify the
@@ -2491,44 +2539,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 		group = find_iommu_group(domain, iommu_group);
 		if (!group)
 			continue;
-
-		iommu_detach_group(domain->domain, group->iommu_group);
-		list_del(&group->next);
-		/*
-		 * Group ownership provides privilege, if the group list is
-		 * empty, the domain goes away. If it's the last domain with
-		 * iommu and external domain doesn't exist, then all the
-		 * mappings go away too. If it's the last domain with iommu and
-		 * external domain exist, update accounting
-		 */
-		if (list_empty(&domain->group_list)) {
-			if (list_is_singular(&iommu->domain_list)) {
-				if (list_empty(&iommu->emulated_iommu_groups)) {
-					WARN_ON(iommu->notifier.head);
-					vfio_iommu_unmap_unpin_all(iommu);
-				} else {
-					vfio_iommu_unmap_unpin_reaccount(iommu);
-				}
-			}
-			iommu_domain_free(domain->domain);
-			list_del(&domain->next);
-			kfree(domain);
-			vfio_iommu_aper_expand(iommu, &iova_copy);
-			vfio_update_pgsize_bitmap(iommu);
-			/*
-			 * Removal of a group without dirty tracking may allow
-			 * the iommu scope to be promoted.
-			 */
-			if (!group->pinned_page_dirty_scope) {
-				iommu->num_non_pinned_groups--;
-				if (iommu->dirty_page_tracking)
-					vfio_iommu_populate_bitmap_full(iommu);
-			}
-		}
+		vfio_iommu_detach_destroy_domain(domain, iommu, group);
 		kfree(group);
 		break;
 	}
 
+	vfio_iommu_aper_expand(iommu, &iova_copy);
 	if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
 		vfio_iommu_iova_insert_copy(iommu, &iova_copy);
 	else
-- 
2.17.1

next prev parent reply	other threads:[~2022-06-06  6:20 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-06  6:19 [PATCH 0/5] Simplify vfio_iommu_type1 attach/detach routine Nicolin Chen
2022-06-06  6:19 ` [PATCH 1/5] iommu: Return -EMEDIUMTYPE for incompatible domain and device/group Nicolin Chen
2022-06-07  3:23   ` Baolu Lu
2022-06-07  4:03     ` Nicolin Chen
2022-06-08  7:49   ` Tian, Kevin
2022-06-08 17:38     ` Nicolin Chen
2022-06-06  6:19 ` [PATCH 2/5] iommu: Ensure device has the same iommu_ops as the domain Nicolin Chen
2022-06-06 14:33   ` Robin Murphy
2022-06-06 16:51     ` Nicolin Chen
2022-06-06 17:50       ` Robin Murphy
2022-06-06 18:28         ` Nicolin Chen
2022-06-06 18:52           ` Jason Gunthorpe
2022-06-06  6:19 ` [PATCH 3/5] vfio/iommu_type1: Prefer to reuse domains vs match enforced cache coherency Nicolin Chen
2022-06-08  8:28   ` Tian, Kevin
2022-06-08 11:17     ` Jason Gunthorpe
2022-06-08 23:48       ` Tian, Kevin
2022-06-14 20:45         ` Nicolin Chen
2022-06-15  7:35           ` Tian, Kevin
2022-06-15 23:12             ` Nicolin Chen
2022-06-06  6:19 ` [PATCH 4/5] vfio/iommu_type1: Clean up update_dirty_scope in detach_group() Nicolin Chen
2022-06-08  8:35   ` Tian, Kevin
2022-06-08 17:46     ` Nicolin Chen
2022-06-06  6:19 ` Nicolin Chen [this message]
2022-06-07  7:44 ` [PATCH 0/5] Simplify vfio_iommu_type1 attach/detach routine Baolu Lu
2022-06-07 11:58   ` Jason Gunthorpe
2022-06-07 12:42     ` Baolu Lu

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b45b1cc118e dfblob:c6f937e1d71 )
 OR (
bs:"[PATCH 5/5] vfio/iommu_type1: Simplify group attachment" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220606061927.26049-6-nicolinc@nvidia.com \
    --to=nicolinc@nvidia.com \
    --cc=agross@kernel.org \
    --cc=alex.williamson@redhat.com \
    --cc=alim.akhtar@samsung.com \
    --cc=alyssa@rosenzweig.io \
    --cc=baolin.wang7@gmail.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=bjorn.andersson@linaro.org \
    --cc=cohuck@redhat.com \
    --cc=dwmw2@infradead.org \
    --cc=gerald.schaefer@linux.ibm.com \
    --cc=heiko@sntech.de \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jean-philippe@linaro.org \
    --cc=jernej.skrabec@gmail.com \
    --cc=jgg@nvidia.com \
    --cc=jonathanh@nvidia.com \
    --cc=joro@8bytes.org \
    --cc=krzysztof.kozlowski@linaro.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mediatek@lists.infradead.org \
    --cc=linux-rockchip@lists.infradead.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linux-samsung-soc@vger.kernel.org \
    --cc=linux-sunxi@lists.linux.dev \
    --cc=linux-tegra@vger.kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=marcan@marcan.st \
    --cc=matthias.bgg@gmail.com \
    --cc=mjrosato@linux.ibm.com \
    --cc=orsonzhai@gmail.com \
    --cc=robdclark@gmail.com \
    --cc=robin.murphy@arm.com \
    --cc=samuel@sholland.org \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=sven@svenpeter.dev \
    --cc=thierry.reding@gmail.com \
    --cc=vdumpa@nvidia.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=wens@csie.org \
    --cc=will@kernel.org \
    --cc=yong.wu@mediatek.com \
    --cc=zhang.lyra@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox