From: Nicolin Chen <nicolinc@nvidia.com>
To: <joro@8bytes.org>, <will@kernel.org>, <marcan@marcan.st>,
<sven@svenpeter.dev>, <robin.murphy@arm.com>,
<robdclark@gmail.com>, <baolu.lu@linux.intel.com>,
<matthias.bgg@gmail.com>, <orsonzhai@gmail.com>,
<baolin.wang7@gmail.com>, <zhang.lyra@gmail.com>,
<jean-philippe@linaro.org>, <alex.williamson@redhat.com>,
<jgg@nvidia.com>, <kevin.tian@intel.com>
Cc: mjrosato@linux.ibm.com, jordan@cosmicpenguin.net,
thierry.reding@gmail.com, alyssa@rosenzweig.io,
linux-s390@vger.kernel.org, kvm@vger.kernel.org,
jonathanh@nvidia.com, yong.wu@mediatek.com,
yangyingliang@huawei.com, gerald.schaefer@linux.ibm.com,
linux-arm-msm@vger.kernel.org, john.garry@huawei.com,
christophe.jaillet@wanadoo.fr, thunder.leizhen@huawei.com,
linux-tegra@vger.kernel.org, tglx@linutronix.de,
virtualization@lists.linux-foundation.org,
linux-arm-kernel@lists.infradead.org, chenxiang66@hisilicon.com,
cohuck@redhat.com, linux-kernel@vger.kernel.org,
iommu@lists.linux-foundation.org, suravee.suthikulpanit@amd.com,
linux-mediatek@lists.infradead.org, dwmw2@infradead.org
Subject: [PATCH v3 5/5] vfio/iommu_type1: Simplify group attachment
Date: Thu, 23 Jun 2022 13:00:29 -0700
Message-ID: <20220623200029.26007-6-nicolinc@nvidia.com>
In-Reply-To: <20220623200029.26007-1-nicolinc@nvidia.com>

Un-inline the domain-specific logic from the attach/detach_group ops into
two paired functions, vfio_iommu_alloc_attach_domain() and
vfio_iommu_detach_destroy_domain(), that strictly deal with creating and
destroying struct vfio_domains.
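
The attach and detach paths now mirror each other. Condensed from the diff
below, the new call structure looks roughly like this (error handling of
the attach path trimmed for brevity):

	/* vfio_iommu_type1_attach_group(): create or reuse a vfio_domain */
	domain = vfio_iommu_alloc_attach_domain(bus, iommu, group,
						&group_resv_regions);
	if (IS_ERR(domain)) {
		ret = PTR_ERR(domain);
		goto out_free_group;
	}

	/*
	 * vfio_iommu_type1_detach_group() and the attach error path both
	 * undo the same state through the paired helper.
	 */
	vfio_iommu_detach_destroy_domain(domain, iommu, group);
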
Add logic to check for the -EMEDIUMTYPE return code of iommu_attach_group()
and avoid the extra domain allocations and attach/detach sequences of the
old code. This allows an actual attach error, such as -ENOMEM, to be
detected properly instead of treating every attach error as an incompatible
domain.
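
For reference, the domain-reuse logic in vfio_iommu_alloc_attach_domain()
boils down to the loop below (a condensed sketch of the hunk further down;
domain allocation, nesting, mapping replay and MSI cookie handling are
elided):

	/* Try to match an existing compatible domain */
	list_for_each_entry(domain, &iommu->domain_list, next) {
		ret = iommu_attach_group(domain->domain, group->iommu_group);
		/* -EMEDIUMTYPE means an incompatible domain, try the next */
		if (ret == -EMEDIUMTYPE)
			continue;
		/* any other error, e.g. -ENOMEM, is a real attach failure */
		if (ret)
			return ERR_PTR(ret);
		/* attached to an existing domain, skip the new allocation */
		goto done;
	}
	/* no compatible domain found: allocate and attach a new one */
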
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/vfio/vfio_iommu_type1.c | 321 +++++++++++++++++---------------
1 file changed, 174 insertions(+), 147 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b9ccb3cfac5d..3ffa4e2d9d18 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2153,15 +2153,174 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
}
+static struct vfio_domain *
+vfio_iommu_alloc_attach_domain(struct bus_type *bus, struct vfio_iommu *iommu,
+ struct vfio_iommu_group *group,
+ struct list_head *group_resv_regions)
+{
+ struct iommu_domain *new_domain;
+ struct vfio_domain *domain;
+ phys_addr_t resv_msi_base;
+ int ret = 0;
+
+ /* Try to match an existing compatible domain */
+ list_for_each_entry (domain, &iommu->domain_list, next) {
+ ret = iommu_attach_group(domain->domain, group->iommu_group);
+ /* -EMEDIUMTYPE means an incompatible domain, so try next one */
+ if (ret == -EMEDIUMTYPE)
+ continue;
+ if (ret)
+ return ERR_PTR(ret);
+ goto done;
+ }
+
+ new_domain = iommu_domain_alloc(bus);
+ if (!new_domain)
+ return ERR_PTR(-EIO);
+
+ if (iommu->nesting) {
+ ret = iommu_enable_nesting(new_domain);
+ if (ret)
+ goto out_free_iommu_domain;
+ }
+
+ ret = iommu_attach_group(new_domain, group->iommu_group);
+ if (ret)
+ goto out_free_iommu_domain;
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain) {
+ ret = -ENOMEM;
+ goto out_detach;
+ }
+
+ domain->domain = new_domain;
+ vfio_test_domain_fgsp(domain);
+
+ /*
+ * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+ * no-snoop set) then VFIO always turns this feature on because on Intel
+ * platforms it optimizes KVM to disable wbinvd emulation.
+ */
+ if (new_domain->ops->enforce_cache_coherency)
+ domain->enforce_cache_coherency =
+ new_domain->ops->enforce_cache_coherency(new_domain);
+
+ /* replay mappings on new domains */
+ ret = vfio_iommu_replay(iommu, domain);
+ if (ret)
+ goto out_free_domain;
+
+ if (vfio_iommu_has_sw_msi(group_resv_regions, &resv_msi_base)) {
+ ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
+ if (ret && ret != -ENODEV)
+ goto out_free_domain;
+ }
+
+ INIT_LIST_HEAD(&domain->group_list);
+ list_add(&domain->next, &iommu->domain_list);
+ vfio_update_pgsize_bitmap(iommu);
+
+done:
+ list_add(&group->next, &domain->group_list);
+
+ /*
+ * An iommu backed group can dirty memory directly and therefore
+ * demotes the iommu scope until it declares itself dirty tracking
+ * capable via the page pinning interface.
+ */
+ iommu->num_non_pinned_groups++;
+
+ return domain;
+
+out_free_domain:
+ kfree(domain);
+out_detach:
+ iommu_detach_group(new_domain, group->iommu_group);
+out_free_iommu_domain:
+ iommu_domain_free(new_domain);
+ return ERR_PTR(ret);
+}
+
+static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
+{
+ struct rb_node *node;
+
+ while ((node = rb_first(&iommu->dma_list)))
+ vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
+}
+
+static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
+{
+ struct rb_node *n, *p;
+
+ n = rb_first(&iommu->dma_list);
+ for (; n; n = rb_next(n)) {
+ struct vfio_dma *dma;
+ long locked = 0, unlocked = 0;
+
+ dma = rb_entry(n, struct vfio_dma, node);
+ unlocked += vfio_unmap_unpin(iommu, dma, false);
+ p = rb_first(&dma->pfn_list);
+ for (; p; p = rb_next(p)) {
+ struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
+ node);
+
+ if (!is_invalid_reserved_pfn(vpfn->pfn))
+ locked++;
+ }
+ vfio_lock_acct(dma, locked - unlocked, true);
+ }
+}
+
+static void vfio_iommu_detach_destroy_domain(struct vfio_domain *domain,
+ struct vfio_iommu *iommu,
+ struct vfio_iommu_group *group)
+{
+ iommu_detach_group(domain->domain, group->iommu_group);
+ list_del(&group->next);
+ if (!list_empty(&domain->group_list))
+ goto out_dirty;
+
+ /*
+ * Group ownership provides privilege, if the group list is empty, the
+ * domain goes away. If it's the last domain with iommu and external
+ * domain doesn't exist, then all the mappings go away too. If it's the
+ * last domain with iommu and external domain exist, update accounting
+ */
+ if (list_is_singular(&iommu->domain_list)) {
+ if (list_empty(&iommu->emulated_iommu_groups)) {
+ WARN_ON(iommu->notifier.head);
+ vfio_iommu_unmap_unpin_all(iommu);
+ } else {
+ vfio_iommu_unmap_unpin_reaccount(iommu);
+ }
+ }
+ iommu_domain_free(domain->domain);
+ list_del(&domain->next);
+ kfree(domain);
+ vfio_update_pgsize_bitmap(iommu);
+
+out_dirty:
+ /*
+ * Removal of a group without dirty tracking may allow the iommu scope
+ * to be promoted.
+ */
+ if (!group->pinned_page_dirty_scope) {
+ iommu->num_non_pinned_groups--;
+ if (iommu->dirty_page_tracking)
+ vfio_iommu_populate_bitmap_full(iommu);
+ }
+}
+
static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group, enum vfio_group_type type)
{
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
- struct vfio_domain *domain, *d;
+ struct vfio_domain *domain;
struct bus_type *bus = NULL;
- bool resv_msi, msi_remap;
- phys_addr_t resv_msi_base = 0;
+ bool msi_remap;
struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy);
LIST_HEAD(group_resv_regions);
@@ -2197,26 +2356,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret)
goto out_free_group;
- ret = -ENOMEM;
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
+ ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
+ if (ret)
goto out_free_group;
- ret = -EIO;
- domain->domain = iommu_domain_alloc(bus);
- if (!domain->domain)
- goto out_free_domain;
-
- if (iommu->nesting) {
- ret = iommu_enable_nesting(domain->domain);
- if (ret)
- goto out_domain;
+ domain = vfio_iommu_alloc_attach_domain(bus, iommu, group,
+ &group_resv_regions);
+ if (IS_ERR(domain)) {
+ ret = PTR_ERR(domain);
+ goto out_free_group;
}
- ret = iommu_attach_group(domain->domain, group->iommu_group);
- if (ret)
- goto out_domain;
-
/* Get aperture info */
geo = &domain->domain->geometry;
if (vfio_iommu_aper_conflict(iommu, geo->aperture_start,
@@ -2225,10 +2375,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_detach;
}
- ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
- if (ret)
- goto out_detach;
-
if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) {
ret = -EINVAL;
goto out_detach;
@@ -2252,11 +2398,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret)
goto out_detach;
- resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);
-
- INIT_LIST_HEAD(&domain->group_list);
- list_add(&group->next, &domain->group_list);
-
msi_remap = irq_domain_check_msi_remap() ||
iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
@@ -2267,107 +2408,25 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_detach;
}
- /*
- * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
- * no-snoop set) then VFIO always turns this feature on because on Intel
- * platforms it optimizes KVM to disable wbinvd emulation.
- */
- if (domain->domain->ops->enforce_cache_coherency)
- domain->enforce_cache_coherency =
- domain->domain->ops->enforce_cache_coherency(
- domain->domain);
-
- /* Try to match an existing compatible domain */
- list_for_each_entry(d, &iommu->domain_list, next) {
- iommu_detach_group(domain->domain, group->iommu_group);
- if (!iommu_attach_group(d->domain, group->iommu_group)) {
- list_add(&group->next, &d->group_list);
- iommu_domain_free(domain->domain);
- kfree(domain);
- goto done;
- }
-
- ret = iommu_attach_group(domain->domain, group->iommu_group);
- if (ret)
- goto out_domain;
- }
-
- vfio_test_domain_fgsp(domain);
-
- /* replay mappings on new domains */
- ret = vfio_iommu_replay(iommu, domain);
- if (ret)
- goto out_detach;
-
- if (resv_msi) {
- ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
- if (ret && ret != -ENODEV)
- goto out_detach;
- }
-
- list_add(&domain->next, &iommu->domain_list);
- vfio_update_pgsize_bitmap(iommu);
-done:
/* Delete the old one and insert new iova list */
vfio_iommu_iova_insert_copy(iommu, &iova_copy);
- /*
- * An iommu backed group can dirty memory directly and therefore
- * demotes the iommu scope until it declares itself dirty tracking
- * capable via the page pinning interface.
- */
- iommu->num_non_pinned_groups++;
mutex_unlock(&iommu->lock);
vfio_iommu_resv_free(&group_resv_regions);
return 0;
out_detach:
- iommu_detach_group(domain->domain, group->iommu_group);
-out_domain:
- iommu_domain_free(domain->domain);
- vfio_iommu_iova_free(&iova_copy);
- vfio_iommu_resv_free(&group_resv_regions);
-out_free_domain:
- kfree(domain);
+ vfio_iommu_detach_destroy_domain(domain, iommu, group);
out_free_group:
kfree(group);
out_unlock:
mutex_unlock(&iommu->lock);
+ vfio_iommu_iova_free(&iova_copy);
+ vfio_iommu_resv_free(&group_resv_regions);
return ret;
}
-static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu)
-{
- struct rb_node *node;
-
- while ((node = rb_first(&iommu->dma_list)))
- vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node));
-}
-
-static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
-{
- struct rb_node *n, *p;
-
- n = rb_first(&iommu->dma_list);
- for (; n; n = rb_next(n)) {
- struct vfio_dma *dma;
- long locked = 0, unlocked = 0;
-
- dma = rb_entry(n, struct vfio_dma, node);
- unlocked += vfio_unmap_unpin(iommu, dma, false);
- p = rb_first(&dma->pfn_list);
- for (; p; p = rb_next(p)) {
- struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
- node);
-
- if (!is_invalid_reserved_pfn(vpfn->pfn))
- locked++;
- }
- vfio_lock_acct(dma, locked - unlocked, true);
- }
-}
-
/*
* Called when a domain is removed in detach. It is possible that
* the removed domain decided the iova aperture window. Modify the
@@ -2482,44 +2541,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
group = find_iommu_group(domain, iommu_group);
if (!group)
continue;
-
- iommu_detach_group(domain->domain, group->iommu_group);
- list_del(&group->next);
- /*
- * Group ownership provides privilege, if the group list is
- * empty, the domain goes away. If it's the last domain with
- * iommu and external domain doesn't exist, then all the
- * mappings go away too. If it's the last domain with iommu and
- * external domain exist, update accounting
- */
- if (list_empty(&domain->group_list)) {
- if (list_is_singular(&iommu->domain_list)) {
- if (list_empty(&iommu->emulated_iommu_groups)) {
- WARN_ON(iommu->notifier.head);
- vfio_iommu_unmap_unpin_all(iommu);
- } else {
- vfio_iommu_unmap_unpin_reaccount(iommu);
- }
- }
- iommu_domain_free(domain->domain);
- list_del(&domain->next);
- kfree(domain);
- vfio_iommu_aper_expand(iommu, &iova_copy);
- vfio_update_pgsize_bitmap(iommu);
- }
- /*
- * Removal of a group without dirty tracking may allow
- * the iommu scope to be promoted.
- */
- if (!group->pinned_page_dirty_scope) {
- iommu->num_non_pinned_groups--;
- if (iommu->dirty_page_tracking)
- vfio_iommu_populate_bitmap_full(iommu);
- }
+ vfio_iommu_detach_destroy_domain(domain, iommu, group);
kfree(group);
break;
}
+ vfio_iommu_aper_expand(iommu, &iova_copy);
if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
vfio_iommu_iova_insert_copy(iommu, &iova_copy);
else
--
2.17.1