From: Jason Gunthorpe <jgg@nvidia.com>
To: Yi Liu <yi.l.liu@intel.com>
Cc: joro@8bytes.org, kevin.tian@intel.com, baolu.lu@linux.intel.com,
alex.williamson@redhat.com, robin.murphy@arm.com,
eric.auger@redhat.com, nicolinc@nvidia.com, kvm@vger.kernel.org,
chao.p.peng@linux.intel.com, yi.y.sun@linux.intel.com,
iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org, zhenzhong.duan@intel.com,
joao.m.martins@oracle.com
Subject: Re: [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent domain
Date: Thu, 22 Feb 2024 11:16:18 -0400 [thread overview]
Message-ID: <20240222151618.GA13330@nvidia.com> (raw)
In-Reply-To: <d6b9ccff-3aba-4508-b83e-4bc3bc859e96@intel.com>
On Thu, Feb 22, 2024 at 04:34:10PM +0800, Yi Liu wrote:
> > It doesn't mean that the S2 is globally shared across all the nesting
> > translations (like ARM does), and you still have to iterate over every
> > nesting DID.
> >
> > In light of that this design seems to have gone a bit off..
> >
> > A domain should have a list of places that need invalidation,
> > specifically a list of DIDs and ATCs that need an invalidation to be
> > issued.
> >
> > Instead we now somehow have 4 different lists in the domain the
> > invalidation code iterates over?
> >
> > So I would think this:
> >
> > struct dmar_domain {
> > struct xarray iommu_array; /* Attached IOMMU array */
> > struct list_head devices; /* all devices' list */
> > struct list_head dev_pasids; /* all attached pasids */
> > struct list_head s1_domains;
> >
> > Would make sense to be collapsed into one logical list of attached
> > devices:
> >
> > struct intel_iommu_domain_attachment {
> > unsigned int did;
> > ioasid_t pasid;
> > struct device_domain_info *info;
> > list_head item;
> > };
> >
> > When you attach a S1/S2 nest you allocate two of the above structs and
> > one is linked on the S1 and one is linked on the S2..
>
> yes, this shall work. ATC flushing should be fine. But there can be a
> drawback when flushing DIDs. VT-d side, if there are multiple devices
> behind the same iommu unit, just need one DID flushing is enough. But
> after the above change, the number of DID flushing would increase per
> the number of devices. Although no functional issue, but it submits
> duplicated invalidation.
At least the three server drivers all have this same problem, I would
like to see some core code to help solve it, since it is tricky and
the RCU idea is good.
Collapsing invalidations can be done by sorting, I think this was
Robin's suggestion. But we could also easily maintain multiple list
threads hidden inside the API, or allocate a multi-level list.
Something very approximately like this:
Jason
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d14413916f93a0..7b2de139e7c437 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -38,6 +38,8 @@
#include "iommu-sva.h"
+#include <linux/iommu-alist.h>
+
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static DEFINE_IDA(iommu_global_pasid_ida);
diff --git a/include/linux/iommu-alist.h b/include/linux/iommu-alist.h
new file mode 100644
index 00000000000000..7a9243f8b2b5f8
--- /dev/null
+++ b/include/linux/iommu-alist.h
@@ -0,0 +1,126 @@
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/iommu.h>
+
+/*
+ * Place in an iommu_domain to keep track of what devices have been attached to
+ * it.
+ */
+struct iommu_attach_list {
+ spinlock_t lock;
+ struct list_head all;
+};
+
+/*
+ * Allocate for every time a device is attached to a domain
+ */
+struct iommu_attach_elm {
+ /*
+ * Note that this pointer is accessed under RCU, the driver has to be
+ * careful to rcu free it.
+ */
+ struct iommu_device *iommu_device;
+ ioasid_t pasid;
+ u8 using_atc;
+ struct list_head all_item;
+
+ /* May not be accessed under RCU! */
+ struct device *device;
+};
+
+void iommu_attach_init(struct iommu_attach_list *alist)
+{
+ spin_lock_init(&alist->lock);
+ INIT_LIST_HEAD(&alist->all);
+}
+
+int iommu_attach_add(struct iommu_attach_list *alist,
+ struct iommu_attach_elm *elm)
+{
+ struct list_head *insert_after;
+
+ spin_lock(&alist->lock);
+ insert_after = list_find_sorted_insertion(alist, elm, cmp);
+ list_add_rcu(&elm->all_item, insert_after);
+ spin_unlock(&alist->lock);
+ return 0;
+}
+
+void iommu_attach_del_free(struct iommu_attach_list *alist, struct iommu_attach_elm *elm)
+{
+ spin_lock(&alist->lock);
+ list_del_rcu(&elm->all_item);
+ spin_unlock(&alist->lock);
+ /* assumes 0 offset */
+ kfree_rcu_mightsleep(elm);
+}
+
+static struct iommu_attach_elm *
+__iommu_attach_next_iommu(struct iommu_attach_list *alist,
+ struct iommu_attach_elm *pos,
+ struct iommu_attach_elm *last)
+{
+ struct iommu_attach_elm *next;
+
+ do {
+ next = list_next_or_null_rcu(&alist->all, &pos->all_item,
+ struct iommu_attach_elm, all_item);
+ if (!next)
+ return NULL;
+ if (!last)
+ return next;
+ pos = next;
+ } while (last->iommu_device == next->iommu_device);
+ return next;
+}
+
+/* assumes 0 offset */
+#define iommu_attach_next_iommu(alist, pos, last, member) \
+ container_of(__iommu_attach_next_iommu(alist, &(pos)->member, \
+ &(last)->member), \
+ typeof(*pos), member)
+
+#define iommu_attach_for_each_iommu(pos, last, alist, member) \
+ for (({ \
+ RCU_LOCKDEP_WARN( \
+ !rcu_read_lock_any_held(), \
+ "RCU-list traversed in non-reader section!"); \
+ }), \
+ pos = list_first_or_null_rcu(&(alist)->all, typeof(*pos), \
+ member.all_item), \
+ last = NULL; \
+ pos; pos = iommu_attach_next_iommu(alist, pos, last, member))
+
+/* Example */
+struct driver_domain {
+ struct iommu_domain domain;
+ struct iommu_attach_list alist;
+};
+
+struct driver_attach_elm {
+ struct iommu_attach_elm aelm;
+ unsigned int cache_tag;
+};
+
+static void example(struct driver_domain *domain)
+{
+ struct driver_attach_elm *elm;
+ struct driver_attach_elm *pos, *last;
+ bool need_atc = false;
+
+ iommu_attach_init(&domain->alist);
+
+ elm = kzalloc(sizeof(*elm), GFP_KERNEL);
+ iommu_attach_add(&domain->alist, &elm->aelm);
+
+ rcu_read_lock();
+ iommu_attach_for_each_iommu(pos, last, &domain->alist, aelm) {
+ invalidate_iommu(pos);
+ need_atc |= pos->aelm.using_atc;
+ }
+ if (need_atc) {
+ list_for_each_entry_rcu(pos, &domain->alist.all,
+ aelm.all_item) {
+ if (pos->aelm.using_atc)
+ invalidate_atc(pos);
+ }
+ }
+ rcu_read_unlock();
+}
next prev parent reply other threads:[~2024-02-22 15:16 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-08 8:22 [PATCH rc 0/8] Add missing cache flush and dirty tracking set for nested parent domain Yi Liu
2024-02-08 8:23 ` [PATCH rc 1/8] iommu/vt-d: Track nested domains in parent Yi Liu
2024-02-08 8:28 ` Tian, Kevin
2024-02-08 9:23 ` Yi Liu
2024-02-08 8:23 ` [PATCH rc 2/8] iommu/vt-d: Add __iommu_flush_iotlb_psi() Yi Liu
2024-02-08 8:30 ` Tian, Kevin
2024-02-08 8:23 ` [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent domain Yi Liu
2024-02-08 8:38 ` Tian, Kevin
2024-02-09 2:40 ` Baolu Lu
2024-02-21 15:19 ` Jason Gunthorpe
2024-02-22 8:34 ` Yi Liu
2024-02-22 15:16 ` Jason Gunthorpe [this message]
2024-02-08 8:23 ` [PATCH rc 4/8] iommu/vt-d: Update iotlb in nested domain attach Yi Liu
2024-02-08 8:40 ` Tian, Kevin
2024-02-08 8:23 ` [PATCH rc 5/8] iommu/vt-d: Add missing device iotlb flush for parent domain Yi Liu
2024-02-08 8:42 ` Tian, Kevin
2024-02-08 8:23 ` [PATCH rc 6/8] iommu/vt-d: Remove @domain parameter from intel_pasid_setup_dirty_tracking() Yi Liu
2024-02-08 8:43 ` Tian, Kevin
2024-02-08 10:29 ` Joao Martins
2024-02-08 8:23 ` [PATCH rc 7/8] iommu/vt-d: Wrap the dirty tracking loop to be a helper Yi Liu
2024-02-08 8:45 ` Tian, Kevin
2024-02-08 10:29 ` Joao Martins
2024-02-09 2:40 ` Baolu Lu
2024-02-08 8:23 ` [PATCH rc 8/8] iommu/vt-d: Add missing dirty tracking set for parent domain Yi Liu
2024-02-08 8:53 ` Tian, Kevin
2024-02-08 9:23 ` Yi Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240222151618.GA13330@nvidia.com \
--to=jgg@nvidia.com \
--cc=alex.williamson@redhat.com \
--cc=baolu.lu@linux.intel.com \
--cc=chao.p.peng@linux.intel.com \
--cc=eric.auger@redhat.com \
--cc=iommu@lists.linux.dev \
--cc=joao.m.martins@oracle.com \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=nicolinc@nvidia.com \
--cc=robin.murphy@arm.com \
--cc=yi.l.liu@intel.com \
--cc=yi.y.sun@linux.intel.com \
--cc=zhenzhong.duan@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox