Linux Kernel Selftest development
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Yi Liu <yi.l.liu@intel.com>
Cc: joro@8bytes.org, kevin.tian@intel.com, baolu.lu@linux.intel.com,
	alex.williamson@redhat.com, robin.murphy@arm.com,
	eric.auger@redhat.com, nicolinc@nvidia.com, kvm@vger.kernel.org,
	chao.p.peng@linux.intel.com, yi.y.sun@linux.intel.com,
	iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org, zhenzhong.duan@intel.com,
	joao.m.martins@oracle.com
Subject: Re: [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent domain
Date: Thu, 22 Feb 2024 11:16:18 -0400	[thread overview]
Message-ID: <20240222151618.GA13330@nvidia.com> (raw)
In-Reply-To: <d6b9ccff-3aba-4508-b83e-4bc3bc859e96@intel.com>

On Thu, Feb 22, 2024 at 04:34:10PM +0800, Yi Liu wrote:
> > It doesn't mean that the S2 is globally shared across all the nesting
> > translations (like ARM does), and you still have to iterate over every
> > nesting DID.
> > 
> > In light of that this design seems to have gone a bit off..
> > 
> > A domain should have a list of places that need invalidation,
> > specifically a list of DIDs and ATCs that need an invalidation to be
> > issued.
> > 
> > Instead we now somehow have 4 different lists in the domain the
> > invalidation code iterates over?
> > 
> > So I would think this:
> > 
> > struct dmar_domain {
> > 	struct xarray iommu_array;	/* Attached IOMMU array */
> > 	struct list_head devices;	/* all devices' list */
> > 	struct list_head dev_pasids;	/* all attached pasids */
> > 	struct list_head s1_domains;
> > 
> > Would make sense to be collapsed into one logical list of attached
> > devices:
> > 
> > struct intel_iommu_domain_attachment {
> >     unsigned int did;
> >     ioasid_t pasid;
> >     struct device_domain_info *info;
> >     list_head item;
> > };
> > 
> > When you attach a S1/S2 nest you allocate two of the above structs and
> > one is linked on the S1 and one is linked on the S2..
> 
> yes, this shall work. ATC flushing should be fine. But there can be a
> drawback when flushing DIDs. VT-d side, if there are multiple devices
> behind the same iommu unit, just need one DID flushing is enough. But
> after the above change, the number of DID flushing would increase per
> the number of devices. Although no functional issue, but it submits
> duplicated invalidation.

At least the three server drivers all have this same problem, I would
like to seem some core code to help solve it, since it is tricky and
the RCU idea is good..

Collapsing invalidations can be done by sorting, I think this was
Robin's suggestion.  But we could also easially maintain multiple list
threads hidden inside the API, or allocate a multi-level list.

Something very approximately like this:

Jason

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d14413916f93a0..7b2de139e7c437 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -38,6 +38,8 @@
 
 #include "iommu-sva.h"
 
+#include <linux/iommu-alist.h>
+
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 static DEFINE_IDA(iommu_global_pasid_ida);
diff --git a/include/linux/iommu-alist.h b/include/linux/iommu-alist.h
new file mode 100644
index 00000000000000..7a9243f8b2b5f8
--- /dev/null
+++ b/include/linux/iommu-alist.h
@@ -0,0 +1,126 @@
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/iommu.h>
+
+/*
+ * Place in an iommu_domain to keep track of what devices have been attached to
+ * it.
+ */
+struct iommu_attach_list {
+	spinlock_t lock;
+	struct list_head all;
+};
+
+/*
+ * Allocate for every time a device is attached to a domain
+ */
+struct iommu_attach_elm {
+	/*
+	 * Note that this pointer is accessed under RCU, the driver has to be
+	 * careful to rcu free it.
+	 */
+	struct iommu_device *iommu_device;
+	ioasid_t pasid;
+	u8 using_atc;
+	struct list_head all_item;
+
+	/* May not be accessed under RCU! */
+	struct device *device;
+};
+
+void iommu_attach_init(struct iommu_attach_list *alist)
+{
+	spin_lock_init(&alist->lock);
+}
+
+int iommu_attach_add(struct iommu_attach_list *alist,
+		     struct iommu_attach_elm *elm)
+{
+	struct list_head *insert_after;
+
+	spin_lock(&alist->lock);
+	insert_after = list_find_sorted_insertion(alist, elm, cmp);
+	list_add_rcu(&elm->all_item, insert_after);
+	spin_unlock(&alist->lock);
+}
+
+void iommu_attach_del_free(struct iommu_attach_list *alist, struct iommu_attach_elm *elm)
+{
+	spin_lock(&alist->lock);
+	list_del_rcu(&elm->all_item);
+	spin_unlock(&alist->lock);
+	/* assumes 0 offset */
+	kfree_rcu_mightsleep(elm);
+}
+
+static struct iommu_attach_elm *
+__iommu_attach_next_iommu(struct iommu_attach_list *alist,
+			  struct iommu_attach_elm *pos,
+			  struct iommu_attach_elm *last)
+{
+	struct iommu_attach_elm *next;
+
+	do {
+		next = list_next_or_null_rcu(&alist->all, &pos->all_item,
+					     struct iommu_attach_elm, all_item);
+		if (!next)
+			return NULL;
+		if (!last)
+			return next;
+	} while (pos->iommu_device == next->iommu_device);
+	return next;
+}
+
+/* assumes 0 offset */
+#define iommu_attach_next_iommu(alist, pos, last, member)             \
+	container_of(__iommu_attach_next_iommu(alist, &(pos)->member, \
+					       &(last)->member),      \
+		     typeof(*pos), member)
+
+#define iommu_attach_for_each_iommu(pos, last, alist, member)              \
+	for (({                                                            \
+		     RCU_LOCKDEP_WARN(                                     \
+			     !rcu_read_lock_any_held(),                    \
+			     "RCU-list traversed in non-reader section!"); \
+	     }),                                                           \
+	     pos = list_first_or_null_rcu(&(alist)->all, typeof(*pos),     \
+					  member.all_item),                \
+	     last = NULL;                                                  \
+	     pos; pos = iommu_attach_next_iommu(alist, pos, last, member))
+
+/* Example */
+struct driver_domain {
+	struct iommu_domain domain;
+	struct iommu_attach_list alist;
+};
+
+struct driver_attach_elm {
+	struct iommu_attach_elm aelm;
+	unsigned int cache_tag;
+};
+
+static void example(struct driver_domain *domain)
+{
+	struct driver_attach_elm *elm;
+	struct driver_attach_elm *pos, *last;
+	bool need_atc;
+
+	iommu_attach_init(&domain->alist);
+
+	elm = kzalloc(sizeof(*elm), GFP_KERNEL);
+	iommu_attach_add(&domain->alist, &elm->aelm);
+
+	rcu_read_lock();
+	iommu_attach_for_each_iommu(pos, last, &domain->alist, aelm) {
+		invalidate_iommu(elm);
+		need_atc |= elm->aelm.using_atc;
+	}
+	if (need_atc) {
+		list_for_each_entry_rcu(pos, &domain->alist.all,
+					aelm.all_item) {
+			if (pos->aelm.using_atc)
+				invalidate_atc(elm);
+		}
+	}
+	rcu_read_unlock();
+}

  reply	other threads:[~2024-02-22 15:16 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-08  8:22 [PATCH rc 0/8] Add missing cache flush and dirty tracking set for nested parent domain Yi Liu
2024-02-08  8:23 ` [PATCH rc 1/8] iommu/vt-d: Track nested domains in parent Yi Liu
2024-02-08  8:28   ` Tian, Kevin
2024-02-08  9:23     ` Yi Liu
2024-02-08  8:23 ` [PATCH rc 2/8] iommu/vt-d: Add __iommu_flush_iotlb_psi() Yi Liu
2024-02-08  8:30   ` Tian, Kevin
2024-02-08  8:23 ` [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent domain Yi Liu
2024-02-08  8:38   ` Tian, Kevin
2024-02-09  2:40     ` Baolu Lu
2024-02-21 15:19   ` Jason Gunthorpe
2024-02-22  8:34     ` Yi Liu
2024-02-22 15:16       ` Jason Gunthorpe [this message]
2024-02-08  8:23 ` [PATCH rc 4/8] iommu/vt-d: Update iotlb in nested domain attach Yi Liu
2024-02-08  8:40   ` Tian, Kevin
2024-02-08  8:23 ` [PATCH rc 5/8] iommu/vt-d: Add missing device iotlb flush for parent domain Yi Liu
2024-02-08  8:42   ` Tian, Kevin
2024-02-08  8:23 ` [PATCH rc 6/8] iommu/vt-d: Remove @domain parameter from intel_pasid_setup_dirty_tracking() Yi Liu
2024-02-08  8:43   ` Tian, Kevin
2024-02-08 10:29   ` Joao Martins
2024-02-08  8:23 ` [PATCH rc 7/8] iommu/vt-d: Wrap the dirty tracking loop to be a helper Yi Liu
2024-02-08  8:45   ` Tian, Kevin
2024-02-08 10:29   ` Joao Martins
2024-02-09  2:40   ` Baolu Lu
2024-02-08  8:23 ` [PATCH rc 8/8] iommu/vt-d: Add missing dirty tracking set for parent domain Yi Liu
2024-02-08  8:53   ` Tian, Kevin
2024-02-08  9:23     ` Yi Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240222151618.GA13330@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=joao.m.martins@oracle.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox