All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@nvidia.com>
To: Yi Liu <yi.l.liu@intel.com>
Cc: joro@8bytes.org, kevin.tian@intel.com, baolu.lu@linux.intel.com,
	alex.williamson@redhat.com, robin.murphy@arm.com,
	eric.auger@redhat.com, nicolinc@nvidia.com, kvm@vger.kernel.org,
	chao.p.peng@linux.intel.com, yi.y.sun@linux.intel.com,
	iommu@lists.linux.dev, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org, zhenzhong.duan@intel.com,
	joao.m.martins@oracle.com
Subject: Re: [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent domain
Date: Thu, 22 Feb 2024 11:16:18 -0400	[thread overview]
Message-ID: <20240222151618.GA13330@nvidia.com> (raw)
In-Reply-To: <d6b9ccff-3aba-4508-b83e-4bc3bc859e96@intel.com>

On Thu, Feb 22, 2024 at 04:34:10PM +0800, Yi Liu wrote:
> > It doesn't mean that the S2 is globally shared across all the nesting
> > translations (like ARM does), and you still have to iterate over every
> > nesting DID.
> > 
> > In light of that this design seems to have gone a bit off..
> > 
> > A domain should have a list of places that need invalidation,
> > specifically a list of DIDs and ATCs that need an invalidation to be
> > issued.
> > 
> > Instead we now somehow have 4 different lists in the domain the
> > invalidation code iterates over?
> > 
> > So I would think this:
> > 
> > struct dmar_domain {
> > 	struct xarray iommu_array;	/* Attached IOMMU array */
> > 	struct list_head devices;	/* all devices' list */
> > 	struct list_head dev_pasids;	/* all attached pasids */
> > 	struct list_head s1_domains;
> > 
> > Would make sense to be collapsed into one logical list of attached
> > devices:
> > 
> > struct intel_iommu_domain_attachment {
> >     unsigned int did;
> >     ioasid_t pasid;
> >     struct device_domain_info *info;
> >     list_head item;
> > };
> > 
> > When you attach a S1/S2 nest you allocate two of the above structs and
> > one is linked on the S1 and one is linked on the S2..
> 
> yes, this shall work. ATC flushing should be fine. But there can be a
> drawback when flushing DIDs. VT-d side, if there are multiple devices
> behind the same iommu unit, just need one DID flushing is enough. But
> after the above change, the number of DID flushing would increase per
> the number of devices. Although no functional issue, but it submits
> duplicated invalidation.

At least the three server drivers all have this same problem, I would
like to seem some core code to help solve it, since it is tricky and
the RCU idea is good..

Collapsing invalidations can be done by sorting, I think this was
Robin's suggestion.  But we could also easially maintain multiple list
threads hidden inside the API, or allocate a multi-level list.

Something very approximately like this:

Jason

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d14413916f93a0..7b2de139e7c437 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -38,6 +38,8 @@
 
 #include "iommu-sva.h"
 
+#include <linux/iommu-alist.h>
+
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 static DEFINE_IDA(iommu_global_pasid_ida);
diff --git a/include/linux/iommu-alist.h b/include/linux/iommu-alist.h
new file mode 100644
index 00000000000000..7a9243f8b2b5f8
--- /dev/null
+++ b/include/linux/iommu-alist.h
@@ -0,0 +1,126 @@
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/iommu.h>
+
+/*
+ * Place in an iommu_domain to keep track of what devices have been attached to
+ * it.
+ */
+struct iommu_attach_list {
+	spinlock_t lock;
+	struct list_head all;
+};
+
+/*
+ * Allocate for every time a device is attached to a domain
+ */
+struct iommu_attach_elm {
+	/*
+	 * Note that this pointer is accessed under RCU, the driver has to be
+	 * careful to rcu free it.
+	 */
+	struct iommu_device *iommu_device;
+	ioasid_t pasid;
+	u8 using_atc;
+	struct list_head all_item;
+
+	/* May not be accessed under RCU! */
+	struct device *device;
+};
+
+void iommu_attach_init(struct iommu_attach_list *alist)
+{
+	spin_lock_init(&alist->lock);
+}
+
+int iommu_attach_add(struct iommu_attach_list *alist,
+		     struct iommu_attach_elm *elm)
+{
+	struct list_head *insert_after;
+
+	spin_lock(&alist->lock);
+	insert_after = list_find_sorted_insertion(alist, elm, cmp);
+	list_add_rcu(&elm->all_item, insert_after);
+	spin_unlock(&alist->lock);
+}
+
+void iommu_attach_del_free(struct iommu_attach_list *alist, struct iommu_attach_elm *elm)
+{
+	spin_lock(&alist->lock);
+	list_del_rcu(&elm->all_item);
+	spin_unlock(&alist->lock);
+	/* assumes 0 offset */
+	kfree_rcu_mightsleep(elm);
+}
+
+static struct iommu_attach_elm *
+__iommu_attach_next_iommu(struct iommu_attach_list *alist,
+			  struct iommu_attach_elm *pos,
+			  struct iommu_attach_elm *last)
+{
+	struct iommu_attach_elm *next;
+
+	do {
+		next = list_next_or_null_rcu(&alist->all, &pos->all_item,
+					     struct iommu_attach_elm, all_item);
+		if (!next)
+			return NULL;
+		if (!last)
+			return next;
+	} while (pos->iommu_device == next->iommu_device);
+	return next;
+}
+
+/* assumes 0 offset */
+#define iommu_attach_next_iommu(alist, pos, last, member)             \
+	container_of(__iommu_attach_next_iommu(alist, &(pos)->member, \
+					       &(last)->member),      \
+		     typeof(*pos), member)
+
+#define iommu_attach_for_each_iommu(pos, last, alist, member)              \
+	for (({                                                            \
+		     RCU_LOCKDEP_WARN(                                     \
+			     !rcu_read_lock_any_held(),                    \
+			     "RCU-list traversed in non-reader section!"); \
+	     }),                                                           \
+	     pos = list_first_or_null_rcu(&(alist)->all, typeof(*pos),     \
+					  member.all_item),                \
+	     last = NULL;                                                  \
+	     pos; pos = iommu_attach_next_iommu(alist, pos, last, member))
+
+/* Example */
+struct driver_domain {
+	struct iommu_domain domain;
+	struct iommu_attach_list alist;
+};
+
+struct driver_attach_elm {
+	struct iommu_attach_elm aelm;
+	unsigned int cache_tag;
+};
+
+static void example(struct driver_domain *domain)
+{
+	struct driver_attach_elm *elm;
+	struct driver_attach_elm *pos, *last;
+	bool need_atc;
+
+	iommu_attach_init(&domain->alist);
+
+	elm = kzalloc(sizeof(*elm), GFP_KERNEL);
+	iommu_attach_add(&domain->alist, &elm->aelm);
+
+	rcu_read_lock();
+	iommu_attach_for_each_iommu(pos, last, &domain->alist, aelm) {
+		invalidate_iommu(elm);
+		need_atc |= elm->aelm.using_atc;
+	}
+	if (need_atc) {
+		list_for_each_entry_rcu(pos, &domain->alist.all,
+					aelm.all_item) {
+			if (pos->aelm.using_atc)
+				invalidate_atc(elm);
+		}
+	}
+	rcu_read_unlock();
+}

  reply	other threads:[~2024-02-22 15:16 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-08  8:22 [PATCH rc 0/8] Add missing cache flush and dirty tracking set for nested parent domain Yi Liu
2024-02-08  8:23 ` [PATCH rc 1/8] iommu/vt-d: Track nested domains in parent Yi Liu
2024-02-08  8:28   ` Tian, Kevin
2024-02-08  9:23     ` Yi Liu
2024-02-08  8:23 ` [PATCH rc 2/8] iommu/vt-d: Add __iommu_flush_iotlb_psi() Yi Liu
2024-02-08  8:30   ` Tian, Kevin
2024-02-08  8:23 ` [PATCH rc 3/8] iommu/vt-d: Add missing iotlb flush for parent domain Yi Liu
2024-02-08  8:38   ` Tian, Kevin
2024-02-09  2:40     ` Baolu Lu
2024-02-21 15:19   ` Jason Gunthorpe
2024-02-22  8:34     ` Yi Liu
2024-02-22 15:16       ` Jason Gunthorpe [this message]
2024-02-08  8:23 ` [PATCH rc 4/8] iommu/vt-d: Update iotlb in nested domain attach Yi Liu
2024-02-08  8:40   ` Tian, Kevin
2024-02-08  8:23 ` [PATCH rc 5/8] iommu/vt-d: Add missing device iotlb flush for parent domain Yi Liu
2024-02-08  8:42   ` Tian, Kevin
2024-02-08  8:23 ` [PATCH rc 6/8] iommu/vt-d: Remove @domain parameter from intel_pasid_setup_dirty_tracking() Yi Liu
2024-02-08  8:43   ` Tian, Kevin
2024-02-08 10:29   ` Joao Martins
2024-02-08  8:23 ` [PATCH rc 7/8] iommu/vt-d: Wrap the dirty tracking loop to be a helper Yi Liu
2024-02-08  8:45   ` Tian, Kevin
2024-02-08 10:29   ` Joao Martins
2024-02-09  2:40   ` Baolu Lu
2024-02-08  8:23 ` [PATCH rc 8/8] iommu/vt-d: Add missing dirty tracking set for parent domain Yi Liu
2024-02-08  8:53   ` Tian, Kevin
2024-02-08  9:23     ` Yi Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240222151618.GA13330@nvidia.com \
    --to=jgg@nvidia.com \
    --cc=alex.williamson@redhat.com \
    --cc=baolu.lu@linux.intel.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=eric.auger@redhat.com \
    --cc=iommu@lists.linux.dev \
    --cc=joao.m.martins@oracle.com \
    --cc=joro@8bytes.org \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=nicolinc@nvidia.com \
    --cc=robin.murphy@arm.com \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@linux.intel.com \
    --cc=zhenzhong.duan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.