From: Joao Martins <joao.m.martins@oracle.com>
To: iommu@lists.linux-foundation.org
Cc: Jean-Philippe Brucker <jean-philippe@linaro.org>,
Kevin Tian <kevin.tian@intel.com>,
Yishai Hadas <yishaih@nvidia.com>,
Jason Gunthorpe <jgg@nvidia.com>,
kvm@vger.kernel.org, Will Deacon <will@kernel.org>,
Cornelia Huck <cohuck@redhat.com>,
Alex Williamson <alex.williamson@redhat.com>,
Joao Martins <joao.m.martins@oracle.com>,
David Woodhouse <dwmw2@infradead.org>,
Robin Murphy <robin.murphy@arm.com>
Subject: [PATCH RFC 03/19] iommufd: Dirty tracking data support
Date: Thu, 28 Apr 2022 22:09:17 +0100 [thread overview]
Message-ID: <20220428210933.3583-4-joao.m.martins@oracle.com> (raw)
In-Reply-To: <20220428210933.3583-1-joao.m.martins@oracle.com>
Add an IO pagetable API iopt_read_and_clear_dirty_data() that
performs the reading of dirty IOPTEs for a given IOVA range and
then copies them back to userspace from each area-internal bitmap.
Underneath it uses the IOMMU equivalent API which will read the
dirty bits, as well as atomically clearing the IOPTE dirty bit
and flushing the IOTLB at the end. The dirty bitmaps pass an
iotlb_gather to allow batching the dirty-bit updates.
Most of the complexity, though, is in the handling of the user
bitmaps to avoid copies back and forth. The bitmap user addresses
need to be iterated through, pinned and then passed into the
iommu core. The amount of bitmap data passed at a time for a
read_and_clear_dirty() is 1 page worth of pinned base page
pointers. That equates to 16M bits, or rather 64G of data that
can be returned as 'dirtied'. We flush the IOTLB at the end of
the whole scanned IOVA range, to defer as much as possible the
potential DMA performance penalty.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
drivers/iommu/iommufd/io_pagetable.c | 169 ++++++++++++++++++++++++
drivers/iommu/iommufd/iommufd_private.h | 44 ++++++
2 files changed, 213 insertions(+)
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index f4609ef369e0..835b5040fce9 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <uapi/linux/iommufd.h>
#include "io_pagetable.h"
@@ -347,6 +348,174 @@ int iopt_set_dirty_tracking(struct io_pagetable *iopt,
return ret;
}
+/*
+ * Prepare @iter to walk the user bitmap described by @bitmap.
+ *
+ * Sets up an iov_iter over the user-provided bitmap memory and allocates
+ * one page worth of struct page pointers used as the pinning window.
+ * dirty->pgshift must already be set (see iommu_dirty_bitmap_init()).
+ *
+ * Returns 0 on success, -EFAULT/-EINVAL if the user range is invalid,
+ * -ENOMEM if the pointer page cannot be allocated.
+ */
+int iommufd_dirty_iter_init(struct iommufd_dirty_iter *iter,
+			    struct iommufd_dirty_data *bitmap)
+{
+	struct iommu_dirty_bitmap *dirty = &iter->dirty;
+	unsigned long bitmap_len;
+	int ret;
+
+	/* Bytes of bitmap needed: one bit per IOVA page at pgshift granularity */
+	bitmap_len = dirty_bitmap_bytes(bitmap->length >> dirty->pgshift);
+
+	/* import_single_range() fails when the user range fails access_ok() */
+	ret = import_single_range(WRITE, bitmap->data, bitmap_len,
+				  &iter->bitmap_iov, &iter->bitmap_iter);
+	if (ret)
+		return ret;
+
+	iter->iova = bitmap->iova;
+
+	/* Can record up to 64G at a time */
+	dirty->pages = (struct page **) __get_free_page(GFP_KERNEL);
+
+	return !dirty->pages ? -ENOMEM : 0;
+}
+
+/*
+ * Release the page-pointer window allocated by iommufd_dirty_iter_init().
+ * Safe to call on a partially-initialized iterator; idempotent because
+ * dirty->pages is cleared after freeing.
+ */
+void iommufd_dirty_iter_free(struct iommufd_dirty_iter *iter)
+{
+	struct iommu_dirty_bitmap *dirty = &iter->dirty;
+
+	if (!dirty->pages)
+		return;
+
+	free_page((unsigned long) dirty->pages);
+	dirty->pages = NULL;
+}
+
+/*
+ * Returns true while there is still user bitmap space left to consume,
+ * i.e. the iteration is *not* finished.  NOTE(review): the name reads
+ * inverted ("done" == true means more work remains); consider renaming.
+ */
+bool iommufd_dirty_iter_done(struct iommufd_dirty_iter *iter)
+{
+ return iov_iter_count(&iter->bitmap_iter) > 0;
+}
+
+/*
+ * Bytes of the user bitmap the current pinned window can consume:
+ * what remains in the iov, clamped to the span of dirty->npages pages.
+ *
+ * NOTE(review): iov_iter_advance() both decrements .count and advances
+ * .iov_offset, so "count - iov_offset" appears to double-account the
+ * consumed bytes after the first advance — confirm intended semantics.
+ */
+static inline unsigned long iommufd_dirty_iter_bytes(struct iommufd_dirty_iter *iter)
+{
+ unsigned long left = iter->bitmap_iter.count - iter->bitmap_iter.iov_offset;
+
+ left = min_t(unsigned long, left, (iter->dirty.npages << PAGE_SHIFT));
+
+ return left;
+}
+
+/*
+ * IOVA span covered by the current window: each bitmap byte tracks
+ * BITS_PER_BYTE pages of (1 << pgshift) bytes each.
+ */
+unsigned long iommufd_dirty_iova_length(struct iommufd_dirty_iter *iter)
+{
+ unsigned long left = iommufd_dirty_iter_bytes(iter);
+
+ return ((BITS_PER_BYTE * left) << iter->dirty.pgshift);
+}
+
+/*
+ * Current IOVA position: the base IOVA plus the range already consumed,
+ * derived from how many bitmap bytes (bits == pages) were advanced past.
+ */
+unsigned long iommufd_dirty_iova(struct iommufd_dirty_iter *iter)
+{
+ unsigned long skip = iter->bitmap_iter.iov_offset;
+
+ return iter->iova + ((BITS_PER_BYTE * skip) << iter->dirty.pgshift);
+}
+
+/* Move the iterator past the bytes covered by the current pinned window. */
+void iommufd_dirty_iter_advance(struct iommufd_dirty_iter *iter)
+{
+ iov_iter_advance(&iter->bitmap_iter, iommufd_dirty_iter_bytes(iter));
+}
+
+/*
+ * Unpin the user bitmap pages pinned by iommufd_dirty_iter_get().
+ * NOTE(review): dirty->npages is not reset to 0 here, so a second call
+ * without an intervening _get() would unpin stale entries — confirm
+ * callers always pair put with get.
+ */
+void iommufd_dirty_iter_put(struct iommufd_dirty_iter *iter)
+{
+ struct iommu_dirty_bitmap *dirty = &iter->dirty;
+
+ if (dirty->npages)
+ unpin_user_pages(dirty->pages, dirty->npages);
+}
+
+/*
+ * Pin the next window of user bitmap pages so the IOMMU driver can write
+ * dirty bits directly into them (up to PAGE_SIZE/sizeof(struct page *)
+ * pages per window).
+ *
+ * Returns 0 on success with dirty->{npages,iova,start_offset} filled in,
+ * or a negative errno.
+ */
+int iommufd_dirty_iter_get(struct iommufd_dirty_iter *iter)
+{
+	struct iommu_dirty_bitmap *dirty = &iter->dirty;
+	unsigned long npages;
+	long ret;
+	void *addr;
+
+	addr = iter->bitmap_iov.iov_base + iter->bitmap_iter.iov_offset;
+	npages = iov_iter_npages(&iter->bitmap_iter,
+				 PAGE_SIZE / sizeof(struct page *));
+
+	/*
+	 * ret must be signed: pin_user_pages_fast() returns a negative
+	 * errno on failure.  With the previous unsigned type the error
+	 * wrapped to a huge value, slipped past the "<= 0" check and was
+	 * stored as npages.  Propagate the real errno instead of -EINVAL.
+	 */
+	ret = pin_user_pages_fast((unsigned long) addr, npages,
+				  FOLL_WRITE, dirty->pages);
+	if (ret < 0)
+		return ret;
+	if (ret == 0)
+		return -EINVAL;
+
+	dirty->npages = ret;
+	dirty->iova = iommufd_dirty_iova(iter);
+	dirty->start_offset = offset_in_page(addr);
+	return 0;
+}
+
+/*
+ * Walk the whole requested IOVA range, asking the IOMMU driver to read
+ * and atomically clear the IOPTE dirty bits into the user bitmap, one
+ * pinned window at a time.  The IOTLB flush is deferred to a single
+ * iommu_iotlb_sync() at the end to minimise the DMA performance hit.
+ */
+static int iommu_read_and_clear_dirty(struct iommu_domain *domain,
+ struct iommufd_dirty_data *bitmap)
+{
+ const struct iommu_domain_ops *ops = domain->ops;
+ struct iommu_iotlb_gather gather;
+ struct iommufd_dirty_iter iter;
+ int ret = 0;
+
+ if (!ops || !ops->read_and_clear_dirty)
+ return -EOPNOTSUPP;
+
+ /*
+ * Must run before iommufd_dirty_iter_init(): it sets dirty.pgshift,
+ * which _init() uses to size the bitmap.  NOTE(review): assumes
+ * iommu_dirty_bitmap_init() also initializes @gather — confirm.
+ */
+ iommu_dirty_bitmap_init(&iter.dirty, bitmap->iova,
+ __ffs(bitmap->page_size), &gather);
+ ret = iommufd_dirty_iter_init(&iter, bitmap);
+ if (ret)
+ return -ENOMEM;
+
+ /* One iteration per pinned window of user bitmap pages */
+ for (; iommufd_dirty_iter_done(&iter);
+ iommufd_dirty_iter_advance(&iter)) {
+ ret = iommufd_dirty_iter_get(&iter);
+ if (ret)
+ break;
+
+ ret = ops->read_and_clear_dirty(domain,
+ iommufd_dirty_iova(&iter),
+ iommufd_dirty_iova_length(&iter), &iter.dirty);
+
+ /* Unpin before advancing; driver has written the bits by now */
+ iommufd_dirty_iter_put(&iter);
+
+ if (ret)
+ break;
+ }
+
+ /* Single deferred flush for everything gathered above */
+ iommu_iotlb_sync(domain, &gather);
+ iommufd_dirty_iter_free(&iter);
+
+ return ret;
+}
+
+/*
+ * Read-and-clear the dirty bits for the exact area [iova, iova+length-1]
+ * into the user bitmap.  With @domain == NULL every domain attached to
+ * the io_pagetable is scanned; otherwise only @domain is.
+ *
+ * Returns 0 on success, -EOVERFLOW on a wrapping range, -ENOENT if the
+ * range does not match an existing area exactly, -EOPNOTSUPP if no
+ * domain supports dirty tracking, or the first driver error.
+ */
+int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
+ struct iommu_domain *domain,
+ struct iommufd_dirty_data *bitmap)
+{
+ unsigned long iova, length, iova_end;
+ struct iommu_domain *dom;
+ struct iopt_area *area;
+ unsigned long index;
+ int ret = -EOPNOTSUPP;
+
+ iova = bitmap->iova;
+ /* iova_end is inclusive, hence length - 1 */
+ length = bitmap->length - 1;
+ if (check_add_overflow(iova, length, &iova_end))
+ return -EOVERFLOW;
+
+ /* Hold iova_rwsem across the scan so the area cannot go away */
+ down_read(&iopt->iova_rwsem);
+ area = iopt_find_exact_area(iopt, iova, iova_end);
+ if (!area) {
+ up_read(&iopt->iova_rwsem);
+ return -ENOENT;
+ }
+
+ if (!domain) {
+ /* domains_rwsem nests inside iova_rwsem */
+ down_read(&iopt->domains_rwsem);
+ xa_for_each(&iopt->domains, index, dom) {
+ ret = iommu_read_and_clear_dirty(dom, bitmap);
+ if (ret)
+ break;
+ }
+ up_read(&iopt->domains_rwsem);
+ } else {
+ ret = iommu_read_and_clear_dirty(domain, bitmap);
+ }
+
+ up_read(&iopt->iova_rwsem);
+ return ret;
+}
+
struct iopt_pages *iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
unsigned long *start_byte,
unsigned long length)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index d00ef3b785c5..4c12b4a8f1a6 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -8,6 +8,8 @@
#include <linux/xarray.h>
#include <linux/refcount.h>
#include <linux/uaccess.h>
+#include <linux/iommu.h>
+#include <linux/uio.h>
struct iommu_domain;
struct iommu_group;
@@ -49,8 +51,50 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
unsigned long length);
int iopt_unmap_all(struct io_pagetable *iopt);
+/* User-supplied description of one dirty-bitmap read request. */
+struct iommufd_dirty_data {
+ unsigned long iova; /* start of the IOVA range to report */
+ unsigned long length; /* length of the range in bytes */
+ unsigned long page_size; /* granularity; one bitmap bit per page_size */
+ unsigned long *data; /* user pointer to the bitmap to fill */
+};
+
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
struct iommu_domain *domain, bool enable);
+int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
+ struct iommu_domain *domain,
+ struct iommufd_dirty_data *bitmap);
+
+/* Iterator state for walking a user dirty bitmap in pinned-page windows. */
+struct iommufd_dirty_iter {
+ struct iommu_dirty_bitmap dirty; /* window handed to the IOMMU driver */
+ struct iovec bitmap_iov; /* the single user bitmap range */
+ struct iov_iter bitmap_iter; /* progress through bitmap_iov */
+ unsigned long iova; /* IOVA corresponding to bitmap bit 0 */
+};
+
+void iommufd_dirty_iter_put(struct iommufd_dirty_iter *iter);
+int iommufd_dirty_iter_get(struct iommufd_dirty_iter *iter);
+int iommufd_dirty_iter_init(struct iommufd_dirty_iter *iter,
+ struct iommufd_dirty_data *bitmap);
+void iommufd_dirty_iter_free(struct iommufd_dirty_iter *iter);
+bool iommufd_dirty_iter_done(struct iommufd_dirty_iter *iter);
+void iommufd_dirty_iter_advance(struct iommufd_dirty_iter *iter);
+unsigned long iommufd_dirty_iova_length(struct iommufd_dirty_iter *iter);
+unsigned long iommufd_dirty_iova(struct iommufd_dirty_iter *iter);
+/*
+ * Bytes needed for a bitmap of @nr_pages bits, rounded up so the bitmap
+ * occupies a whole number of u64 words.
+ */
+static inline unsigned long dirty_bitmap_bytes(unsigned long nr_pages)
+{
+ return (ALIGN(nr_pages, BITS_PER_TYPE(u64)) / BITS_PER_BYTE);
+}
+
+/*
+ * Input argument of number of bits to bitmap_set() is unsigned integer, which
+ * further casts to signed integer for unaligned multi-bit operation,
+ * __bitmap_set().
+ * Then maximum bitmap size supported is 2^31 bits divided by 2^3 bits/byte,
+ * that is 2^28 (256 MB) which maps to 2^31 * 2^12 = 2^43 (8TB) on 4K page
+ * system.
+ */
+#define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX)
+#define DIRTY_BITMAP_SIZE_MAX dirty_bitmap_bytes(DIRTY_BITMAP_PAGES_MAX)
int iopt_access_pages(struct io_pagetable *iopt, unsigned long iova,
unsigned long npages, struct page **out_pages, bool write);
--
2.17.2
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
next prev parent reply other threads:[~2022-04-28 21:11 UTC|newest]
Thread overview: 101+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-28 21:09 [PATCH RFC 00/19] IOMMUFD Dirty Tracking Joao Martins
2022-04-28 21:09 ` [PATCH RFC 01/19] iommu: Add iommu_domain ops for dirty tracking Joao Martins
2022-04-29 7:54 ` Tian, Kevin
2022-04-29 10:44 ` Joao Martins
2022-04-29 12:08 ` Jason Gunthorpe via iommu
2022-04-29 14:26 ` Joao Martins
2022-04-29 14:35 ` Jason Gunthorpe via iommu
2022-04-29 13:40 ` Baolu Lu
2022-04-29 15:27 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 02/19] iommufd: Dirty tracking for io_pagetable Joao Martins
2022-04-29 8:07 ` Tian, Kevin
2022-04-29 10:48 ` Joao Martins
2022-04-29 11:56 ` Jason Gunthorpe via iommu
2022-04-29 14:28 ` Joao Martins
2022-04-29 23:51 ` Baolu Lu
2022-05-02 11:57 ` Joao Martins
2022-04-28 21:09 ` Joao Martins [this message]
2022-04-29 8:12 ` [PATCH RFC 03/19] iommufd: Dirty tracking data support Tian, Kevin
2022-04-29 10:54 ` Joao Martins
2022-04-29 12:09 ` Jason Gunthorpe via iommu
2022-04-29 14:33 ` Joao Martins
2022-04-30 4:11 ` Baolu Lu
2022-05-02 12:06 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 04/19] iommu: Add an unmap API that returns dirtied IOPTEs Joao Martins
2022-04-30 5:12 ` Baolu Lu
2022-05-02 12:22 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 05/19] iommufd: Add a dirty bitmap to iopt_unmap_iova() Joao Martins
2022-04-29 12:14 ` Jason Gunthorpe via iommu
2022-04-29 14:36 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 06/19] iommufd: Dirty tracking IOCTLs for the hw_pagetable Joao Martins
2022-04-28 21:09 ` [PATCH RFC 07/19] iommufd/vfio-compat: Dirty tracking IOCTLs compatibility Joao Martins
2022-04-29 12:19 ` Jason Gunthorpe via iommu
2022-04-29 14:27 ` Joao Martins
2022-04-29 14:36 ` Jason Gunthorpe via iommu
2022-04-29 14:52 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 08/19] iommufd: Add a test for dirty tracking ioctls Joao Martins
2022-04-28 21:09 ` [PATCH RFC 09/19] iommu/amd: Access/Dirty bit support in IOPTEs Joao Martins
2022-05-31 11:34 ` Suravee Suthikulpanit via iommu
2022-05-31 12:15 ` Baolu Lu
2022-05-31 15:22 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 10/19] iommu/amd: Add unmap_read_dirty() support Joao Martins
2022-05-31 12:39 ` Suravee Suthikulpanit via iommu
2022-05-31 15:51 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 11/19] iommu/amd: Print access/dirty bits if supported Joao Martins
2022-04-28 21:09 ` [PATCH RFC 12/19] iommu/arm-smmu-v3: Add feature detection for HTTU Joao Martins
2022-04-28 21:09 ` [PATCH RFC 13/19] iommu/arm-smmu-v3: Add feature detection for BBML Joao Martins
2022-04-29 11:11 ` Robin Murphy
2022-04-29 11:54 ` Joao Martins
2022-04-29 12:26 ` Robin Murphy
2022-04-29 14:34 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 14/19] iommu/arm-smmu-v3: Add read_and_clear_dirty() support Joao Martins
2022-04-28 21:09 ` [PATCH RFC 15/19] iommu/arm-smmu-v3: Add set_dirty_tracking_range() support Joao Martins
2022-04-29 8:28 ` Tian, Kevin
2022-04-29 11:05 ` Joao Martins
2022-04-29 11:19 ` Robin Murphy
2022-04-29 12:06 ` Joao Martins
2022-04-29 12:23 ` Jason Gunthorpe via iommu
2022-04-29 14:45 ` Joao Martins
2022-04-29 16:11 ` Jason Gunthorpe via iommu
2022-04-29 16:40 ` Joao Martins
2022-04-29 16:46 ` Jason Gunthorpe via iommu
2022-04-29 19:20 ` Robin Murphy
2022-05-02 11:52 ` Joao Martins
2022-05-02 11:57 ` Joao Martins
2022-05-05 7:25 ` Shameerali Kolothum Thodi via iommu
2022-05-05 9:52 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 16/19] iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping Joao Martins
2022-04-29 11:35 ` Robin Murphy
2022-04-29 12:10 ` Joao Martins
2022-04-29 12:46 ` Robin Murphy
2022-04-28 21:09 ` [PATCH RFC 17/19] iommu/arm-smmu-v3: Add unmap_read_dirty() support Joao Martins
2022-04-29 11:53 ` Robin Murphy
2022-04-28 21:09 ` [PATCH RFC 18/19] iommu/intel: Access/Dirty bit support for SL domains Joao Martins
2022-04-29 9:03 ` Tian, Kevin
2022-04-29 11:20 ` Joao Martins
2022-04-30 6:12 ` Baolu Lu
2022-05-02 12:24 ` Joao Martins
2022-04-28 21:09 ` [PATCH RFC 19/19] iommu/intel: Add unmap_read_dirty() support Joao Martins
2022-04-29 5:45 ` [PATCH RFC 00/19] IOMMUFD Dirty Tracking Tian, Kevin
2022-04-29 10:27 ` Joao Martins
2022-04-29 12:38 ` Jason Gunthorpe via iommu
2022-04-29 15:20 ` Joao Martins
2022-05-05 7:40 ` Tian, Kevin
2022-05-05 14:07 ` Jason Gunthorpe via iommu
2022-05-06 3:51 ` Tian, Kevin
2022-05-06 11:46 ` Jason Gunthorpe via iommu
2022-05-10 1:38 ` Tian, Kevin
2022-05-10 11:50 ` Joao Martins
2022-05-11 1:17 ` Tian, Kevin
2022-05-10 13:46 ` Jason Gunthorpe via iommu
2022-05-11 1:10 ` Tian, Kevin
2022-05-02 18:11 ` Alex Williamson
2022-05-02 18:52 ` Jason Gunthorpe via iommu
2022-05-03 10:48 ` Joao Martins
2022-05-05 7:42 ` Tian, Kevin
2022-05-05 10:06 ` Joao Martins
2022-05-05 11:03 ` Tian, Kevin
2022-05-05 11:50 ` Joao Martins
2022-05-06 3:14 ` Tian, Kevin
2022-05-05 13:55 ` Jason Gunthorpe via iommu
2022-05-06 3:17 ` Tian, Kevin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220428210933.3583-4-joao.m.martins@oracle.com \
--to=joao.m.martins@oracle.com \
--cc=alex.williamson@redhat.com \
--cc=cohuck@redhat.com \
--cc=dwmw2@infradead.org \
--cc=iommu@lists.linux-foundation.org \
--cc=jean-philippe@linaro.org \
--cc=jgg@nvidia.com \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=robin.murphy@arm.com \
--cc=will@kernel.org \
--cc=yishaih@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox