From: Oak Zeng <oak.zeng@intel.com>
To: intel-xe@lists.freedesktop.org
Subject: [CI v3 11/26] drm/svm: introduce hmmptr and helper functions
Date: Tue, 28 May 2024 21:19:09 -0400 [thread overview]
Message-ID: <20240529011924.4125173-11-oak.zeng@intel.com> (raw)
In-Reply-To: <20240529011924.4125173-1-oak.zeng@intel.com>
A hmmptr is a pointer in a CPU program, like a userptr. But unlike
a userptr, a hmmptr can also be migrated to device local memory.
Another way to look at it: a userptr is a special hmmptr without the
capability of migration - a userptr's backing store is always in system
memory.
This is built on top of kernel HMM infrastructure thus is called hmmptr.
Helper functions are introduced to init, release and populate hmmptr.
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Brian Welty <brian.welty@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
---
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/drm_svm.c | 229 ++++++++++++++++++++++++++++++++++++++
include/drm/drm_svm.h | 54 +++++++++
4 files changed, 285 insertions(+)
create mode 100644 drivers/gpu/drm/drm_svm.c
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 959b19a04101..c390ff0dc6c1 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -20,6 +20,7 @@ menuconfig DRM
# device and dmabuf fd. Let's make sure that is available for our userspace.
select KCMP
select VIDEO
+ select HMM_MIRROR
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index f9ca4f8fa6c5..1c541468d5b0 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -89,6 +89,7 @@ drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \
drm_privacy_screen_x86.o
drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
drm-$(CONFIG_DRM_PANIC) += drm_panic.o
+drm-$(CONFIG_HMM_MIRROR) += ./drm_svm.o
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
diff --git a/drivers/gpu/drm/drm_svm.c b/drivers/gpu/drm/drm_svm.c
new file mode 100644
index 000000000000..66d8f8a69867
--- /dev/null
+++ b/drivers/gpu/drm/drm_svm.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+
+#include <linux/scatterlist.h>
+#include <linux/mmu_notifier.h>
+#include <linux/dma-mapping.h>
+#include <linux/memremap.h>
+#include <drm/drm_gem_dma_helper.h>
+#include <drm/drm_svm.h>
+#include <linux/swap.h>
+#include <linux/bug.h>
+#include <linux/hmm.h>
+#include <linux/mm.h>
+
+/*
+ * Round @start down and @end up to page boundaries and return how many
+ * pages lie between the two rounded addresses.
+ */
+static u64 __npages_in_range(unsigned long start, unsigned long end)
+{
+	unsigned long first = PAGE_ALIGN_DOWN(start);
+	unsigned long last = PAGE_ALIGN(end);
+
+	return (last - first) >> PAGE_SHIFT;
+}
+
+/**
+ * __mark_range_accessed() - mark the pages of a range as accessed
+ * @hmm_pfn: hmm_pfn array to mark
+ * @npages: how many entries of @hmm_pfn to mark
+ * @write: if the range was written to, pages are also marked dirty
+ *
+ * Marks every page accessed (and dirty when @write is set) so that core mm
+ * has this information for memory eviction or write back to hard disk.
+ */
+static void __mark_range_accessed(unsigned long *hmm_pfn, int npages, bool write)
+{
+	struct page *page;
+	/*
+	 * Keep the index the same signed type as @npages: with an unsigned
+	 * index a negative count would be converted to a huge bound and the
+	 * loop would run far past the end of @hmm_pfn.
+	 */
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		page = hmm_pfn_to_page(hmm_pfn[i]);
+		if (write)
+			set_page_dirty_lock(page);
+
+		mark_page_accessed(page);
+	}
+}
+
+/* Start address of the gpuva returned by the hmmptr's get_gpuva() callback. */
+static inline u64 __hmmptr_start(struct drm_hmmptr *hmmptr)
+{
+	/*
+	 * Fetch the gpuva once into a local; GPUVA_START() is defined outside
+	 * this file and may evaluate its argument more than once.
+	 */
+	struct drm_gpuva *va = hmmptr->get_gpuva(hmmptr);
+
+	return GPUVA_START(va);
+}
+
+/* End address of the gpuva returned by the hmmptr's get_gpuva() callback. */
+static inline u64 __hmmptr_end(struct drm_hmmptr *hmmptr)
+{
+	/*
+	 * Fetch the gpuva once into a local; GPUVA_END() is defined outside
+	 * this file and may evaluate its argument more than once.
+	 */
+	struct drm_gpuva *va = hmmptr->get_gpuva(hmmptr);
+
+	return GPUVA_END(va);
+}
+
+/*
+ * Walk every entry of the hmmptr's pfn array and, for each page that is not
+ * a device private page, call dma_unlink_range() on hmmptr->iova at the byte
+ * offset of that page within the range.
+ *
+ * NOTE(review): entries are converted with hmm_pfn_to_page() without looking
+ * at their HMM_PFN_* flags; confirm that entries which were never populated
+ * really yield a NULL page here and are skipped.
+ */
+static void drm_svm_hmmptr_unmap_dma_pages(struct drm_hmmptr *hmmptr)
+{
+	u64 count = __npages_in_range(__hmmptr_start(hmmptr), __hmmptr_end(hmmptr));
+	unsigned long *pfns = hmmptr->pfn;
+	u64 idx;
+
+	for (idx = 0; idx < count; idx++) {
+		struct page *pg = hmm_pfn_to_page(pfns[idx]);
+
+		/* device private pages are left alone; only other pages are unlinked */
+		if (pg && !is_device_private_page(pg))
+			dma_unlink_range(&hmmptr->iova, idx << PAGE_SHIFT);
+	}
+}
+
+/**
+ * drm_svm_hmmptr_init() - initialize a hmmptr
+ * @hmmptr: the hmmptr to initialize; its get_gpuva callback is called here,
+ *          so it must already be set
+ * @ops: the mmu interval notifier ops used to invalidate hmmptr
+ *
+ * Allocates the pfn array and an iova covering the page-aligned range of the
+ * hmmptr's gpuva, then registers an mmu interval notifier for that range on
+ * current->mm.
+ *
+ * Return: 0 on success; -ENOMEM if the pfn array cannot be allocated;
+ * otherwise the error returned by dma_alloc_iova() or
+ * mmu_interval_notifier_insert(). On failure everything allocated here is
+ * freed again and @hmmptr->pfn is left NULL.
+ */
+int drm_svm_hmmptr_init(struct drm_hmmptr *hmmptr,
+		const struct mmu_interval_notifier_ops *ops)
+{
+	struct drm_gpuva *gpuva = hmmptr->get_gpuva(hmmptr);
+	struct dma_iova_attrs *iova = &hmmptr->iova;
+	struct drm_gpuvm *gpuvm = gpuva->vm;
+	struct drm_device *drm = gpuvm->drm;
+	u64 start = GPUVA_START(gpuva);
+	u64 end = GPUVA_END(gpuva);
+	size_t npages;
+	int ret;
+
+	start = ALIGN_DOWN(start, PAGE_SIZE);
+	end = ALIGN(end, PAGE_SIZE);
+	npages = __npages_in_range(start, end);
+	/* one zero-initialized pfn slot per page of the range */
+	hmmptr->pfn = kvcalloc(npages, sizeof(*hmmptr->pfn), GFP_KERNEL);
+	if (!hmmptr->pfn)
+		return -ENOMEM;
+
+	iova->dev = drm->dev;
+	iova->size = end - start;
+	iova->dir = DMA_BIDIRECTIONAL;
+	ret = dma_alloc_iova(iova);
+	if (ret)
+		goto free_pfn;
+
+	ret = mmu_interval_notifier_insert(&hmmptr->notifier, current->mm,
+					   start, end - start, ops);
+	if (ret)
+		goto free_iova;
+
+	/* NOTE(review): presumably a sentinel for "never populated" - confirm */
+	hmmptr->notifier_seq = LONG_MAX;
+	return 0;
+
+free_iova:
+	dma_free_iova(iova);
+free_pfn:
+	kvfree(hmmptr->pfn);
+	/* do not leave a dangling pointer behind in the caller-owned struct */
+	hmmptr->pfn = NULL;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(drm_svm_hmmptr_init);
+
+/**
+ * drm_svm_hmmptr_release() - release a hmmptr
+ *
+ * @hmmptr: the hmmptr to release
+ *
+ * Tears down what drm_svm_hmmptr_init() set up: unlinks the dma pages,
+ * removes the mmu interval notifier, frees the iova and frees the pfn array.
+ */
+void drm_svm_hmmptr_release(struct drm_hmmptr *hmmptr)
+{
+ /* must run first: it still reads hmmptr->pfn and hmmptr->iova */
+ drm_svm_hmmptr_unmap_dma_pages(hmmptr);
+ mmu_interval_notifier_remove(&hmmptr->notifier);
+ dma_free_iova(&hmmptr->iova);
+ kvfree(hmmptr->pfn);
+}
+EXPORT_SYMBOL_GPL(drm_svm_hmmptr_release);
+
+/**
+ * drm_svm_hmmptr_populate() - Populate physical pages of a range of a hmmptr
+ *
+ * @hmmptr: hmmptr to populate
+ * @owner: avoid fault for pages owned by owner, only report the current pfn.
+ * @start: start of the range
+ * @end: end of the range
+ * @write: populate the range for write purpose
+ *
+ * This function populates the physical pages of a hmmptr range. The
+ * populated physical pages are saved in the hmmptr's pfn array.
+ * It is similar to get_user_pages but calls hmm_range_fault.
+ *
+ * There are two usage models of this API:
+ *
+ * 1) use it for legacy userptr code: pass owner as NULL, fault-in the range
+ * in system pages
+ *
+ * 2) use it for svm: usually the caller would first migrate a range to device
+ * pages, then call this function with owner as the device pages owner. This
+ * way this function won't cause a fault, it only reports the range's backing
+ * pfns which are already in device memory.
+ *
+ * This function also reads the mmu notifier sequence number
+ * (mmu_interval_read_begin), for the purpose of later comparison
+ * (through mmu_interval_read_retry). The usage model is: the driver first
+ * calls this function to populate a range of a hmmptr, then calls
+ * mmu_interval_read_retry to see whether it needs to retry before programming
+ * the GPU page table. Since only a sub-range of the whole hmmptr is populated
+ * here, even if the recorded hmmptr->notifier_seq equals the notifier's
+ * current sequence number, it doesn't mean the whole hmmptr is up to date.
+ * The driver is *required* to always call this function before checking for
+ * a retry.
+ *
+ * [@start, @end) must lie inside the hmmptr's range; this is enforced with
+ * BUG_ON().
+ *
+ * This must be called with mmap read or write lock held.
+ *
+ * Return: 0 for success; -EFAULT if the mm is already being torn down;
+ * -EBUSY if hmm_range_fault() kept returning -EBUSY until the timeout
+ * expired; otherwise the negative error number from hmm_range_fault().
+ */
+int drm_svm_hmmptr_populate(struct drm_hmmptr *hmmptr, void *owner, u64 start, u64 end, bool write)
+{
+	unsigned long timeout =
+		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+	/*
+	 * Use an initializer so that every field not named here, notably
+	 * pfn_flags_mask, is zero instead of uninitialized stack content
+	 * when hmm_range_fault() reads it.
+	 */
+	struct hmm_range hmm_range = {
+		.notifier = &hmmptr->notifier,
+		.default_flags = HMM_PFN_REQ_FAULT,
+		.dev_private_owner = owner,
+	};
+	struct mm_struct *mm = hmmptr->notifier.mm;
+	int pfn_index, npages;
+	int ret;
+
+	BUG_ON(start < __hmmptr_start(hmmptr));
+	BUG_ON(end > __hmmptr_end(hmmptr));
+	mmap_assert_locked(mm);
+
+	/* hold a reference on the address space while faulting pages in */
+	if (!mmget_not_zero(mm))
+		return -EFAULT;
+
+	hmm_range.start = ALIGN_DOWN(start, PAGE_SIZE);
+	hmm_range.end = ALIGN(end, PAGE_SIZE);
+	npages = __npages_in_range(hmm_range.start, hmm_range.end);
+	/*
+	 * Results land in the slice of the hmmptr's pfn array that corresponds
+	 * to this sub-range.
+	 * NOTE(review): assumes the hmmptr start is page aligned - confirm.
+	 */
+	pfn_index = (hmm_range.start - __hmmptr_start(hmmptr)) >> PAGE_SHIFT;
+	hmm_range.hmm_pfns = hmmptr->pfn + pfn_index;
+	if (write)
+		hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
+
+	while (true) {
+		hmm_range.notifier_seq = mmu_interval_read_begin(&hmmptr->notifier);
+		ret = hmm_range_fault(&hmm_range);
+
+		/* -EBUSY: retry with a fresh sequence number until the timeout */
+		if (ret == -EBUSY) {
+			if (time_after(jiffies, timeout))
+				break;
+
+			continue;
+		}
+		break;
+	}
+
+	mmput(mm);
+
+	if (ret)
+		return ret;
+
+	__mark_range_accessed(hmm_range.hmm_pfns, npages, write);
+	/* remember the sequence number for the caller's later retry check */
+	hmmptr->notifier_seq = hmm_range.notifier_seq;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(drm_svm_hmmptr_populate);
diff --git a/include/drm/drm_svm.h b/include/drm/drm_svm.h
index 2f8658538b4b..d7b9cf6b96c4 100644
--- a/include/drm/drm_svm.h
+++ b/include/drm/drm_svm.h
@@ -4,11 +4,15 @@
*/
#include <linux/compiler_types.h>
+#include <linux/dma-mapping.h>
#include <linux/memremap.h>
+#include <drm/drm_gpuvm.h>
#include <linux/types.h>
+
struct dma_fence;
struct drm_mem_region;
+struct mmu_interval_notifier_ops;
/**
* struct migrate_vec - a migration vector is an array of addresses,
@@ -154,3 +158,53 @@ static inline u64 drm_mem_region_pfn_to_dpa(struct drm_mem_region *mr, u64 pfn)
return dpa;
}
+
+/**
+ * struct drm_hmmptr - hmmptr pointer
+ *
+ * A hmmptr is a pointer in a CPU program that can also be accessed by a GPU
+ * program, like a userptr. But unlike a userptr, a hmmptr can also be
+ * migrated to device local memory. Another way to look at it: a userptr is a
+ * special hmmptr without the capability of migration - a userptr's backing
+ * store is always in system memory.
+ *
+ * A hmmptr can have mixed backing pages in system memory and GPU vram.
+ *
+ * hmmptr is supposed to be embedded in a driver's GPU virtual range
+ * management struct such as xe_vma etc. hmmptr itself doesn't have a range.
+ * hmmptr depends on the driver's data structure (such as xe_vma) to live in
+ * a gpuvm's process space and RB-tree.
+ *
+ * With the hmmptr concept, SVM and traditional userptr can share code around
+ * mmu notifier, backing store population etc.
+ *
+ * This is built on top of the kernel HMM infrastructure and is thus called
+ * hmmptr.
+ */
+struct drm_hmmptr {
+ /**
+ * @notifier: MMU interval notifier for hmmptr
+ */
+ struct mmu_interval_notifier notifier;
+ /** @notifier_seq: notifier sequence number */
+ unsigned long notifier_seq;
+ /**
+ * @pfn: An array of pfns used for page population
+ */
+ unsigned long *pfn;
+ /**
+ * @iova: iova holds the dma-address of this hmmptr.
+ * iova is only used when the backing pages are in sram.
+ */
+ struct dma_iova_attrs iova;
+ /**
+ * @get_gpuva: callback function to get the gpuva of this hmmptr
+ * FIXME: Probably have a direct gpuva member in hmmptr
+ */
+ struct drm_gpuva * (*get_gpuva) (struct drm_hmmptr *hmmptr);
+};
+
+/* hmmptr init/release/populate helpers, defined in drivers/gpu/drm/drm_svm.c */
+int drm_svm_hmmptr_init(struct drm_hmmptr *hmmptr,
+ const struct mmu_interval_notifier_ops *ops);
+void drm_svm_hmmptr_release(struct drm_hmmptr *hmmptr);
+int drm_svm_hmmptr_populate(struct drm_hmmptr *hmmptr, void *owner,
+ u64 start, u64 end, bool write);
--
2.26.3
next prev parent reply other threads:[~2024-05-29 1:05 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-29 1:18 [CI v3 01/26] mm/hmm: let users to tag specific PFNs Oak Zeng
2024-05-29 1:14 ` ✗ CI.Patch_applied: failure for series starting with [CI,v3,01/26] " Patchwork
2024-05-29 1:19 ` [CI v3 02/26] dma-mapping: provide an interface to allocate IOVA Oak Zeng
2024-05-29 1:19 ` [CI v3 03/26] dma-mapping: provide callbacks to link/unlink pages to specific IOVA Oak Zeng
2024-05-29 1:19 ` [CI v3 04/26] iommu/dma: Provide an interface to allow preallocate IOVA Oak Zeng
2024-05-29 1:19 ` [CI v3 05/26] iommu/dma: Prepare map/unmap page functions to receive IOVA Oak Zeng
2024-05-29 1:19 ` [CI v3 06/26] iommu/dma: Implement link/unlink page callbacks Oak Zeng
2024-05-29 1:19 ` [CI v3 07/26] drm: move xe_sg_segment_size to drm layer Oak Zeng
2024-05-29 1:19 ` [CI v3 08/26] drm: Move GPUVA_START/LAST to drm_gpuvm.h Oak Zeng
2024-05-29 1:19 ` [CI v3 09/26] drm/svm: add a mm field to drm_gpuvm struct Oak Zeng
2024-05-29 1:19 ` [CI v3 10/26] drm/svm: introduce drm_mem_region concept Oak Zeng
2024-05-29 1:19 ` Oak Zeng [this message]
2024-05-29 1:19 ` [CI v3 12/26] drm/svm: Introduce helper to remap drm memory region Oak Zeng
2024-05-29 1:19 ` [CI v3 13/26] drm/svm: handle CPU page fault Oak Zeng
2024-05-29 1:19 ` [CI v3 14/26] drm/svm: Migrate a range of hmmptr to vram Oak Zeng
2024-05-29 1:19 ` [CI v3 15/26] drm/svm: Add DRM SVM documentation Oak Zeng
2024-05-29 1:19 ` [CI v3 16/26] drm/xe: s/xe_tile_migrate_engine/xe_tile_migrate_exec_queue Oak Zeng
2024-05-29 1:19 ` [CI v3 17/26] drm/xe: Add xe_vm_pgtable_update_op to xe_vma_ops Oak Zeng
2024-05-29 1:19 ` [CI v3 18/26] drm/xe: Convert multiple bind ops into single job Oak Zeng
2024-05-29 1:19 ` [CI v3 19/26] drm/xe: Update VM trace events Oak Zeng
2024-05-29 1:19 ` [CI v3 20/26] drm/xe: Update PT layer with better error handling Oak Zeng
2024-05-29 1:19 ` [CI v3 21/26] drm/xe: Retry BO allocation Oak Zeng
2024-05-29 1:19 ` [CI v3 22/26] drm/xe: Rework GPU page fault handling Oak Zeng
2024-05-29 1:19 ` [CI v3 23/26] drm/xe/uapi: Add DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR flag Oak Zeng
2024-05-29 1:19 ` [CI v3 24/26] drm/xe: Add dma_addr res cursor Oak Zeng
2024-05-29 1:19 ` [CI v3 25/26] drm/xe: Use drm_mem_region for xe Oak Zeng
2024-05-29 1:19 ` [CI v3 26/26] drm/xe: use drm_hmmptr in xe Oak Zeng
-- strict thread matches above, loose matches on Subject: below --
2024-05-30 0:47 [CI v3 01/26] mm/hmm: let users to tag specific PFNs Oak Zeng
2024-05-30 0:47 ` [CI v3 11/26] drm/svm: introduce hmmptr and helper functions Oak Zeng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240529011924.4125173-11-oak.zeng@intel.com \
--to=oak.zeng@intel.com \
--cc=intel-xe@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox