From: Oak Zeng <oak.zeng@intel.com>
To: intel-xe@lists.freedesktop.org
Subject: [CI 13/44] drm/svm: handle CPU page fault
Date: Fri, 14 Jun 2024 17:57:46 -0400
Message-ID: <20240614215817.1097633-13-oak.zeng@intel.com>
In-Reply-To: <20240614215817.1097633-1-oak.zeng@intel.com>

Under SVM, CPU and GPU programs share the same virtual address
space. The backing store of this virtual address space can be
either system memory or device memory. Since GPU device memory
is remapped as DEVICE_PRIVATE, the CPU can't access it directly:
any CPU access to device memory causes a page fault. Implement
a page fault handler to migrate memory back to system memory and
map it into the CPU page table so the CPU program can proceed.
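
For illustration, below is a minimal sketch of this fault-handler
flow, following the migrate_vma pattern documented in
Documentation/mm/hmm.rst. Here my_migrate_to_ram(), my_owner and
my_copy_from_device() are placeholders rather than APIs added by
this series; the actual handler added by this patch is
drm_svm_migrate_to_sram() below.

  static vm_fault_t my_migrate_to_ram(struct vm_fault *vmf)
  {
      unsigned long src_pfn = 0, dst_pfn = 0;
      struct migrate_vma mig = {
          .vma         = vmf->vma,
          .start       = vmf->address & PAGE_MASK,
          .end         = (vmf->address & PAGE_MASK) + PAGE_SIZE,
          .src         = &src_pfn,
          .dst         = &dst_pfn,
          .fault_page  = vmf->page,
          .pgmap_owner = my_owner, /* owner used at pagemap registration */
          .flags       = MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
      };
      struct page *dpage;

      /* Collect the device page and invalidate the device PTEs. */
      if (migrate_vma_setup(&mig) < 0)
          return VM_FAULT_SIGBUS;
      if (!mig.cpages)
          return 0;

      /* Back the faulting address with a locked system page. */
      dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address);
      if (!dpage) {
          migrate_vma_finalize(&mig);
          return VM_FAULT_OOM;
      }
      lock_page(dpage);
      dst_pfn = migrate_pfn(page_to_pfn(dpage));

      if (src_pfn & MIGRATE_PFN_MIGRATE)
          my_copy_from_device(vmf->page, dpage); /* device-to-host copy */

      migrate_vma_pages(&mig);    /* point the CPU pte at the new page */
      migrate_vma_finalize(&mig); /* free the device-private source page */
      return 0;
  }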

Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: Brian Welty <brian.welty@intel.com>
Cc: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Oak Zeng <oak.zeng@intel.com>
---
 drivers/gpu/drm/drm_svm.c | 280 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 279 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_svm.c b/drivers/gpu/drm/drm_svm.c
index 2c0cb2f82b28..1e6db4857a22 100644
--- a/drivers/gpu/drm/drm_svm.c
+++ b/drivers/gpu/drm/drm_svm.c
@@ -6,7 +6,9 @@
 #include <linux/scatterlist.h>
 #include <linux/mmu_notifier.h>
 #include <linux/dma-mapping.h>
+#include <linux/overflow.h>
 #include <linux/memremap.h>
+#include <linux/migrate.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_svm.h>
 #include <linux/swap.h>
@@ -325,7 +327,283 @@ int drm_svm_hmmptr_populate(struct drm_hmmptr *hmmptr, void *owner, u64 start, u
 }
 EXPORT_SYMBOL_GPL(drm_svm_hmmptr_populate);
 
-static struct dev_pagemap_ops drm_devm_pagemap_ops;
+static void __drm_svm_free_pages(unsigned long *mpfn, unsigned long npages)
+{
+	struct page *page;
+	int j;
+
+	for (j = 0; j < npages; j++) {
+		page = migrate_pfn_to_page(mpfn[j]);
+		mpfn[j] = 0;
+		if (page) {
+			unlock_page(page);
+			put_page(page);
+		}
+	}
+}
+
+/**
+ * __drm_svm_alloc_host_pages() - allocate host pages for the faulting vma
+ *
+ * @vma: the faulting vma that we need to allocate pages for
+ * @addr: the virtual address that we allocate pages for
+ * @npages: number of pages to allocate
+ * @mpfn: output array for the migration pfns of the allocated pages
+ *
+ * This function allocates host pages for the specified vma.
+ *
+ * When this function returns successfully, the pages are locked.
+ *
+ * Return:
+ * 0 on success, negative error code otherwise
+ */
+static int __drm_svm_alloc_host_pages(struct vm_area_struct *vma,
+				      unsigned long addr,
+				      u64 npages,
+				      unsigned long *mpfn)
+{
+	struct page *page;
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (unlikely(!page))
+			goto free_allocated;
+
+		/* Lock the page per HMM requirements, see Documentation/mm/hmm.rst */
+		lock_page(page);
+
+		mpfn[i] = migrate_pfn(page_to_pfn(page));
+		addr += PAGE_SIZE;
+	}
+	return 0;
+
+free_allocated:
+	__drm_svm_free_pages(mpfn, i);
+	return -ENOMEM;
+}
+
+static struct migrate_vec *__generate_migrate_vec_vram(unsigned long *mpfn,
+						       bool is_migrate_src,
+						       unsigned long npages)
+{
+	struct migrate_vec *vec;
+	size_t size = struct_size(vec, addr_vec, npages);
+	struct drm_mem_region *mr = NULL;
+	struct page *page;
+	u64 dpa;
+	int i, j;
+
+	WARN_ON_ONCE(is_migrate_src && npages != 1);
+	vec = kzalloc(size, GFP_KERNEL);
+	if (!vec)
+		return NULL;
+
+	for (i = 0, j = 0; i < npages; i++) {
+		/*
+		 * We only migrate one page from vram to sram on a CPU page fault
+		 * today. If this source page is not marked with the _MIGRATE
+		 * flag, something is wrong and we must report an error to core mm.
+		 *
+		 * If we move to multi-page migration, the logic below needs a
+		 * revisit: hmm allows migrating only some (but not all) of the
+		 * requested pages.
+		 */
+		if (is_migrate_src && !(mpfn[i] & MIGRATE_PFN_MIGRATE)) {
+			pr_err("Migrate from vram to sram: MIGRATE_PFN_MIGRATE flag is not set\n");
+			kfree(vec);
+			return NULL;
+		}
+
+		page = migrate_pfn_to_page(mpfn[i]);
+		if (!page || !is_device_private_page(page)) {
+			pr_err("No page or wrong page zone in %s\n", __func__);
+			kfree(vec);
+			return NULL;
+		}
+		mr = drm_page_to_mem_region(page);
+
+		dpa = drm_mem_region_page_to_dpa(mr, page);
+		vec->addr_vec[j++].dpa = dpa;
+	}
+	vec->mr = mr;
+	vec->npages = j;
+	return vec;
+}
+
+static struct migrate_vec *__generate_migrate_vec_sram(struct device *dev,
+						       unsigned long *mpfn,
+						       bool is_migrate_src,
+						       unsigned long npages)
+{
+	enum dma_data_direction dir = is_migrate_src ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	struct migrate_vec *vec;
+	size_t size = struct_size(vec, addr_vec, npages);
+	dma_addr_t dma_addr;
+	struct page *page;
+	int i, j, k;
+
+	page = migrate_pfn_to_page(mpfn[0]);
+	if (unlikely(!page))
+		return NULL;
+
+	vec = kzalloc(size, GFP_KERNEL);
+	if (!vec)
+		return NULL;
+
+	for (i = 0, k = 0; i < npages; i++) {
+		if (is_migrate_src && !(mpfn[i] & MIGRATE_PFN_MIGRATE))
+			continue;
+
+		page = migrate_pfn_to_page(mpfn[i]);
+		if (!page || is_device_private_page(page)) {
+			pr_err("No page or wrong page zone in %s\n", __func__);
+			goto undo_dma_mapping;
+		}
+
+		dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+		if (unlikely(dma_mapping_error(dev, dma_addr)))
+			goto undo_dma_mapping;
+
+		vec->addr_vec[k++].dma_addr = dma_addr;
+	}
+
+	vec->mr = NULL;
+	vec->npages = k;
+	return vec;
+
+undo_dma_mapping:
+	for (j = 0; j < k; j++) {
+		if (vec->addr_vec[j].dma_addr)
+			dma_unmap_page(dev, vec->addr_vec[j].dma_addr, PAGE_SIZE, dir);
+	}
+	kfree(vec);
+	return NULL;
+}
+
+static void __free_migrate_vec_sram(struct device *dev, struct migrate_vec *vec,
+				    bool is_migrate_src)
+{
+	enum dma_data_direction dir = is_migrate_src ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	int i;
+
+	WARN_ON_ONCE(vec->mr);
+
+	for (i = 0; i < vec->npages; i++) {
+		dma_unmap_page(dev, vec->addr_vec[i].dma_addr, PAGE_SIZE, dir);
+		/* No need to free host pages; migrate_vma_finalize() handles it */
+	}
+	kfree(vec);
+}
+
+static void __free_migrate_vec_vram(struct migrate_vec *vec)
+{
+	WARN_ON_ONCE(!vec->mr);
+
+	kfree(vec);
+}
+
+/**
+ * drm_svm_migrate_to_sram() - Migrate memory back to sram on CPU page fault
+ *
+ * @vmf: CPU vm fault structure; contains fault information such as the vma
+ *
+ * Note: this is the CPU vm fault handler; the caller holds the mmap read lock.
+ *
+ * This function migrates one page at the fault address. This is the normal
+ * core mm page fault scheme: Linux doesn't aggressively prefault at CPU page
+ * fault time, it only faults in one page to recover the fault address. Even
+ * if we migrated more than one page, core mm would still program only one pte
+ * entry (covering one page). See the logic in handle_pte_fault: do_swap_page
+ * eventually calls drm_svm_migrate_to_sram in our case.
+ *
+ * We call migrate_vma_setup to set up the migration. During migrate_vma_setup,
+ * the device page table is invalidated before migration (by calling the
+ * driver-registered mmu notifier).
+ *
+ * We call migrate_vma_finalize to finalize the migration. During
+ * migrate_vma_finalize, the device pages of the source buffer are freed (by
+ * calling the memory region's drm_mem_region_free_page callback function).
+ *
+ * Return:
+ * 0 on success
+ * VM_FAULT_SIGBUS: failed to migrate the page to system memory; the
+ * application will be signaled a SIGBUS
+ */
+static vm_fault_t drm_svm_migrate_to_sram(struct vm_fault *vmf)
+{
+	struct drm_mem_region *mr = drm_page_to_mem_region(vmf->page);
+	struct drm_device *drm = mr->mr_ops.drm_mem_region_get_device(mr);
+	unsigned long src_pfn = 0, dst_pfn = 0;
+	struct device *dev = drm->dev;
+	struct vm_area_struct *vma = vmf->vma;
+	struct migrate_vec *src;
+	struct migrate_vec *dst;
+	struct dma_fence *fence;
+	vm_fault_t ret = 0;
+
+	struct migrate_vma migrate_vma = {
+		.vma		= vma,
+		.start		= ALIGN_DOWN(vmf->address, PAGE_SIZE),
+		.end		= ALIGN_DOWN(vmf->address, PAGE_SIZE) + PAGE_SIZE,
+		.src		= &src_pfn,
+		.dst		= &dst_pfn,
+		.pgmap_owner	= mr->mr_ops.drm_mem_region_pagemap_owner(mr),
+		.flags		= MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
+		.fault_page	= vmf->page,
+	};
+
+	if (migrate_vma_setup(&migrate_vma) < 0)
+		return VM_FAULT_SIGBUS;
+
+	if (!migrate_vma.cpages)
+		return 0;
+
+	if (__drm_svm_alloc_host_pages(vma, migrate_vma.start, 1,
+				       migrate_vma.dst)) {
+		ret = VM_FAULT_OOM;
+		goto migrate_finalize;
+	}
+
+	src = __generate_migrate_vec_vram(migrate_vma.src, true, 1);
+	if (!src) {
+		ret = VM_FAULT_SIGBUS;
+		goto free_host_pages;
+	}
+
+	dst = __generate_migrate_vec_sram(dev, migrate_vma.dst, false, 1);
+	if (!dst) {
+		ret = VM_FAULT_SIGBUS;
+		goto free_migrate_src;
+	}
+
+	fence = mr->mr_ops.drm_mem_region_migrate(src, dst);
+	if (IS_ERR(fence)) {
+		ret = VM_FAULT_SIGBUS;
+		goto free_migrate_dst;
+	}
+	dma_fence_wait(fence, false);
+	dma_fence_put(fence);
+
+	migrate_vma_pages(&migrate_vma);
+
+free_migrate_dst:
+	__free_migrate_vec_sram(dev, dst, false);
+free_migrate_src:
+	__free_migrate_vec_vram(src);
+free_host_pages:
+	if (ret)
+		__drm_svm_free_pages(migrate_vma.dst, 1);
+migrate_finalize:
+	if (ret)
+		memset(migrate_vma.dst, 0, sizeof(*migrate_vma.dst));
+	migrate_vma_finalize(&migrate_vma);
+	return ret;
+}
+
+static struct dev_pagemap_ops drm_devm_pagemap_ops = {
+	.migrate_to_ram = drm_svm_migrate_to_sram,
+};
 
 /**
  * drm_svm_register_mem_region: Remap and provide memmap backing for device memory
-- 
2.26.3

