Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/xe/dgfx: Release mmap mappings on rpm suspend
@ 2023-12-20 18:50 Badal Nilawar
  2023-12-21  9:21 ` Matthew Auld
  0 siblings, 1 reply; 3+ messages in thread
From: Badal Nilawar @ 2023-12-20 18:50 UTC (permalink / raw)
  To: intel-xe; +Cc: thomas.hellstrom, matthew.auld, rodrigo.vivi

Release all mmap mappings for all vram objects which are associated
with userfault such that, while pcie function in D3hot, any access
to memory mappings will raise a userfault.

Upon userfault, in order to access memory mappings, if graphics
function is in D3 then runtime resume of dgpu will be triggered to
transition to D0.

v2:
  - Add lock dep assertion before updating vram_userfault_count (Rodrigo)
  - Avoid iomem check before bo migration check as bo can migrate
    to system memory (Matthew Auld)
v3:
  - Delete bo userfault link during bo destroy
  - Upon bo move (vram-smem), do bo userfault link deletion in
    xe_bo_move_notify instead of xe_bo_move (Thomas Hellström)
  - Grab lock in rpm hook while deleting bo userfault link (Matthew Auld)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c           | 65 +++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_bo.h           |  2 +
 drivers/gpu/drm/xe/xe_bo_types.h     |  5 +++
 drivers/gpu/drm/xe/xe_device_types.h | 20 +++++++++
 drivers/gpu/drm/xe/xe_pm.c           |  7 +++
 5 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 8e4a3b1f6b93..2ab933f50192 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -586,6 +586,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 {
 	struct ttm_buffer_object *ttm_bo = &bo->ttm;
 	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
+	struct ttm_resource *old_mem = ttm_bo->resource;
+	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
 	int ret;
 
 	/*
@@ -597,6 +599,7 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 		return -EINVAL;
 
 	xe_bo_vunmap(bo);
+
 	ret = xe_bo_trigger_rebind(xe, bo, ctx);
 	if (ret)
 		return ret;
@@ -605,6 +608,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
 	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
 		dma_buf_move_notify(ttm_bo->base.dma_buf);
 
+	/*
+	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
+	 * so if we moved from VRAM make sure to unlink this from the userfault
+	 * tracking.
+	 */
+	if (mem_type_is_vram(old_mem_type)) {
+		spin_lock(&xe->mem_access.vram_userfault_lock);
+		if (!list_empty(&bo->vram_userfault_link))
+			list_del_init(&bo->vram_userfault_link);
+		spin_unlock(&xe->mem_access.vram_userfault_lock);
+	}
+
 	return 0;
 }
 
@@ -1063,6 +1078,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 	if (bo->vm && xe_bo_is_user(bo))
 		xe_vm_put(bo->vm);
 
+	spin_lock(&xe->mem_access.vram_userfault_lock);
+	if (!list_empty(&bo->vram_userfault_link))
+		list_del(&bo->vram_userfault_link);
+	spin_unlock(&xe->mem_access.vram_userfault_lock);
+
 	kfree(bo);
 }
 
@@ -1110,6 +1130,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
 	struct drm_device *ddev = tbo->base.dev;
+	struct xe_bo *bo = NULL;
+	struct xe_device *xe = to_xe_device(ddev);
 	vm_fault_t ret;
 	int idx, r = 0;
 
@@ -1118,7 +1140,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 		return ret;
 
 	if (drm_dev_enter(ddev, &idx)) {
-		struct xe_bo *bo = ttm_to_xe_bo(tbo);
+		bo = ttm_to_xe_bo(tbo);
 
 		trace_xe_bo_cpu_fault(bo);
 
@@ -1137,10 +1159,31 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
 	} else {
 		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
 	}
+
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;
+	/*
+	 * ttm_bo_vm_reserve() already has dma_resv_lock.
+	 * vram_userfault_count is protected by dma_resv lock and rpm wakeref.
+	 */
+	if (ret == VM_FAULT_NOPAGE && bo && !bo->vram_userfault_count) {
+		if (tbo->resource->bus.is_iomem) {
+			dma_resv_assert_held(tbo->base.resv);
+
+			xe_device_mem_access_get(xe);
+
+			bo->vram_userfault_count = 1;
+
+			spin_lock(&xe->mem_access.vram_userfault_lock);
+			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault_list);
+			spin_unlock(&xe->mem_access.vram_userfault_lock);
+
+			xe_device_mem_access_put(xe);
+		}
+	}
 
 	dma_resv_unlock(tbo->base.resv);
+
 	return ret;
 }
 
@@ -1254,6 +1297,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 #ifdef CONFIG_PROC_FS
 	INIT_LIST_HEAD(&bo->client_link);
 #endif
+	INIT_LIST_HEAD(&bo->vram_userfault_link);
 
 	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
 
@@ -2264,6 +2308,25 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
 	return err;
 }
 
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
+{
+	struct ttm_buffer_object *tbo = &bo->ttm;
+	struct ttm_device *bdev = tbo->bdev;
+	struct xe_device *xe = ttm_to_xe_device(bdev);
+
+	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
+
+	/*
+	 * Other callers xe_ttm_bo_destroy and xe_bo_move_notify also checks and
+	 * deletes vram_user_fault_link so grabbing spinlock here.
+	 */
+	XE_WARN_ON(!bo->vram_userfault_count);
+	spin_lock(&xe->mem_access.vram_userfault_lock);
+	list_del(&bo->vram_userfault_link);
+	bo->vram_userfault_count = 0;
+	spin_unlock(&xe->mem_access.vram_userfault_lock);
+}
+
 #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
 #include "tests/xe_bo.c"
 #endif
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 97b32528c600..350cc73cadf8 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -249,6 +249,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file);
+void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
+
 int xe_bo_dumb_create(struct drm_file *file_priv,
 		      struct drm_device *dev,
 		      struct drm_mode_create_dumb *args);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 64c2249a4e40..910b35c05acd 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -88,6 +88,11 @@ struct xe_bo {
 	 * objects.
 	 */
 	u16 cpu_caching;
+	/**
+	 * Whether the object is currently in fake offset mmap backed by vram.
+	 */
+	unsigned int vram_userfault_count;
+	struct list_head vram_userfault_link;
 };
 
 #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 71f23ac365e6..6890d21f7a4c 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -385,6 +385,26 @@ struct xe_device {
 	struct {
 		/** @ref: ref count of memory accesses */
 		atomic_t ref;
+		/*
+		 *  Protects access to vram usefault list.
+		 *  It is required, if we are outside of the runtime suspend path,
+		 *  access to @vram_userfault_list requires always first grabbing the
+		 *  runtime pm, to ensure we can't race against runtime suspend.
+		 *  Once we have that we also need to grab @vram_userfault_lock,
+		 *  at which point we have exclusive access.
+		 *  The runtime suspend path is special since it doesn't really hold any locks,
+		 *  but instead has exclusive access by virtue of all other accesses requiring
+		 *  holding the runtime pm wakeref.
+		 */
+		spinlock_t vram_userfault_lock;
+
+		/*
+		 *  Keep list of userfaulted gem obj, which require to release their
+		 *  mmap mappings at runtime suspend path.
+		 */
+		struct list_head vram_userfault_list;
+
+		bool vram_userfault_ongoing;
 	} mem_access;
 
 	/**
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index b429c2876a76..bc1cb081e6e5 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -181,6 +181,8 @@ void xe_pm_init(struct xe_device *xe)
 	}
 
 	xe_pm_runtime_init(xe);
+	INIT_LIST_HEAD(&xe->mem_access.vram_userfault_list);
+	spin_lock_init(&xe->mem_access.vram_userfault_lock);
 }
 
 void xe_pm_runtime_fini(struct xe_device *xe)
@@ -214,6 +216,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
 
 int xe_pm_runtime_suspend(struct xe_device *xe)
 {
+	struct xe_bo *bo, *on;
 	struct xe_gt *gt;
 	u8 id;
 	int err = 0;
@@ -247,6 +250,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
 	 */
 	lock_map_acquire(&xe_device_mem_access_lockdep_map);
 
+	list_for_each_entry_safe(bo, on,
+				 &xe->mem_access.vram_userfault_list, vram_userfault_link)
+		xe_bo_runtime_pm_release_mmap_offset(bo);
+
 	if (xe->d3cold.allowed) {
 		err = xe_bo_evict_all(xe);
 		if (err)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] drm/xe/dgfx: Release mmap mappings on rpm suspend
  2023-12-20 18:50 [PATCH v3] drm/xe/dgfx: Release mmap mappings on rpm suspend Badal Nilawar
@ 2023-12-21  9:21 ` Matthew Auld
  2023-12-22  7:42   ` Nilawar, Badal
  0 siblings, 1 reply; 3+ messages in thread
From: Matthew Auld @ 2023-12-21  9:21 UTC (permalink / raw)
  To: Badal Nilawar, intel-xe; +Cc: thomas.hellstrom, rodrigo.vivi

On 20/12/2023 18:50, Badal Nilawar wrote:
> Release all mmap mappings for all vram objects which are associated
> with userfault such that, while pcie function in D3hot, any access
> to memory mappings will raise a userfault.
> 
> Upon userfault, in order to access memory mappings, if graphics
> function is in D3 then runtime resume of dgpu will be triggered to
> transition to D0.
> 
> v2:
>    - Add lock dep assertion before updating vram_userfault_count (Rodrigo)
>    - Avoid iomem check before bo migration check as bo can migrate
>      to system memory (Matthew Auld)
> v3:
>    - Delete bo userfault link during bo destroy
>    - Upon bo move (vram-smem), do bo userfault link deletion in
>      xe_bo_move_notify instead of xe_bo_move (Thomas Hellström)
>    - Grab lock in rpm hook while deleting bo userfault link (Matthew Auld)
> 
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Matthew Auld <matthew.auld@intel.com>
> Cc: Anshuman Gupta <anshuman.gupta@intel.com>
> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
> ---
>   drivers/gpu/drm/xe/xe_bo.c           | 65 +++++++++++++++++++++++++++-
>   drivers/gpu/drm/xe/xe_bo.h           |  2 +
>   drivers/gpu/drm/xe/xe_bo_types.h     |  5 +++
>   drivers/gpu/drm/xe/xe_device_types.h | 20 +++++++++
>   drivers/gpu/drm/xe/xe_pm.c           |  7 +++
>   5 files changed, 98 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 8e4a3b1f6b93..2ab933f50192 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -586,6 +586,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>   {
>   	struct ttm_buffer_object *ttm_bo = &bo->ttm;
>   	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> +	struct ttm_resource *old_mem = ttm_bo->resource;
> +	u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
>   	int ret;
>   
>   	/*
> @@ -597,6 +599,7 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>   		return -EINVAL;
>   
>   	xe_bo_vunmap(bo);
> +
>   	ret = xe_bo_trigger_rebind(xe, bo, ctx);
>   	if (ret)
>   		return ret;
> @@ -605,6 +608,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>   	if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
>   		dma_buf_move_notify(ttm_bo->base.dma_buf);
>   
> +	/*
> +	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
> +	 * so if we moved from VRAM make sure to unlink this from the userfault
> +	 * tracking.
> +	 */
> +	if (mem_type_is_vram(old_mem_type)) {
> +		spin_lock(&xe->mem_access.vram_userfault_lock);
> +		if (!list_empty(&bo->vram_userfault_link))
> +			list_del_init(&bo->vram_userfault_link);
> +		spin_unlock(&xe->mem_access.vram_userfault_lock);
> +	}
> +
>   	return 0;
>   }
>   
> @@ -1063,6 +1078,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
>   	if (bo->vm && xe_bo_is_user(bo))
>   		xe_vm_put(bo->vm);
>   
> +	spin_lock(&xe->mem_access.vram_userfault_lock);
> +	if (!list_empty(&bo->vram_userfault_link))
> +		list_del(&bo->vram_userfault_link);
> +	spin_unlock(&xe->mem_access.vram_userfault_lock);
> +
>   	kfree(bo);
>   }
>   
> @@ -1110,6 +1130,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>   {
>   	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
>   	struct drm_device *ddev = tbo->base.dev;
> +	struct xe_bo *bo = NULL;
> +	struct xe_device *xe = to_xe_device(ddev);
>   	vm_fault_t ret;
>   	int idx, r = 0;
>   
> @@ -1118,7 +1140,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>   		return ret;
>   
>   	if (drm_dev_enter(ddev, &idx)) {
> -		struct xe_bo *bo = ttm_to_xe_bo(tbo);
> +		bo = ttm_to_xe_bo(tbo);
>   
>   		trace_xe_bo_cpu_fault(bo);
>   
> @@ -1137,10 +1159,31 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>   	} else {
>   		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
>   	}
> +
>   	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
>   		return ret;
> +	/*
> +	 * ttm_bo_vm_reserve() already has dma_resv_lock.
> +	 * vram_userfault_count is protected by dma_resv lock and rpm wakeref.
> +	 */
> +	if (ret == VM_FAULT_NOPAGE && bo && !bo->vram_userfault_count) {
> +		if (tbo->resource->bus.is_iomem) {
> +			dma_resv_assert_held(tbo->base.resv);
> +
> +			xe_device_mem_access_get(xe);

Thinking ahead for d3cold, we probably don't want to introduce more 
places that do dma-resv -> access_get. Not a big deal for now, but at 
some point someone will have to fix this. Please consider if we can 
easily move this higher up now.

> +
> +			bo->vram_userfault_count = 1;
> +
> +			spin_lock(&xe->mem_access.vram_userfault_lock);
> +			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault_list);
> +			spin_unlock(&xe->mem_access.vram_userfault_lock);
> +
> +			xe_device_mem_access_put(xe);
> +		}
> +	}
>   
>   	dma_resv_unlock(tbo->base.resv);
> +
>   	return ret;
>   }
>   
> @@ -1254,6 +1297,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
>   #ifdef CONFIG_PROC_FS
>   	INIT_LIST_HEAD(&bo->client_link);
>   #endif
> +	INIT_LIST_HEAD(&bo->vram_userfault_link);
>   
>   	drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
>   
> @@ -2264,6 +2308,25 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
>   	return err;
>   }
>   
> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
> +{
> +	struct ttm_buffer_object *tbo = &bo->ttm;
> +	struct ttm_device *bdev = tbo->bdev;
> +	struct xe_device *xe = ttm_to_xe_device(bdev);
> +
> +	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
> +
> +	/*
> +	 * Other callers xe_ttm_bo_destroy and xe_bo_move_notify also checks and
> +	 * deletes vram_user_fault_link so grabbing spinlock here.
> +	 */
> +	XE_WARN_ON(!bo->vram_userfault_count);
> +	spin_lock(&xe->mem_access.vram_userfault_lock);

AFAICT exclusive access is needed for the entire list op, including the 
list iteration prior to this.

> +	list_del(&bo->vram_userfault_link);
> +	bo->vram_userfault_count = 0;
> +	spin_unlock(&xe->mem_access.vram_userfault_lock);
> +}
> +
>   #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>   #include "tests/xe_bo.c"
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 97b32528c600..350cc73cadf8 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -249,6 +249,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
>   			struct drm_file *file);
>   int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
>   			     struct drm_file *file);
> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
> +
>   int xe_bo_dumb_create(struct drm_file *file_priv,
>   		      struct drm_device *dev,
>   		      struct drm_mode_create_dumb *args);
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 64c2249a4e40..910b35c05acd 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -88,6 +88,11 @@ struct xe_bo {
>   	 * objects.
>   	 */
>   	u16 cpu_caching;
> +	/**
> +	 * Whether the object is currently in fake offset mmap backed by vram.
> +	 */
> +	unsigned int vram_userfault_count;
> +	struct list_head vram_userfault_link;

Nit: Needs proper kernel-doc. Also consider maybe wrapping this in a struct:

struct {
     struct list_head link;
     int count;
} vram_userfault;

>   };
>   
>   #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 71f23ac365e6..6890d21f7a4c 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -385,6 +385,26 @@ struct xe_device {
>   	struct {
>   		/** @ref: ref count of memory accesses */
>   		atomic_t ref;
> +		/*
> +		 *  Protects access to vram usefault list.
> +		 *  It is required, if we are outside of the runtime suspend path,
> +		 *  access to @vram_userfault_list requires always first grabbing the
> +		 *  runtime pm, to ensure we can't race against runtime suspend.
> +		 *  Once we have that we also need to grab @vram_userfault_lock,
> +		 *  at which point we have exclusive access.
> +		 *  The runtime suspend path is special since it doesn't really hold any locks,
> +		 *  but instead has exclusive access by virtue of all other accesses requiring
> +		 *  holding the runtime pm wakeref.
> +		 */
> +		spinlock_t vram_userfault_lock;
> +
> +		/*
> +		 *  Keep list of userfaulted gem obj, which require to release their
> +		 *  mmap mappings at runtime suspend path.
> +		 */

Nit: Not properly formatted kernel-doc here and above.

> +		struct list_head vram_userfault_list;
> +
> +		bool vram_userfault_ongoing;

Also maybe consider wrapping this in a struct:

struct {
     spinlock_t lock;
     list_head list;
} vram_userfault;

Also vram_userfault_ongoing looks to be unused.

>   	} mem_access;
>   
>   	/**
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index b429c2876a76..bc1cb081e6e5 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -181,6 +181,8 @@ void xe_pm_init(struct xe_device *xe)
>   	}
>   
>   	xe_pm_runtime_init(xe);
> +	INIT_LIST_HEAD(&xe->mem_access.vram_userfault_list);
> +	spin_lock_init(&xe->mem_access.vram_userfault_lock);
>   }
>   
>   void xe_pm_runtime_fini(struct xe_device *xe)
> @@ -214,6 +216,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
>   
>   int xe_pm_runtime_suspend(struct xe_device *xe)
>   {
> +	struct xe_bo *bo, *on;
>   	struct xe_gt *gt;
>   	u8 id;
>   	int err = 0;
> @@ -247,6 +250,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>   	 */
>   	lock_map_acquire(&xe_device_mem_access_lockdep_map);
>   
> +	list_for_each_entry_safe(bo, on,
> +				 &xe->mem_access.vram_userfault_list, vram_userfault_link)
> +		xe_bo_runtime_pm_release_mmap_offset(bo);
> +
>   	if (xe->d3cold.allowed) {
>   		err = xe_bo_evict_all(xe);
>   		if (err)

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v3] drm/xe/dgfx: Release mmap mappings on rpm suspend
  2023-12-21  9:21 ` Matthew Auld
@ 2023-12-22  7:42   ` Nilawar, Badal
  0 siblings, 0 replies; 3+ messages in thread
From: Nilawar, Badal @ 2023-12-22  7:42 UTC (permalink / raw)
  To: Matthew Auld, intel-xe; +Cc: thomas.hellstrom, rodrigo.vivi



On 21-12-2023 14:51, Matthew Auld wrote:
> On 20/12/2023 18:50, Badal Nilawar wrote:
>> Release all mmap mappings for all vram objects which are associated
>> with userfault such that, while pcie function in D3hot, any access
>> to memory mappings will raise a userfault.
>>
>> Upon userfault, in order to access memory mappings, if graphics
>> function is in D3 then runtime resume of dgpu will be triggered to
>> transition to D0.
>>
>> v2:
>>    - Add lock dep assertion before updating vram_userfault_count 
>> (Rodrigo)
>>    - Avoid iomem check before bo migration check as bo can migrate
>>      to system memory (Matthew Auld)
>> v3:
>>    - Delete bo userfault link during bo destroy
>>    - Upon bo move (vram-smem), do bo userfault link deletion in
>>      xe_bo_move_notify instead of xe_bo_move (Thomas Hellström)
>>    - Grab lock in rpm hook while deleting bo userfault link (Matthew 
>> Auld)
>>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Matthew Auld <matthew.auld@intel.com>
>> Cc: Anshuman Gupta <anshuman.gupta@intel.com>
>> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_bo.c           | 65 +++++++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_bo.h           |  2 +
>>   drivers/gpu/drm/xe/xe_bo_types.h     |  5 +++
>>   drivers/gpu/drm/xe/xe_device_types.h | 20 +++++++++
>>   drivers/gpu/drm/xe/xe_pm.c           |  7 +++
>>   5 files changed, 98 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>> index 8e4a3b1f6b93..2ab933f50192 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.c
>> +++ b/drivers/gpu/drm/xe/xe_bo.c
>> @@ -586,6 +586,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>>   {
>>       struct ttm_buffer_object *ttm_bo = &bo->ttm;
>>       struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
>> +    struct ttm_resource *old_mem = ttm_bo->resource;
>> +    u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
>>       int ret;
>>       /*
>> @@ -597,6 +599,7 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>>           return -EINVAL;
>>       xe_bo_vunmap(bo);
>> +
>>       ret = xe_bo_trigger_rebind(xe, bo, ctx);
>>       if (ret)
>>           return ret;
>> @@ -605,6 +608,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>>       if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
>>           dma_buf_move_notify(ttm_bo->base.dma_buf);
>> +    /*
>> +     * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
>> +     * so if we moved from VRAM make sure to unlink this from the 
>> userfault
>> +     * tracking.
>> +     */
>> +    if (mem_type_is_vram(old_mem_type)) {
>> +        spin_lock(&xe->mem_access.vram_userfault_lock);
>> +        if (!list_empty(&bo->vram_userfault_link))
>> +            list_del_init(&bo->vram_userfault_link);
>> +        spin_unlock(&xe->mem_access.vram_userfault_lock);
>> +    }
>> +
>>       return 0;
>>   }
>> @@ -1063,6 +1078,11 @@ static void xe_ttm_bo_destroy(struct 
>> ttm_buffer_object *ttm_bo)
>>       if (bo->vm && xe_bo_is_user(bo))
>>           xe_vm_put(bo->vm);
>> +    spin_lock(&xe->mem_access.vram_userfault_lock);
>> +    if (!list_empty(&bo->vram_userfault_link))
>> +        list_del(&bo->vram_userfault_link);
>> +    spin_unlock(&xe->mem_access.vram_userfault_lock);
>> +
>>       kfree(bo);
>>   }
>> @@ -1110,6 +1130,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault 
>> *vmf)
>>   {
>>       struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
>>       struct drm_device *ddev = tbo->base.dev;
>> +    struct xe_bo *bo = NULL;
>> +    struct xe_device *xe = to_xe_device(ddev);
>>       vm_fault_t ret;
>>       int idx, r = 0;
>> @@ -1118,7 +1140,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault 
>> *vmf)
>>           return ret;
>>       if (drm_dev_enter(ddev, &idx)) {
>> -        struct xe_bo *bo = ttm_to_xe_bo(tbo);
>> +        bo = ttm_to_xe_bo(tbo);
>>           trace_xe_bo_cpu_fault(bo);
>> @@ -1137,10 +1159,31 @@ static vm_fault_t xe_gem_fault(struct vm_fault 
>> *vmf)
>>       } else {
>>           ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
>>       }
>> +
>>       if (ret == VM_FAULT_RETRY && !(vmf->flags & 
>> FAULT_FLAG_RETRY_NOWAIT))
>>           return ret;
>> +    /*
>> +     * ttm_bo_vm_reserve() already has dma_resv_lock.
>> +     * vram_userfault_count is protected by dma_resv lock and rpm 
>> wakeref.
>> +     */
>> +    if (ret == VM_FAULT_NOPAGE && bo && !bo->vram_userfault_count) {
>> +        if (tbo->resource->bus.is_iomem) {
>> +            dma_resv_assert_held(tbo->base.resv);
>> +
>> +            xe_device_mem_access_get(xe);
> 
> Thinking ahead for d3cold, we probably don't want to introduce more 
> places that do dma-resv -> access_get. Not a big deal for now, but at 
> some point someone will have to fix this. Please consider if we can 
> easily move this higher up now.
Sure I will change this as disscussed offline.
> 
>> +
>> +            bo->vram_userfault_count = 1;
>> +
>> +            spin_lock(&xe->mem_access.vram_userfault_lock);
>> +            list_add(&bo->vram_userfault_link, 
>> &xe->mem_access.vram_userfault_list);
>> +            spin_unlock(&xe->mem_access.vram_userfault_lock);
>> +
>> +            xe_device_mem_access_put(xe);
>> +        }
>> +    }
>>       dma_resv_unlock(tbo->base.resv);
>> +
>>       return ret;
>>   }
>> @@ -1254,6 +1297,7 @@ struct xe_bo *___xe_bo_create_locked(struct 
>> xe_device *xe, struct xe_bo *bo,
>>   #ifdef CONFIG_PROC_FS
>>       INIT_LIST_HEAD(&bo->client_link);
>>   #endif
>> +    INIT_LIST_HEAD(&bo->vram_userfault_link);
>>       drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
>> @@ -2264,6 +2308,25 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
>>       return err;
>>   }
>> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
>> +{
>> +    struct ttm_buffer_object *tbo = &bo->ttm;
>> +    struct ttm_device *bdev = tbo->bdev;
>> +    struct xe_device *xe = ttm_to_xe_device(bdev);
>> +
>> +    drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
>> +
>> +    /*
>> +     * Other callers xe_ttm_bo_destroy and xe_bo_move_notify also 
>> checks and
>> +     * deletes vram_user_fault_link so grabbing spinlock here.
>> +     */
>> +    XE_WARN_ON(!bo->vram_userfault_count);
>> +    spin_lock(&xe->mem_access.vram_userfault_lock);
> 
> AFAICT exclusive access is needed for the entire list op, including the 
> list iteration prior to this.
Ok.
> 
>> +    list_del(&bo->vram_userfault_link);
>> +    bo->vram_userfault_count = 0;
>> +    spin_unlock(&xe->mem_access.vram_userfault_lock);
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>>   #include "tests/xe_bo.c"
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
>> index 97b32528c600..350cc73cadf8 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.h
>> +++ b/drivers/gpu/drm/xe/xe_bo.h
>> @@ -249,6 +249,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, 
>> void *data,
>>               struct drm_file *file);
>>   int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
>>                    struct drm_file *file);
>> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
>> +
>>   int xe_bo_dumb_create(struct drm_file *file_priv,
>>                 struct drm_device *dev,
>>                 struct drm_mode_create_dumb *args);
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h 
>> b/drivers/gpu/drm/xe/xe_bo_types.h
>> index 64c2249a4e40..910b35c05acd 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -88,6 +88,11 @@ struct xe_bo {
>>        * objects.
>>        */
>>       u16 cpu_caching;
>> +    /**
>> +     * Whether the object is currently in fake offset mmap backed by 
>> vram.
>> +     */
>> +    unsigned int vram_userfault_count;
>> +    struct list_head vram_userfault_link;
> 
> Nit: Needs proper kernel-doc. Also consider maybe wrapping this in a 
> struct:
> 
> struct {
>      struct list_head link;
>      int count;
> } vram_userfault;
Sure will fix this.
> 
>>   };
>>   #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
>> b/drivers/gpu/drm/xe/xe_device_types.h
>> index 71f23ac365e6..6890d21f7a4c 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -385,6 +385,26 @@ struct xe_device {
>>       struct {
>>           /** @ref: ref count of memory accesses */
>>           atomic_t ref;
>> +        /*
>> +         *  Protects access to vram usefault list.
>> +         *  It is required, if we are outside of the runtime suspend 
>> path,
>> +         *  access to @vram_userfault_list requires always first 
>> grabbing the
>> +         *  runtime pm, to ensure we can't race against runtime suspend.
>> +         *  Once we have that we also need to grab @vram_userfault_lock,
>> +         *  at which point we have exclusive access.
>> +         *  The runtime suspend path is special since it doesn't 
>> really hold any locks,
>> +         *  but instead has exclusive access by virtue of all other 
>> accesses requiring
>> +         *  holding the runtime pm wakeref.
>> +         */
>> +        spinlock_t vram_userfault_lock;
>> +
>> +        /*
>> +         *  Keep list of userfaulted gem obj, which require to 
>> release their
>> +         *  mmap mappings at runtime suspend path.
>> +         */
> 
> Nit: Not properly formatted kernel-doc here and above.
Ok.
> 
>> +        struct list_head vram_userfault_list;
>> +
>> +        bool vram_userfault_ongoing;
> 
> Also maybe consider wrapping this in a struct:
> 
> struct {
>      spinlock_t lock;
>      list_head list;
> } vram_userfault;
Ok, will fix this.
> 
> Also vram_userfault_ongoing looks to be unused.
Will remove it.

Thanks,
Badal
> 
>>       } mem_access;
>>       /**
>> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
>> index b429c2876a76..bc1cb081e6e5 100644
>> --- a/drivers/gpu/drm/xe/xe_pm.c
>> +++ b/drivers/gpu/drm/xe/xe_pm.c
>> @@ -181,6 +181,8 @@ void xe_pm_init(struct xe_device *xe)
>>       }
>>       xe_pm_runtime_init(xe);
>> +    INIT_LIST_HEAD(&xe->mem_access.vram_userfault_list);
>> +    spin_lock_init(&xe->mem_access.vram_userfault_lock);
>>   }
>>   void xe_pm_runtime_fini(struct xe_device *xe)
>> @@ -214,6 +216,7 @@ struct task_struct 
>> *xe_pm_read_callback_task(struct xe_device *xe)
>>   int xe_pm_runtime_suspend(struct xe_device *xe)
>>   {
>> +    struct xe_bo *bo, *on;
>>       struct xe_gt *gt;
>>       u8 id;
>>       int err = 0;
>> @@ -247,6 +250,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>>        */
>>       lock_map_acquire(&xe_device_mem_access_lockdep_map);
>> +    list_for_each_entry_safe(bo, on,
>> +                 &xe->mem_access.vram_userfault_list, 
>> vram_userfault_link)
>> +        xe_bo_runtime_pm_release_mmap_offset(bo);
>> +
>>       if (xe->d3cold.allowed) {
>>           err = xe_bo_evict_all(xe);
>>           if (err)

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-12-22  7:42 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-12-20 18:50 [PATCH v3] drm/xe/dgfx: Release mmap mappings on rpm suspend Badal Nilawar
2023-12-21  9:21 ` Matthew Auld
2023-12-22  7:42   ` Nilawar, Badal

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox