AMD-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: Surface svm_attr_gobm, a RW module parameter
@ 2024-08-21 23:22 Ramesh Errabolu
  2024-08-22 14:27 ` Philip Yang
  0 siblings, 1 reply; 3+ messages in thread
From: Ramesh Errabolu @ 2024-08-21 23:22 UTC (permalink / raw)
  To: amd-gfx; +Cc: Ramesh Errabolu

KFD's design of unified memory (UM) does not allow users to
configure the size of the buffer used when migrating data either
from Sysmem to VRAM or vice versa. This patch remedies this
gap, albeit at a coarse level, for workloads that deal with
unregistered memory.

Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 ++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 12 ++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 29 +++++++++++++++++--------
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e8c284aea1f2..73dd816b01f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -237,6 +237,7 @@ extern int sched_policy;
 extern bool debug_evictions;
 extern bool no_system_mem_limit;
 extern int halt_if_hws_hang;
+extern uint amdgpu_svm_attr_gobm;
 #else
 static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool __maybe_unused debug_evictions; /* = false */
@@ -313,6 +314,9 @@ extern int amdgpu_wbrf;
 /* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */
 #define AMDGPU_SWCTF_EXTRA_DELAY		50
 
+/* Default size of buffer to use in migrating buffer */
+#define AMDGPU_SVM_ATTR_GOBM	    9
+
 struct amdgpu_xcp_mgr;
 struct amdgpu_device;
 struct amdgpu_irq_src;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b9529948f2b2..e195e1cf0f28 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -169,6 +169,15 @@ uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu;
 char *amdgpu_virtual_display;
 bool enforce_isolation;
+
+/* Specifies the default size of buffer to use in
+ * migrating buffer from Sysmem to VRAM and vice
+ * versa
+ *
+ * Defined as log2(sizeof(buffer)/PAGE_SIZE)
+ */
+uint amdgpu_svm_attr_gobm = AMDGPU_SVM_ATTR_GOBM;
+
 /*
  * OverDrive(bit 14) disabled by default
  * GFX DCS(bit 19) disabled by default
@@ -320,6 +329,13 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
 MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(msi, amdgpu_msi, int, 0444);
 
+/**
+ * DOC: svm_attr_gobm (uint)
+ * Size of buffer to use in migrating buffer from Sysmem to VRAM and vice versa
+ */
+MODULE_PARM_DESC(svm_attr_gobm, "Defined as log2(sizeof(buffer)/PAGE_SIZE), e.g. 9 for 2 MiB");
+module_param_named(svm_attr_gobm, amdgpu_svm_attr_gobm, uint, 0644);
+
 /**
  * DOC: lockup_timeout (string)
  * Set GPU scheduler timeout value in ms.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7bba6bed2f48..07b202ab008a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -866,6 +866,18 @@ struct svm_range_list {
 	struct delayed_work		restore_work;
 	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
 	struct task_struct		*faulting_task;
+
+	/* Indicates the default size to use in migrating
+	 * buffers of a process from Sysmem to VRAM and vice
+	 * versa. The max legal value cannot be greater than
+	 * 0x3F
+	 *
+	 * @note: A side effect of this symbol being part of
+	 * struct svm_range_list is that it forces all buffers
+	 * of the process of unregistered kind to use the same
+	 * size in buffer migration
+	 */
+	uint8_t attr_gobm;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 10b1a1f64198..fcfe5543a3c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -309,12 +309,11 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 }
 
 static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
-				 uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(int32_t *location,
+			int32_t *prefetch_loc, uint32_t *flags)
 {
 	*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
 	*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
-	*granularity = 9;
 	*flags =
 		KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
 }
@@ -358,9 +357,9 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 		bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
 			    MAX_GPU_INSTANCE);
 
+	prange->granularity = svms->attr_gobm;
 	svm_range_set_default_attributes(&prange->preferred_loc,
-					 &prange->prefetch_loc,
-					 &prange->granularity, &prange->flags);
+				&prange->prefetch_loc, &prange->flags);
 
 	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
 
@@ -2685,10 +2684,12 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 
 	*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
 
+	/* Determine the starting and ending page of prange */
 	start_limit = max(vma->vm_start >> PAGE_SHIFT,
-		      (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+		      (unsigned long)ALIGN_DOWN(addr, (1 << p->svms.attr_gobm)));
 	end_limit = min(vma->vm_end >> PAGE_SHIFT,
-		    (unsigned long)ALIGN(addr + 1, 2UL << 8));
+		    (unsigned long)ALIGN(addr + 1, (1 << p->svms.attr_gobm)));
+
 	/* First range that starts after the fault address */
 	node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
 	if (node) {
@@ -3211,6 +3212,15 @@ int svm_range_list_init(struct kfd_process *p)
 		if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
 			bitmap_set(svms->bitmap_supported, i, 1);
 
+	/* Bind granularity of buffer migration, either
+	 * the default size or one specified by the user
+	 */
+	svms->attr_gobm = amdgpu_svm_attr_gobm;
+	if (unlikely(amdgpu_svm_attr_gobm != AMDGPU_SVM_ATTR_GOBM))
+		svms->attr_gobm = min_t(uint32_t, amdgpu_svm_attr_gobm, 0x3F);
+	pr_debug("%s() Granularity Of Buffer Migration: %d\n",
+			__func__, svms->attr_gobm);
+
 	return 0;
 }
 
@@ -3738,8 +3748,9 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
 	node = interval_tree_iter_first(&svms->objects, start, last);
 	if (!node) {
 		pr_debug("range attrs not found return default values\n");
-		svm_range_set_default_attributes(&location, &prefetch_loc,
-						 &granularity, &flags_and);
+		granularity = svms->attr_gobm;
+		svm_range_set_default_attributes(&location,
+					&prefetch_loc, &flags_and);
 		flags_or = flags_and;
 		if (p->xnack_enabled)
 			bitmap_copy(bitmap_access, svms->bitmap_supported,
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amdgpu: Surface svm_attr_gobm, a RW module parameter
  2024-08-21 23:22 [PATCH] drm/amdgpu: Surface svm_attr_gobm, a RW module parameter Ramesh Errabolu
@ 2024-08-22 14:27 ` Philip Yang
  2024-08-26 16:47   ` Errabolu, Ramesh
  0 siblings, 1 reply; 3+ messages in thread
From: Philip Yang @ 2024-08-22 14:27 UTC (permalink / raw)
  To: Ramesh Errabolu, amd-gfx

[-- Attachment #1: Type: text/html, Size: 9243 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH] drm/amdgpu: Surface svm_attr_gobm, a RW module parameter
  2024-08-22 14:27 ` Philip Yang
@ 2024-08-26 16:47   ` Errabolu, Ramesh
  0 siblings, 0 replies; 3+ messages in thread
From: Errabolu, Ramesh @ 2024-08-26 16:47 UTC (permalink / raw)
  To: Yang, Philip, amd-gfx@lists.freedesktop.org

[-- Attachment #1: Type: text/plain, Size: 8814 bytes --]

Responses are in-line. Will post updated patch after testing

Regards,
Ramesh

From: Yang, Philip <Philip.Yang@amd.com>
Sent: Thursday, August 22, 2024 9:28 AM
To: Errabolu, Ramesh <Ramesh.Errabolu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: Surface svm_attr_gobm, a RW module parameter

On 2024-08-21 19:22, Ramesh Errabolu wrote:

KFD's design of unified memory (UM) does not allow users to

configure the size of buffer used in migrating buffer either

from Sysmem to VRAM or vice versa.

This is not true: an app can change the range granularity attribute to configure the buffer migration size. This module parameter is used to configure the default range granularity.

Ramesh: Will update commit summary to emphasize the patch as updating default migration size that will benefit users of unregistered memory

This patch remedies this

gap, albeit at a coarse level, for workloads that deal with

unregistered memory



Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>

---

 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++

 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 ++++++++++++++

 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   | 12 ++++++++++

 drivers/gpu/drm/amd/amdkfd/kfd_svm.c    | 29 +++++++++++++++++--------

 4 files changed, 52 insertions(+), 9 deletions(-)



diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index e8c284aea1f2..73dd816b01f2 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -237,6 +237,7 @@ extern int sched_policy;

 extern bool debug_evictions;

 extern bool no_system_mem_limit;

 extern int halt_if_hws_hang;

+extern uint amdgpu_svm_attr_gobm;



 #else

 static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;

 static const bool __maybe_unused debug_evictions; /* = false */

@@ -313,6 +314,9 @@ extern int amdgpu_wbrf;

 /* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */

 #define AMDGPU_SWCTF_EXTRA_DELAY              50



+/* Default size of buffer to use in migrating buffer */

+#define AMDGPU_SVM_ATTR_GOBM       9

+

 struct amdgpu_xcp_mgr;

 struct amdgpu_device;

 struct amdgpu_irq_src;

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

index b9529948f2b2..e195e1cf0f28 100644

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

@@ -169,6 +169,15 @@ uint amdgpu_sdma_phase_quantum = 32;

 char *amdgpu_disable_cu;

 char *amdgpu_virtual_display;

 bool enforce_isolation;

+

+/* Specifies the default size of buffer to use in

+ * migrating buffer from Sysmem to VRAM and vice

+ * versa

+ *

+ * Defined as log2(sizeof(buffer)/PAGE_SIZE)

+ */

+uint amdgpu_svm_attr_gobm = AMDGPU_SVM_ATTR_GOBM;

/* add explanation, GOBM : granularity of buffer migration

Ramesh: Done

Use u8 type, the same type used in prange->granularity

u8 amdgpu_svm_default_gobm = AMDGPU_SVM_DEFAULT_GOBM

Ramesh: Leaving it as uint. u8 cannot be used as it is not a type supported by the “include/linux/moduleparam.h” file



+

 /*

  * OverDrive(bit 14) disabled by default

  * GFX DCS(bit 19) disabled by default

@@ -320,6 +329,13 @@ module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);

 MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");

 module_param_named(msi, amdgpu_msi, int, 0444);



+/**

+ * DOC: svm_attr_gobm (uint)

+ * Size of buffer to use in migrating buffer from Sysmem to VRAM and vice versa

+ */

+MODULE_PARM_DESC(svm_attr_gobm, "Defined as log2(sizeof(buffer)/PAGE_SIZE), e.g. 9 for 2 MiB");

+module_param_named(svm_attr_gobm, amdgpu_svm_attr_gobm, uint, 0644);

+

 /**

  * DOC: lockup_timeout (string)

  * Set GPU scheduler timeout value in ms.

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index 7bba6bed2f48..07b202ab008a 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h

@@ -866,6 +866,18 @@ struct svm_range_list {

  struct delayed_work            restore_work;

  DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);

  struct task_struct             *faulting_task;

+

+ /* Indicates the default size to use in migrating

+  * buffers of a process from Sysmem to VRAM and vice

+  * versa. The max legal value cannot be greater than

+  * 0x3F

+  *

+  * @note: A side effect of this symbol being part of

+  * struct svm_range_list is that it forces all buffers

+  * of the process of unregistered kind to use the same

+  * size in buffer migration

+  */

+ uint8_t attr_gobm;

 };



 /* Process data */

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index 10b1a1f64198..fcfe5543a3c0 100644

--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c

@@ -309,12 +309,11 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)

 }



 static void

-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,

-                         uint8_t *granularity, uint32_t *flags)

+svm_range_set_default_attributes(int32_t *location,

+                int32_t *prefetch_loc, uint32_t *flags)

 {

  *location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;

  *prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;

- *granularity = 9;

  *flags =

         KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;

 }

@@ -358,9 +357,9 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,

         bitmap_copy(prange->bitmap_access, svms->bitmap_supported,

                     MAX_GPU_INSTANCE);



+ prange->granularity = svms->attr_gobm;

  svm_range_set_default_attributes(&prange->preferred_loc,

-                                &prange->prefetch_loc,

-                                &prange->granularity, &prange->flags);

+                        &prange->prefetch_loc, &prange->flags);



  pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);



@@ -2685,10 +2684,12 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,



  *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);



+ /* Determine the starting and ending page of prange */

  start_limit = max(vma->vm_start >> PAGE_SHIFT,

-              (unsigned long)ALIGN_DOWN(addr, 2UL << 8));

+              (unsigned long)ALIGN_DOWN(addr, (1 << p->svms.attr_gobm)));
1UL << p->svms.attr_gobm
Ramesh: Done



  end_limit = min(vma->vm_end >> PAGE_SHIFT,

-            (unsigned long)ALIGN(addr + 1, 2UL << 8));

+            (unsigned long)ALIGN(addr + 1, (1 << p->svms.attr_gobm)));

1UL << p->svms.attr_gobm

Ramesh: Done

+

  /* First range that starts after the fault address */

  node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);

  if (node) {

@@ -3211,6 +3212,15 @@ int svm_range_list_init(struct kfd_process *p)

         if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))

                 bitmap_set(svms->bitmap_supported, i, 1);



+ /* Bind granularity of buffer migration, either

+  * the default size or one specified by the user

+  */

+ svms->attr_gobm = amdgpu_svm_attr_gobm;

+ if (unlikely(amdgpu_svm_attr_gobm != AMDGPU_SVM_ATTR_GOBM))

+        svms->attr_gobm = min_t(uint32_t, amdgpu_svm_attr_gobm, 0x3F);

to simplify, remove the if check and always use this:

svms->attr_gobm = min_t(uint32_t, amdgpu_svm_attr_gobm, 0x3F);

Ramesh: done



+ pr_debug("%s() Granularity Of Buffer Migration: %d\n",

+                __func__, svms->attr_gobm);

pr_debug can output the function name if the +pf flags are added dynamically, so __func__ is not needed.

Ramesh: done

Regards,

Philip



+

  return 0;

 }



@@ -3738,8 +3748,9 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,

  node = interval_tree_iter_first(&svms->objects, start, last);

  if (!node) {

         pr_debug("range attrs not found return default values\n");

-        svm_range_set_default_attributes(&location, &prefetch_loc,

-                                        &granularity, &flags_and);

+        granularity = svms->attr_gobm;

+        svm_range_set_default_attributes(&location,

+                               &prefetch_loc, &flags_and);

         flags_or = flags_and;

         if (p->xnack_enabled)

                 bitmap_copy(bitmap_access, svms->bitmap_supported,

[-- Attachment #2: Type: text/html, Size: 19075 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-08-26 16:47 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-21 23:22 [PATCH] drm/amdgpu: Surface svm_attr_gobm, a RW module parameter Ramesh Errabolu
2024-08-22 14:27 ` Philip Yang
2024-08-26 16:47   ` Errabolu, Ramesh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox