Linux kernel -stable discussions
 help / color / mirror / Atom feed
* [PATCH v2] drm/xe: Defer buffer object shrinker write-backs and GPU waits
@ 2025-08-05  7:48 Thomas Hellström
  2025-08-05 13:40 ` Matthew Auld
  2025-08-05 14:50 ` Summers, Stuart
  0 siblings, 2 replies; 3+ messages in thread
From: Thomas Hellström @ 2025-08-05  7:48 UTC (permalink / raw)
  To: intel-xe; +Cc: Thomas Hellström, melvyn, Summers Stuart, stable

When the xe buffer-object shrinker allows GPU waits and write-back,
(typically from kswapd), perform multiple passes, skipping
subsequent passes if the shrinker number of scanned objects target
is reached.

1) Without GPU waits and write-back
2) Without write-back
3) With both GPU-waits and write-back

This is to avoid stalls and costly write- and readbacks unless they
are really necessary.

v2:
- Don't test for scan completion twice. (Stuart Summers)
- Update tags.

Reported-by: melvyn <melvyn2@dnsense.pub>
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557
Cc: Summers Stuart <stuart.summers@intel.com>
Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos")
Cc: <stable@vger.kernel.org> # v6.15+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 1c3c04d52f55..90244fe59b59 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea
 	write_unlock(&shrinker->lock);
 }
 
-static s64 xe_shrinker_walk(struct xe_device *xe,
-			    struct ttm_operation_ctx *ctx,
-			    const struct xe_bo_shrink_flags flags,
-			    unsigned long to_scan, unsigned long *scanned)
+static s64 __xe_shrinker_walk(struct xe_device *xe,
+			      struct ttm_operation_ctx *ctx,
+			      const struct xe_bo_shrink_flags flags,
+			      unsigned long to_scan, unsigned long *scanned)
 {
 	unsigned int mem_type;
 	s64 freed = 0, lret;
@@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
 	return freed;
 }
 
+/*
+ * Try shrinking idle objects without writeback first, then if not sufficient,
+ * try also non-idle objects and finally if that's not sufficient either,
+ * add writeback. This avoids stalls and explicit writebacks with light or
+ * moderate memory pressure.
+ */
+static s64 xe_shrinker_walk(struct xe_device *xe,
+			    struct ttm_operation_ctx *ctx,
+			    const struct xe_bo_shrink_flags flags,
+			    unsigned long to_scan, unsigned long *scanned)
+{
+	bool no_wait_gpu = true;
+	struct xe_bo_shrink_flags save_flags = flags;
+	s64 lret, freed;
+
+	swap(no_wait_gpu, ctx->no_wait_gpu);
+	save_flags.writeback = false;
+	lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
+	swap(no_wait_gpu, ctx->no_wait_gpu);
+	if (lret < 0 || *scanned >= to_scan)
+		return lret;
+
+	freed = lret;
+	if (!ctx->no_wait_gpu) {
+		lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
+		if (lret < 0)
+			return lret;
+		freed += lret;
+		if (*scanned >= to_scan)
+			return freed;
+	}
+
+	if (flags.writeback) {
+		lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
+		if (lret < 0)
+			return lret;
+		freed += lret;
+	}
+
+	return freed;
+}
+
 static unsigned long
 xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
 {
@@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
 		runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
 
 	shrink_flags.purge = false;
+
 	lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
 				nr_to_scan, &nr_scanned);
 	if (lret >= 0)
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] drm/xe: Defer buffer object shrinker write-backs and GPU waits
  2025-08-05  7:48 [PATCH v2] drm/xe: Defer buffer object shrinker write-backs and GPU waits Thomas Hellström
@ 2025-08-05 13:40 ` Matthew Auld
  2025-08-05 14:50 ` Summers, Stuart
  1 sibling, 0 replies; 3+ messages in thread
From: Matthew Auld @ 2025-08-05 13:40 UTC (permalink / raw)
  To: Thomas Hellström, intel-xe; +Cc: melvyn, Summers Stuart, stable

On 05/08/2025 08:48, Thomas Hellström wrote:
> When the xe buffer-object shrinker allows GPU waits and write-back,
> (typically from kswapd), perform multiple passes, skipping
> subsequent passes if the shrinker number of scanned objects target
> is reached.
> 
> 1) Without GPU waits and write-back
> 2) Without write-back
> 3) With both GPU-waits and write-back
> 
> This is to avoid stalls and costly write- and readbacks unless they
> are really necessary.
> 
> v2:
> - Don't test for scan completion twice. (Stuart Summers)
> - Update tags.
> 
> Reported-by: melvyn <melvyn2@dnsense.pub>
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557
> Cc: Summers Stuart <stuart.summers@intel.com>
> Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos")
> Cc: <stable@vger.kernel.org> # v6.15+
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>   drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++---
>   1 file changed, 47 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
> index 1c3c04d52f55..90244fe59b59 100644
> --- a/drivers/gpu/drm/xe/xe_shrinker.c
> +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea
>   	write_unlock(&shrinker->lock);
>   }
>   
> -static s64 xe_shrinker_walk(struct xe_device *xe,
> -			    struct ttm_operation_ctx *ctx,
> -			    const struct xe_bo_shrink_flags flags,
> -			    unsigned long to_scan, unsigned long *scanned)
> +static s64 __xe_shrinker_walk(struct xe_device *xe,
> +			      struct ttm_operation_ctx *ctx,
> +			      const struct xe_bo_shrink_flags flags,
> +			      unsigned long to_scan, unsigned long *scanned)
>   {
>   	unsigned int mem_type;
>   	s64 freed = 0, lret;
> @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
>   	return freed;
>   }
>   
> +/*
> + * Try shrinking idle objects without writeback first, then if not sufficient,
> + * try also non-idle objects and finally if that's not sufficient either,
> + * add writeback. This avoids stalls and explicit writebacks with light or
> + * moderate memory pressure.

Just one question here, with writeback=false it doesn't really influence 
which objects are chosen for shrinking, unlike with no_wait_gpu, right? 
Will having another pass just with writeback=true yield anything 
different, assuming here that the previous two passes would have already 
hoovered ~everything up that was a possible candidate, so this pass 
won't really find anything in practice? If so, does that also mean we 
never really end up using the writeback=true behaviour any more?

> + */
> +static s64 xe_shrinker_walk(struct xe_device *xe,
> +			    struct ttm_operation_ctx *ctx,
> +			    const struct xe_bo_shrink_flags flags,
> +			    unsigned long to_scan, unsigned long *scanned)
> +{
> +	bool no_wait_gpu = true;
> +	struct xe_bo_shrink_flags save_flags = flags;
> +	s64 lret, freed;
> +
> +	swap(no_wait_gpu, ctx->no_wait_gpu);
> +	save_flags.writeback = false;
> +	lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
> +	swap(no_wait_gpu, ctx->no_wait_gpu);
> +	if (lret < 0 || *scanned >= to_scan)
> +		return lret;
> +
> +	freed = lret;
> +	if (!ctx->no_wait_gpu) {
> +		lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
> +		if (lret < 0)
> +			return lret;
> +		freed += lret;
> +		if (*scanned >= to_scan)
> +			return freed;
> +	}
> +
> +	if (flags.writeback) {
> +		lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
> +		if (lret < 0)
> +			return lret;
> +		freed += lret;
> +	}
> +
> +	return freed;
> +}
> +
>   static unsigned long
>   xe_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>   {
> @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
>   		runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
>   
>   	shrink_flags.purge = false;
> +
>   	lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
>   				nr_to_scan, &nr_scanned);
>   	if (lret >= 0)


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] drm/xe: Defer buffer object shrinker write-backs and GPU waits
  2025-08-05  7:48 [PATCH v2] drm/xe: Defer buffer object shrinker write-backs and GPU waits Thomas Hellström
  2025-08-05 13:40 ` Matthew Auld
@ 2025-08-05 14:50 ` Summers, Stuart
  1 sibling, 0 replies; 3+ messages in thread
From: Summers, Stuart @ 2025-08-05 14:50 UTC (permalink / raw)
  To: intel-xe@lists.freedesktop.org, thomas.hellstrom@linux.intel.com
  Cc: stable@vger.kernel.org, melvyn2@dnsense.pub, Auld, Matthew

On Tue, 2025-08-05 at 09:48 +0200, Thomas Hellström wrote:
> When the xe buffer-object shrinker allows GPU waits and write-back,
> (typically from kswapd), perform multiple passes, skipping
> subsequent passes if the shrinker number of scanned objects target
> is reached.
> 
> 1) Without GPU waits and write-back
> 2) Without write-back
> 3) With both GPU-waits and write-back
> 
> This is to avoid stalls and costly write- and readbacks unless they
> are really necessary.
> 
> v2:
> - Don't test for scan completion twice. (Stuart Summers)
> - Update tags.
> 
> Reported-by: melvyn <melvyn2@dnsense.pub>
> Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5557
> Cc: Summers Stuart <stuart.summers@intel.com>
> Fixes: 00c8efc3180f ("drm/xe: Add a shrinker for xe bos")
> Cc: <stable@vger.kernel.org> # v6.15+
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Changes look good to me, thanks Thomas. Before merging, please address
the feedback from Matt as well.
Reviewed-by: Stuart Summers <stuart.summers@intel.com>

Thanks,
Stuart

> ---
>  drivers/gpu/drm/xe/xe_shrinker.c | 51 +++++++++++++++++++++++++++++-
> --
>  1 file changed, 47 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_shrinker.c
> b/drivers/gpu/drm/xe/xe_shrinker.c
> index 1c3c04d52f55..90244fe59b59 100644
> --- a/drivers/gpu/drm/xe/xe_shrinker.c
> +++ b/drivers/gpu/drm/xe/xe_shrinker.c
> @@ -54,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker
> *shrinker, long shrinkable, long purgea
>         write_unlock(&shrinker->lock);
>  }
>  
> -static s64 xe_shrinker_walk(struct xe_device *xe,
> -                           struct ttm_operation_ctx *ctx,
> -                           const struct xe_bo_shrink_flags flags,
> -                           unsigned long to_scan, unsigned long
> *scanned)
> +static s64 __xe_shrinker_walk(struct xe_device *xe,
> +                             struct ttm_operation_ctx *ctx,
> +                             const struct xe_bo_shrink_flags flags,
> +                             unsigned long to_scan, unsigned long
> *scanned)
>  {
>         unsigned int mem_type;
>         s64 freed = 0, lret;
> @@ -93,6 +93,48 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
>         return freed;
>  }
>  
> +/*
> + * Try shrinking idle objects without writeback first, then if not
> sufficient,
> + * try also non-idle objects and finally if that's not sufficient
> either,
> + * add writeback. This avoids stalls and explicit writebacks with
> light or
> + * moderate memory pressure.
> + */
> +static s64 xe_shrinker_walk(struct xe_device *xe,
> +                           struct ttm_operation_ctx *ctx,
> +                           const struct xe_bo_shrink_flags flags,
> +                           unsigned long to_scan, unsigned long
> *scanned)
> +{
> +       bool no_wait_gpu = true;
> +       struct xe_bo_shrink_flags save_flags = flags;
> +       s64 lret, freed;
> +
> +       swap(no_wait_gpu, ctx->no_wait_gpu);
> +       save_flags.writeback = false;
> +       lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan,
> scanned);
> +       swap(no_wait_gpu, ctx->no_wait_gpu);
> +       if (lret < 0 || *scanned >= to_scan)
> +               return lret;
> +
> +       freed = lret;
> +       if (!ctx->no_wait_gpu) {
> +               lret = __xe_shrinker_walk(xe, ctx, save_flags,
> to_scan, scanned);
> +               if (lret < 0)
> +                       return lret;
> +               freed += lret;
> +               if (*scanned >= to_scan)
> +                       return freed;
> +       }
> +
> +       if (flags.writeback) {
> +               lret = __xe_shrinker_walk(xe, ctx, flags, to_scan,
> scanned);
> +               if (lret < 0)
> +                       return lret;
> +               freed += lret;
> +       }
> +
> +       return freed;
> +}
> +
>  static unsigned long
>  xe_shrinker_count(struct shrinker *shrink, struct shrink_control
> *sc)
>  {
> @@ -199,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct
> shrinker *shrink, struct shrink_con
>                 runtime_pm = xe_shrinker_runtime_pm_get(shrinker,
> true, 0, can_backup);
>  
>         shrink_flags.purge = false;
> +
>         lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
>                                 nr_to_scan, &nr_scanned);
>         if (lret >= 0)


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-08-05 14:51 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-08-05  7:48 [PATCH v2] drm/xe: Defer buffer object shrinker write-backs and GPU waits Thomas Hellström
2025-08-05 13:40 ` Matthew Auld
2025-08-05 14:50 ` Summers, Stuart

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox