All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maciej Patelczyk <maciej.patelczyk@intel.com>
To: Matthew Brost <matthew.brost@intel.com>,
	<intel-xe@lists.freedesktop.org>
Cc: <stuart.summers@intel.com>, <arvind.yadav@intel.com>,
	<himal.prasad.ghimiray@intel.com>,
	<thomas.hellstrom@linux.intel.com>, <francois.dugast@intel.com>
Subject: Re: [PATCH v4 05/12] drm/xe: Add num_pf_work modparam
Date: Wed, 6 May 2026 17:59:39 +0200	[thread overview]
Message-ID: <ad1df3ca-2b77-4a83-a009-c9930643483d@intel.com> (raw)
In-Reply-To: <20260226042834.2963245-6-matthew.brost@intel.com>

On 26/02/2026 05:28, Matthew Brost wrote:

> Add a module parameter to control the number of page-fault work threads,
> making it easy to experiment with how different numbers of work threads
> impact performance.
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>   drivers/gpu/drm/xe/xe_defaults.h     |  1 +
>   drivers/gpu/drm/xe/xe_device.c       | 17 ++++++++++++++---
>   drivers/gpu/drm/xe/xe_device_types.h | 11 ++++-------
>   drivers/gpu/drm/xe/xe_module.c       |  4 ++++
>   drivers/gpu/drm/xe/xe_module.h       |  1 +
>   drivers/gpu/drm/xe/xe_pagefault.c    |  6 +++---
>   drivers/gpu/drm/xe/xe_vm.c           |  3 ++-
>   7 files changed, 29 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_defaults.h b/drivers/gpu/drm/xe/xe_defaults.h
> index 5d5d41d067c5..2e615cf896b2 100644
> --- a/drivers/gpu/drm/xe/xe_defaults.h
> +++ b/drivers/gpu/drm/xe/xe_defaults.h
> @@ -22,5 +22,6 @@
>   #define XE_DEFAULT_WEDGED_MODE			XE_WEDGED_MODE_UPON_CRITICAL_ERROR
>   #define XE_DEFAULT_WEDGED_MODE_STR		"upon-critical-error"
>   #define XE_DEFAULT_SVM_NOTIFIER_SIZE		512
> +#define XE_DEFAULT_NUM_PF_WORK			2
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index 3462645ca13c..0571079a09e8 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -436,6 +436,18 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
>   	ttm_device_fini(&xe->ttm);
>   }
>   
> +static void xe_device_parse_modparam(struct xe_device *xe)
> +{
> +	xe->info.force_execlist = xe_modparam.force_execlist;
> +	xe->atomic_svm_timeslice_ms = 5;
> +	xe->min_run_period_lr_ms = 5;
> +	xe->info.num_pf_work = xe_modparam.num_pf_work;
> +	if (xe->info.num_pf_work < 1)
> +		xe->info.num_pf_work = 1;
> +	else if (xe->info.num_pf_work > XE_PAGEFAULT_WORK_MAX)
> +		xe->info.num_pf_work = XE_PAGEFAULT_WORK_MAX;
> +}
> +
>   struct xe_device *xe_device_create(struct pci_dev *pdev,
>   				   const struct pci_device_id *ent)
>   {
> @@ -469,9 +481,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>   
>   	xe->info.devid = pdev->device;
>   	xe->info.revid = pdev->revision;
> -	xe->info.force_execlist = xe_modparam.force_execlist;
> -	xe->atomic_svm_timeslice_ms = 5;
> -	xe->min_run_period_lr_ms = 5;
> +
> +	xe_device_parse_modparam(xe);
>   
>   	err = xe_irq_init(xe);
>   	if (err)
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 0558dfd52541..a027ca5f6828 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -130,6 +130,8 @@ struct xe_device {
>   		u8 revid;
>   		/** @info.step: stepping information for each IP */
>   		struct xe_step_info step;
> +		/** @info.num_pf_work: Number of page fault work threads */
> +		int num_pf_work;
>   		/** @info.dma_mask_size: DMA address bits */
>   		u8 dma_mask_size;
>   		/** @info.vram_flags: Vram flags */
> @@ -310,14 +312,9 @@ struct xe_device {
>   		struct rw_semaphore lock;
>   		/** @usm.pf_wq: page fault work queue, unbound, high priority */
>   		struct workqueue_struct *pf_wq;
> -		/*
> -		 * We pick 4 here because, in the current implementation, it
> -		 * yields the best bandwidth utilization of the kernel paging
> -		 * engine.
> -		 */
> -#define XE_PAGEFAULT_WORK_COUNT	4
> +#define XE_PAGEFAULT_WORK_MAX	8
>   		/** @usm.pf_workers: Page fault workers */
> -		struct xe_pagefault_work pf_workers[XE_PAGEFAULT_WORK_COUNT];
> +		struct xe_pagefault_work pf_workers[XE_PAGEFAULT_WORK_MAX];
>   		/** @usm.pf_queue: Page fault queue */
>   		struct xe_pagefault_queue pf_queue;
>   #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index 903d3b433421..c750db4b579c 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -28,6 +28,7 @@ struct xe_modparam xe_modparam = {
>   	.max_vfs =		XE_DEFAULT_MAX_VFS,
>   #endif
>   	.wedged_mode =		XE_DEFAULT_WEDGED_MODE,
> +	.num_pf_work =		XE_DEFAULT_NUM_PF_WORK,
>   	.svm_notifier_size =	XE_DEFAULT_SVM_NOTIFIER_SIZE,
>   	/* the rest are 0 by default */
>   };
> @@ -81,6 +82,9 @@ MODULE_PARM_DESC(wedged_mode,
>   		 "Module's default policy for the wedged mode (0=never, 1=upon-critical-error, 2=upon-any-hang-no-reset "
>   		 "[default=" XE_DEFAULT_WEDGED_MODE_STR "])");
>   
> +module_param_named(num_pf_work, xe_modparam.num_pf_work, int, 0600);
> +MODULE_PARM_DESC(num_pf_work, "Number of page fault work threads, default=2, min=1, max=8");
> +
>   static int xe_check_nomodeset(void)
>   {
>   	if (drm_firmware_drivers_only())
> diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> index 79cb9639c0f3..c6642523184a 100644
> --- a/drivers/gpu/drm/xe/xe_module.h
> +++ b/drivers/gpu/drm/xe/xe_module.h
> @@ -22,6 +22,7 @@ struct xe_modparam {
>   	unsigned int max_vfs;
>   #endif
>   	unsigned int wedged_mode;
> +	unsigned int num_pf_work;
>   	u32 svm_notifier_size;
>   };
>   
> diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
> index 7880fc7e7eb4..64b1dc574ab7 100644
> --- a/drivers/gpu/drm/xe/xe_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_pagefault.c
> @@ -383,7 +383,7 @@ int xe_pagefault_init(struct xe_device *xe)
>   
>   	xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
>   					WQ_UNBOUND | WQ_HIGHPRI,
> -					XE_PAGEFAULT_WORK_COUNT);
> +					xe->info.num_pf_work);
>   	if (!xe->usm.pf_wq)
>   		return -ENOMEM;
>   
> @@ -391,7 +391,7 @@ int xe_pagefault_init(struct xe_device *xe)
>   	if (err)
>   		goto err_out;
>   
> -	for (i = 0; i < XE_PAGEFAULT_WORK_COUNT; ++i) {
> +	for (i = 0; i < xe->info.num_pf_work; ++i) {
>   		struct xe_pagefault_work *pf_work = xe->usm.pf_workers + i;
>   
>   		pf_work->xe = xe;
> @@ -457,7 +457,7 @@ static int xe_pagefault_work_index(struct xe_device *xe)
>   {
>   	lockdep_assert_held(&xe->usm.pf_queue.lock);
>   
> -	return xe->usm.current_pf_work++ % XE_PAGEFAULT_WORK_COUNT;
> +	return xe->usm.current_pf_work++ % xe->info.num_pf_work;
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 06669e9c500d..54c7d0f791e1 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3070,7 +3070,8 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops,
>   	skip_threads =  op->prefetch_range.ranges_count == 1 ||
>   		(!dpagemap && !(vops->flags &
>   				XE_VMA_OPS_FLAG_HAS_SVM_VALID_RANGE)) ||
> -		!(vops->flags & XE_VMA_OPS_FLAG_DOWNGRADE_LOCK);
> +		!(vops->flags & XE_VMA_OPS_FLAG_DOWNGRADE_LOCK) ||
> +		vm->xe->info.num_pf_work == 1;
>   	thread = skip_threads ? &stack_thread : NULL;
>   
>   	if (!skip_threads) {

In addition to patch 04, this lowers the default number of workers to 2.

Good idea to have this as a modparam!

Reviewed-by: Maciej Patelczyk <maciej.patelczyk@intel.com>


  reply	other threads:[~2026-05-06 16:00 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-26  4:28 [PATCH v4 00/12] Fine grained fault locking, threaded prefetch, storm cache Matthew Brost
2026-02-26  4:28 ` [PATCH v4 01/12] drm/xe: Fine grained page fault locking Matthew Brost
2026-02-26  4:28 ` [PATCH v4 02/12] drm/xe: Allow prefetch-only VM bind IOCTLs to use VM read lock Matthew Brost
2026-02-26  4:28 ` [PATCH v4 03/12] drm/xe: Thread prefetch of SVM ranges Matthew Brost
2026-02-26  4:28 ` [PATCH v4 04/12] drm/xe: Use a single page-fault queue with multiple workers Matthew Brost
2026-05-06 15:46   ` Maciej Patelczyk
2026-05-06 19:42     ` Matthew Brost
2026-05-07 12:41       ` Maciej Patelczyk
2026-02-26  4:28 ` [PATCH v4 05/12] drm/xe: Add num_pf_work modparam Matthew Brost
2026-05-06 15:59   ` Maciej Patelczyk [this message]
2026-02-26  4:28 ` [PATCH v4 06/12] drm/xe: Engine class and instance into a u8 Matthew Brost
2026-05-06 16:04   ` Maciej Patelczyk
2026-05-07 16:20     ` Maciej Patelczyk
2026-02-26  4:28 ` [PATCH v4 07/12] drm/xe: Track pagefault worker runtime Matthew Brost
2026-05-07 12:51   ` Maciej Patelczyk
2026-02-26  4:28 ` [PATCH v4 08/12] drm/xe: Chain page faults via queue-resident cache to avoid fault storms Matthew Brost
2026-05-08 12:03   ` Maciej Patelczyk
2026-02-26  4:28 ` [PATCH v4 09/12] drm/xe: Add pagefault chaining stats Matthew Brost
2026-05-07 13:15   ` Maciej Patelczyk
2026-05-07 13:52     ` Francois Dugast
2026-02-26  4:28 ` [PATCH v4 10/12] drm/xe: Add debugfs pagefault_info Matthew Brost
2026-05-07 10:07   ` Maciej Patelczyk
2026-02-26  4:28 ` [PATCH v4 11/12] drm/xe: batch CT pagefault acks with periodic flush Matthew Brost
2026-05-08  9:24   ` Maciej Patelczyk
2026-02-26  4:28 ` [PATCH v4 12/12] drm/xe: Track parallel page fault activity in GT stats Matthew Brost
2026-05-07 13:56   ` Maciej Patelczyk
2026-05-07 14:23     ` Francois Dugast
2026-02-26  4:35 ` ✗ CI.checkpatch: warning for Fine grained fault locking, threaded prefetch, storm cache (rev4) Patchwork
2026-02-26  4:36 ` ✓ CI.KUnit: success " Patchwork
2026-02-26  5:26 ` ✗ Xe.CI.BAT: failure " Patchwork
2026-02-26  8:59 ` ✗ Xe.CI.FULL: " Patchwork
2026-02-26 13:43 ` [PATCH v4 00/12] Fine grained fault locking, threaded prefetch, storm cache Thomas Hellström
2026-02-26 19:36   ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ad1df3ca-2b77-4a83-a009-c9930643483d@intel.com \
    --to=maciej.patelczyk@intel.com \
    --cc=arvind.yadav@intel.com \
    --cc=francois.dugast@intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=stuart.summers@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.