Re: [PATCH 11/11] drm/xe: Add num_pf_queue modparam

All of lore.kernel.org
 help / color / mirror / Atom feed

From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
	"Brost,  Matthew" <matthew.brost@intel.com>
Cc: "Mrozek, Michal" <michal.mrozek@intel.com>,
	"Ghimiray, Himal Prasad" <himal.prasad.ghimiray@intel.com>,
	"thomas.hellstrom@linux.intel.com"
	<thomas.hellstrom@linux.intel.com>,
	"Dugast, Francois" <francois.dugast@intel.com>
Subject: Re: [PATCH 11/11] drm/xe: Add num_pf_queue modparam
Date: Thu, 28 Aug 2025 22:58:09 +0000	[thread overview]
Message-ID: <423dbb4bf9fcd4898af48d6ce1d1b53585dfafb4.camel@intel.com> (raw)
In-Reply-To: <20250806062242.1090416-12-matthew.brost@intel.com>

On Tue, 2025-08-05 at 23:22 -0700, Matthew Brost wrote:
> Enable quick experiments to see how the number of page fault queues
> affects performance.
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device.c       | 15 +++++++++++++--
>  drivers/gpu/drm/xe/xe_device_types.h |  6 ++++--
>  drivers/gpu/drm/xe/xe_module.c       |  5 +++++
>  drivers/gpu/drm/xe/xe_module.h       |  1 +
>  drivers/gpu/drm/xe/xe_pagefault.c    |  8 ++++----
>  drivers/gpu/drm/xe/xe_vm.c           |  3 ++-
>  6 files changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c
> b/drivers/gpu/drm/xe/xe_device.c
> index c7c8aee03841..47eb07e9c799 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -413,6 +413,17 @@ static void xe_device_destroy(struct drm_device
> *dev, void *dummy)
>         ttm_device_fini(&xe->ttm);
>  }
>  
> +static void xe_device_parse_modparame(struct xe_device *xe)
> +{
> +       xe->info.force_execlist = xe_modparam.force_execlist;
> +       xe->info.num_pf_queue = xe_modparam.num_pf_queue;
> +       if (xe->info.num_pf_queue < 1)
> +               xe->info.num_pf_queue = 1;
> +       else if (xe->info.num_pf_queue > XE_PAGEFAULT_QUEUE_MAX)
> +               xe->info.num_pf_queue = XE_PAGEFAULT_QUEUE_MAX;
> +       xe->atomic_svm_timeslice_ms = 5;
> +}
> +
>  struct xe_device *xe_device_create(struct pci_dev *pdev,
>                                    const struct pci_device_id *ent)
>  {
> @@ -446,8 +457,8 @@ struct xe_device *xe_device_create(struct pci_dev
> *pdev,
>  
>         xe->info.devid = pdev->device;
>         xe->info.revid = pdev->revision;
> -       xe->info.force_execlist = xe_modparam.force_execlist;
> -       xe->atomic_svm_timeslice_ms = 5;
> +
> +       xe_device_parse_modparame(xe);
>  
>         err = xe_irq_init(xe);
>         if (err)
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h
> b/drivers/gpu/drm/xe/xe_device_types.h
> index 02b91a698500..d5c5fd7972a1 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -243,6 +243,8 @@ struct xe_device {
>                 u8 revid;
>                 /** @info.step: stepping information for each IP */
>                 struct xe_step_info step;
> +               /** @info.num_pf_queue: Number of page fault queues
> */
> +               int num_pf_queue;
>                 /** @info.dma_mask_size: DMA address bits */
>                 u8 dma_mask_size;
>                 /** @info.vram_flags: Vram flags */
> @@ -399,9 +401,9 @@ struct xe_device {
>                 struct rw_semaphore lock;
>                 /** @usm.pf_wq: page fault work queue, unbound, high
> priority */
>                 struct workqueue_struct *pf_wq;
> -#define XE_PAGEFAULT_QUEUE_COUNT       4
> +#define XE_PAGEFAULT_QUEUE_MAX 8
>                 /** @pf_queue: Page fault queues */
> -               struct xe_pagefault_queue
> pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
> +               struct xe_pagefault_queue
> pf_queue[XE_PAGEFAULT_QUEUE_MAX];
>         } usm;
>  
>         /** @pinned: pinned BO state */
> diff --git a/drivers/gpu/drm/xe/xe_module.c
> b/drivers/gpu/drm/xe/xe_module.c
> index d08338fc3bc1..0671ae9d9e5a 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -27,6 +27,7 @@
>  #define DEFAULT_PROBE_DISPLAY          true
>  #define DEFAULT_VRAM_BAR_SIZE          0
>  #define DEFAULT_FORCE_PROBE            CONFIG_DRM_XE_FORCE_PROBE
> +#define DEFAULT_NUM_PF_QUEUE           4
>  #define DEFAULT_MAX_VFS                        ~0
>  #define DEFAULT_MAX_VFS_STR            "unlimited"
>  #define DEFAULT_WEDGED_MODE            1
> @@ -40,6 +41,7 @@ struct xe_modparam xe_modparam = {
>         .max_vfs =              DEFAULT_MAX_VFS,
>  #endif
>         .wedged_mode =          DEFAULT_WEDGED_MODE,
> +       .num_pf_queue =         DEFAULT_NUM_PF_QUEUE,
>         .svm_notifier_size =    DEFAULT_SVM_NOTIFIER_SIZE,
>         /* the rest are 0 by default */
>  };
> @@ -93,6 +95,9 @@ MODULE_PARM_DESC(wedged_mode,
>                  "Module's default policy for the wedged mode
> (0=never, 1=upon-critical-errors, 2=upon-any-hang "
>                  "[default=" __stringify(DEFAULT_WEDGED_MODE) "])");
>  
> +module_param_named(num_pf_queue, xe_modparam.num_pf_queue, int,
> 0600);
> +MODULE_PARM_DESC(num_pf_queue, "Number of page fault queue,
> default=4, min=1, max=8");
> +
>  static int xe_check_nomodeset(void)
>  {
>         if (drm_firmware_drivers_only())
> diff --git a/drivers/gpu/drm/xe/xe_module.h
> b/drivers/gpu/drm/xe/xe_module.h
> index 5a3bfea8b7b4..36ac2151fe16 100644
> --- a/drivers/gpu/drm/xe/xe_module.h
> +++ b/drivers/gpu/drm/xe/xe_module.h
> @@ -22,6 +22,7 @@ struct xe_modparam {
>         unsigned int max_vfs;
>  #endif
>         int wedged_mode;
> +       int num_pf_queue;
>         u32 svm_notifier_size;
>  };
>  
> diff --git a/drivers/gpu/drm/xe/xe_pagefault.c
> b/drivers/gpu/drm/xe/xe_pagefault.c
> index f11c70ca6dd9..3c69557c6aa9 100644
> --- a/drivers/gpu/drm/xe/xe_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_pagefault.c
> @@ -373,11 +373,11 @@ int xe_pagefault_init(struct xe_device *xe)
>  
>         xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
>                                         WQ_UNBOUND | WQ_HIGHPRI,
> -                                       XE_PAGEFAULT_QUEUE_COUNT);
> +                                       xe->info.num_pf_queue);
>         if (!xe->usm.pf_wq)
>                 return -ENOMEM;
>  
> -       for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) {
> +       for (i = 0; i < xe->info.num_pf_queue; ++i) {
>                 err = xe_pagefault_queue_init(xe, xe->usm.pf_queue +
> i);
>                 if (err)
>                         goto err_out;
> @@ -420,7 +420,7 @@ void xe_pagefault_reset(struct xe_device *xe,
> struct xe_gt *gt)
>  {
>         int i;
>  
> -       for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i)
> +       for (i = 0; i < xe->info.num_pf_queue; ++i)
>                 xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue +
> i);
>  }
>  
> @@ -442,7 +442,7 @@ static int xe_pagefault_queue_index(struct
> xe_device *xe)
>  
>         WRITE_ONCE(xe->usm.current_pf_queue, (old_pf_queue + 1));
>  
> -       return old_pf_queue % XE_PAGEFAULT_QUEUE_COUNT;
> +       return old_pf_queue % xe->info.num_pf_queue;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 147b900b1f0b..67000c4466ab 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3039,7 +3039,8 @@ static int prefetch_ranges(struct xe_vm *vm,
> struct xe_vma_ops *vops,
>         bool devmem_possible = IS_DGFX(vm->xe) &&
>                 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
>         bool skip_threads = op->prefetch_range.ranges_count == 1 ||
> sram ||
> -               !(vops->flags & XE_VMA_OPS_FLAG_DOWNGRADE_LOCK);
> +               !(vops->flags & XE_VMA_OPS_FLAG_DOWNGRADE_LOCK) ||
> +               vm->xe->info.num_pf_queue == 1;

Ah, OK, this does add that, but we might still want to skip the threads
even with the default number of queues (4).

Also, should we make this a configfs attribute so that users can
configure it per device for finer tuning? I understand you have this
just for local debugging right now...

Thanks,
Stuart

>         struct prefetch_thread *thread = skip_threads ? &stack_thread
> : NULL;
>         int err = 0, idx = 0;
>

next prev parent reply	other threads:[~2025-08-28 22:58 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-06  6:22 [PATCH 00/11] Pagefault refactor, fine grained fault locking, threaded prefetch Matthew Brost
2025-08-06  6:22 ` [PATCH 01/11] drm/xe: Stub out new pagefault layer Matthew Brost
2025-08-06 23:01   ` Summers, Stuart
2025-08-06 23:53     ` Matthew Brost
2025-08-07 17:20       ` Summers, Stuart
2025-08-07 18:10         ` Matthew Brost
2025-08-28 20:18           ` Summers, Stuart
2025-08-28 20:20             ` Matthew Brost
2025-08-27 15:29   ` Francois Dugast
2025-08-27 16:03     ` Matthew Brost
2025-08-27 16:25       ` Francois Dugast
2025-08-27 16:40         ` Matthew Brost
2025-08-27 18:00       ` Matthew Brost
2025-08-28 20:08   ` Summers, Stuart
2025-08-06  6:22 ` [PATCH 02/11] drm/xe: Implement xe_pagefault_init Matthew Brost
2025-08-06 23:08   ` Summers, Stuart
2025-08-06 23:59     ` Matthew Brost
2025-08-07 18:22       ` Summers, Stuart
2025-08-27 16:30   ` Francois Dugast
2025-08-27 16:49     ` Matthew Brost
2025-08-28 20:10   ` Summers, Stuart
2025-08-28 20:14     ` Matthew Brost
2025-08-28 20:19       ` Summers, Stuart
2025-08-06  6:22 ` [PATCH 03/11] drm/xe: Implement xe_pagefault_reset Matthew Brost
2025-08-06 23:16   ` Summers, Stuart
2025-08-07  0:12     ` Matthew Brost
2025-08-07 18:29       ` Summers, Stuart
2025-08-06  6:22 ` [PATCH 04/11] drm/xe: Implement xe_pagefault_handler Matthew Brost
2025-08-28 11:26   ` Francois Dugast
2025-08-28 20:24   ` Summers, Stuart
2025-08-06  6:22 ` [PATCH 05/11] drm/xe: Implement xe_pagefault_queue_work Matthew Brost
2025-08-28 12:29   ` Francois Dugast
2025-08-28 18:39     ` Matthew Brost
2025-08-28 22:04   ` Summers, Stuart
2025-08-29  0:51     ` Matthew Brost
2025-08-06  6:22 ` [PATCH 06/11] drm/xe: Add xe_guc_pagefault layer Matthew Brost
2025-08-28 13:27   ` Francois Dugast
2025-08-28 18:38     ` Matthew Brost
2025-08-28 22:11   ` Summers, Stuart
2025-08-29  0:54     ` Matthew Brost
2025-08-06  6:22 ` [PATCH 07/11] drm/xe: Remove unused GT page fault code Matthew Brost
2025-08-28 19:13   ` Summers, Stuart
2025-08-06  6:22 ` [PATCH 08/11] drm/xe: Fine grained page fault locking Matthew Brost
2025-08-06  6:22 ` [PATCH 09/11] drm/xe: Allow prefetch-only VM bind IOCTLs to use VM read lock Matthew Brost
2025-08-06  6:22 ` [PATCH 10/11] drm/xe: Thread prefetch of SVM ranges Matthew Brost
2025-08-28 22:55   ` Summers, Stuart
2025-08-29  1:06     ` Matthew Brost
2025-08-06  6:22 ` [PATCH 11/11] drm/xe: Add num_pf_queue modparam Matthew Brost
2025-08-28 22:58   ` Summers, Stuart [this message]
2025-08-06  6:36 ` ✗ CI.checkpatch: warning for Pagefault refactor, fine grained fault locking, threaded prefetch Patchwork
2025-08-06  6:36 ` ✗ CI.KUnit: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=423dbb4bf9fcd4898af48d6ce1d1b53585dfafb4.camel@intel.com \
    --to=stuart.summers@intel.com \
    --cc=francois.dugast@intel.com \
    --cc=himal.prasad.ghimiray@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=michal.mrozek@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.