From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
	"Brost,  Matthew" <matthew.brost@intel.com>
Cc: "Mrozek, Michal" <michal.mrozek@intel.com>,
	"Ghimiray, Himal Prasad" <himal.prasad.ghimiray@intel.com>,
	"thomas.hellstrom@linux.intel.com"
	<thomas.hellstrom@linux.intel.com>,
	"Dugast, Francois" <francois.dugast@intel.com>
Subject: Re: [PATCH 11/11] drm/xe: Add num_pf_queue modparam
Date: Thu, 28 Aug 2025 22:58:09 +0000
Message-ID: <423dbb4bf9fcd4898af48d6ce1d1b53585dfafb4.camel@intel.com>
In-Reply-To: <20250806062242.1090416-12-matthew.brost@intel.com>

On Tue, 2025-08-05 at 23:22 -0700, Matthew Brost wrote:
> Enable quick experiments to see how the number of page fault queues
> affects performance.
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_device.c       | 15 +++++++++++++--
>  drivers/gpu/drm/xe/xe_device_types.h |  6 ++++--
>  drivers/gpu/drm/xe/xe_module.c       |  5 +++++
>  drivers/gpu/drm/xe/xe_module.h       |  1 +
>  drivers/gpu/drm/xe/xe_pagefault.c    |  8 ++++----
>  drivers/gpu/drm/xe/xe_vm.c           |  3 ++-
>  6 files changed, 29 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
> index c7c8aee03841..47eb07e9c799 100644
> --- a/drivers/gpu/drm/xe/xe_device.c
> +++ b/drivers/gpu/drm/xe/xe_device.c
> @@ -413,6 +413,17 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
>         ttm_device_fini(&xe->ttm);
>  }
>  
> +static void xe_device_parse_modparam(struct xe_device *xe)
> +{
> +       xe->info.force_execlist = xe_modparam.force_execlist;
> +       xe->info.num_pf_queue = xe_modparam.num_pf_queue;
> +       if (xe->info.num_pf_queue < 1)
> +               xe->info.num_pf_queue = 1;
> +       else if (xe->info.num_pf_queue > XE_PAGEFAULT_QUEUE_MAX)
> +               xe->info.num_pf_queue = XE_PAGEFAULT_QUEUE_MAX;
> +       xe->atomic_svm_timeslice_ms = 5;
> +}
> +
>  struct xe_device *xe_device_create(struct pci_dev *pdev,
>                                    const struct pci_device_id *ent)
>  {
> @@ -446,8 +457,8 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>  
>         xe->info.devid = pdev->device;
>         xe->info.revid = pdev->revision;
> -       xe->info.force_execlist = xe_modparam.force_execlist;
> -       xe->atomic_svm_timeslice_ms = 5;
> +
> +       xe_device_parse_modparam(xe);
>  
>         err = xe_irq_init(xe);
>         if (err)
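
A small nit on the clamping in the new modparam helper above: this could
probably just use clamp() from linux/minmax.h, e.g. (untested):

	xe->info.num_pf_queue = clamp(xe_modparam.num_pf_queue, 1,
				      XE_PAGEFAULT_QUEUE_MAX);

Not a blocker, the open-coded version works as well.
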
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 02b91a698500..d5c5fd7972a1 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -243,6 +243,8 @@ struct xe_device {
>                 u8 revid;
>                 /** @info.step: stepping information for each IP */
>                 struct xe_step_info step;
> +               /** @info.num_pf_queue: Number of page fault queues */
> +               int num_pf_queue;
>                 /** @info.dma_mask_size: DMA address bits */
>                 u8 dma_mask_size;
>                 /** @info.vram_flags: Vram flags */
> @@ -399,9 +401,9 @@ struct xe_device {
>                 struct rw_semaphore lock;
>                 /** @usm.pf_wq: page fault work queue, unbound, high priority */
>                 struct workqueue_struct *pf_wq;
> -#define XE_PAGEFAULT_QUEUE_COUNT       4
> +#define XE_PAGEFAULT_QUEUE_MAX 8
>                 /** @pf_queue: Page fault queues */
> -               struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
> +               struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_MAX];
>         } usm;
>  
>         /** @pinned: pinned BO state */
> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> index d08338fc3bc1..0671ae9d9e5a 100644
> --- a/drivers/gpu/drm/xe/xe_module.c
> +++ b/drivers/gpu/drm/xe/xe_module.c
> @@ -27,6 +27,7 @@
>  #define DEFAULT_PROBE_DISPLAY          true
>  #define DEFAULT_VRAM_BAR_SIZE          0
>  #define DEFAULT_FORCE_PROBE            CONFIG_DRM_XE_FORCE_PROBE
> +#define DEFAULT_NUM_PF_QUEUE           4
>  #define DEFAULT_MAX_VFS                        ~0
>  #define DEFAULT_MAX_VFS_STR            "unlimited"
>  #define DEFAULT_WEDGED_MODE            1
> @@ -40,6 +41,7 @@ struct xe_modparam xe_modparam = {
>         .max_vfs =              DEFAULT_MAX_VFS,
>  #endif
>         .wedged_mode =          DEFAULT_WEDGED_MODE,
> +       .num_pf_queue =         DEFAULT_NUM_PF_QUEUE,
>         .svm_notifier_size =    DEFAULT_SVM_NOTIFIER_SIZE,
>         /* the rest are 0 by default */
>  };
> @@ -93,6 +95,9 @@ MODULE_PARM_DESC(wedged_mode,
>                  "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang "
>                  "[default=" __stringify(DEFAULT_WEDGED_MODE) "])");
>  
> +module_param_named(num_pf_queue, xe_modparam.num_pf_queue, int, 0600);
> +MODULE_PARM_DESC(num_pf_queue, "Number of page fault queues, default=4, min=1, max=8");
> +
>  static int xe_check_nomodeset(void)
>  {
>         if (drm_firmware_drivers_only())
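
Side note for anyone else wanting to try this out: with the parameter
defined above, loading via "modprobe xe num_pf_queue=8" (or passing
xe.num_pf_queue=8 on the kernel command line) should be all that is
needed. The 0600 permissions also expose it as
/sys/module/xe/parameters/num_pf_queue, though from the xe_device.c hunk
it looks like the value is only sampled at device probe, so runtime
writes would only take effect for devices bound afterwards.
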
> diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> index 5a3bfea8b7b4..36ac2151fe16 100644
> --- a/drivers/gpu/drm/xe/xe_module.h
> +++ b/drivers/gpu/drm/xe/xe_module.h
> @@ -22,6 +22,7 @@ struct xe_modparam {
>         unsigned int max_vfs;
>  #endif
>         int wedged_mode;
> +       int num_pf_queue;
>         u32 svm_notifier_size;
>  };
>  
> diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
> index f11c70ca6dd9..3c69557c6aa9 100644
> --- a/drivers/gpu/drm/xe/xe_pagefault.c
> +++ b/drivers/gpu/drm/xe/xe_pagefault.c
> @@ -373,11 +373,11 @@ int xe_pagefault_init(struct xe_device *xe)
>  
>         xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
>                                         WQ_UNBOUND | WQ_HIGHPRI,
> -                                       XE_PAGEFAULT_QUEUE_COUNT);
> +                                       xe->info.num_pf_queue);
>         if (!xe->usm.pf_wq)
>                 return -ENOMEM;
>  
> -       for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) {
> +       for (i = 0; i < xe->info.num_pf_queue; ++i) {
>                 err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i);
>                 if (err)
>                         goto err_out;
> @@ -420,7 +420,7 @@ void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
>  {
>         int i;
>  
> -       for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i)
> +       for (i = 0; i < xe->info.num_pf_queue; ++i)
>                 xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i);
>  }
>  
> @@ -442,7 +442,7 @@ static int xe_pagefault_queue_index(struct xe_device *xe)
>  
>         WRITE_ONCE(xe->usm.current_pf_queue, (old_pf_queue + 1));
>  
> -       return old_pf_queue % XE_PAGEFAULT_QUEUE_COUNT;
> +       return old_pf_queue % xe->info.num_pf_queue;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 147b900b1f0b..67000c4466ab 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -3039,7 +3039,8 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops,
>         bool devmem_possible = IS_DGFX(vm->xe) &&
>                 IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
>         bool skip_threads = op->prefetch_range.ranges_count == 1 || sram ||
> -               !(vops->flags & XE_VMA_OPS_FLAG_DOWNGRADE_LOCK);
> +               !(vops->flags & XE_VMA_OPS_FLAG_DOWNGRADE_LOCK) ||
> +               vm->xe->info.num_pf_queue == 1;

Ah ok, this does add that, but we might still want to skip threading
even with the default number of queues (4).

Also, should we make this a configfs attribute so users can configure
this per device for finer tuning? I understand you have this just for
local debug right now...
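
Just to sketch what I have in mind (rough and untested, the struct and
helper names below are made up, and the config_group / subsystem
registration plumbing is left out):

#include <linux/configfs.h>

/* Hypothetical per-device configfs node carrying the tunable. */
struct xe_config_device {
	struct config_group group;
	int num_pf_queue;
};

static struct xe_config_device *to_xe_config_device(struct config_item *item)
{
	return container_of(to_config_group(item), struct xe_config_device,
			    group);
}

static ssize_t xe_config_device_num_pf_queue_show(struct config_item *item,
						  char *page)
{
	return sprintf(page, "%d\n", to_xe_config_device(item)->num_pf_queue);
}

static ssize_t xe_config_device_num_pf_queue_store(struct config_item *item,
						   const char *page, size_t len)
{
	struct xe_config_device *dev = to_xe_config_device(item);
	int ret, val;

	ret = kstrtoint(page, 0, &val);
	if (ret)
		return ret;

	/* Same bounds as the modparam clamp. */
	if (val < 1 || val > XE_PAGEFAULT_QUEUE_MAX)
		return -EINVAL;

	dev->num_pf_queue = val;
	return len;
}

CONFIGFS_ATTR(xe_config_device_, num_pf_queue);

static struct configfs_attribute *xe_config_device_attrs[] = {
	&xe_config_device_attr_num_pf_queue,
	NULL,
};

The probe path could then prefer a value set here over the module
parameter. Anyway, just a thought for a follow-up, not something that
needs to block this series.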

Thanks,
Stuart

>         struct prefetch_thread *thread = skip_threads ? &stack_thread : NULL;
>         int err = 0, idx = 0;
>  

