Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: "Summers, Stuart" <stuart.summers@intel.com>
To: "intel-xe@lists.freedesktop.org" <intel-xe@lists.freedesktop.org>,
	"Wajdeczko, Michal" <Michal.Wajdeczko@intel.com>
Cc: "Brost, Matthew" <matthew.brost@intel.com>,
	"thomas.hellstrom@linux.intel.com"
	<thomas.hellstrom@linux.intel.com>
Subject: Re: [PATCH] drm/xe/pf: Move VFs reprovisioning to worker
Date: Mon, 27 Jan 2025 17:07:14 +0000	[thread overview]
Message-ID: <838c976cbff003ce7b85d3dbf127f26d0d3e0979.camel@intel.com> (raw)
In-Reply-To: <20250125215505.720-1-michal.wajdeczko@intel.com>

On Sat, 2025-01-25 at 22:55 +0100, Michal Wajdeczko wrote:
> Since the GuC is reset during GT reset, we need to re-send the
> entire SR-IOV provisioning configuration to the GuC. But since
> this whole configuration is protected by the PF master mutex and
> we can't avoid making allocations under this mutex (like during
> LMEM provisioning), we can't do this reprovisioning from gt-reset
> path if we want to be reclaim-safe. Move VFs reprovisioning to an
> async worker that we will start from the gt-reset path.

Admittedly I don't fully understand the PF restart flow here from
userspace. Is there some race condition we need to check for, such as
whether GuC completes base configuration before the PF config comes through? Is
it possible we can get into either some deadlock between the native
init and the PF init or start running content on some engines in native
mode before PF completes?

Thanks,
Stuart

> 
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/xe/xe_gt_sriov_pf.c       | 53 ++++++++++++++++++++-
> --
>  drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h | 10 +++++
>  2 files changed, 56 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> index 6f906c8e8108..d66478deab98 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
> @@ -15,7 +15,11 @@
>  #include "xe_gt_sriov_pf_helpers.h"
>  #include "xe_gt_sriov_pf_migration.h"
>  #include "xe_gt_sriov_pf_service.h"
> +#include "xe_gt_sriov_printk.h"
>  #include "xe_mmio.h"
> +#include "xe_pm.h"
> +
> +static void pf_worker_restart_func(struct work_struct *w);
>  
>  /*
>   * VF's metadata is maintained in the flexible array where:
> @@ -41,6 +45,11 @@ static int pf_alloc_metadata(struct xe_gt *gt)
>         return 0;
>  }
>  
> +static void pf_init_workers(struct xe_gt *gt)
> +{
> +       INIT_WORK(&gt->sriov.pf.workers.restart,
> pf_worker_restart_func);
> +}
> +
>  /**
>   * xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on
> PF.
>   * @gt: the &xe_gt to initialize
> @@ -65,6 +74,8 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
>         if (err)
>                 return err;
>  
> +       pf_init_workers(gt);
> +
>         return 0;
>  }
>  
> @@ -155,14 +166,42 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt
> *gt, unsigned int vfid)
>         pf_clear_vf_scratch_regs(gt, vfid);
>  }
>  
> -/**
> - * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset.
> - * @gt: the &xe_gt
> - *
> - * This function can only be called on PF.
> - */
> -void xe_gt_sriov_pf_restart(struct xe_gt *gt)
> +static void pf_restart(struct xe_gt *gt)
>  {
> +       struct xe_device *xe = gt_to_xe(gt);
> +
> +       xe_pm_runtime_get(xe);
>         xe_gt_sriov_pf_config_restart(gt);
>         xe_gt_sriov_pf_control_restart(gt);
> +       xe_pm_runtime_put(xe);
> +
> +       xe_gt_sriov_dbg(gt, "restart completed\n");
> +}
> +
> +static void pf_worker_restart_func(struct work_struct *w)
> +{
> +       struct xe_gt *gt = container_of(w, typeof(*gt),
> sriov.pf.workers.restart);
> +
> +       pf_restart(gt);
> +}
> +
> +static void pf_queue_restart(struct xe_gt *gt)
> +{
> +       struct xe_device *xe = gt_to_xe(gt);
> +
> +       xe_gt_assert(gt, IS_SRIOV_PF(xe));
> +
> +       if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart))
> +               xe_gt_sriov_dbg(gt, "restart already in queue!\n");
> +}
> +
> +/**
> + * xe_gt_sriov_pf_restart - Restart SR-IOV support after a GT reset.
> + * @gt: the &xe_gt
> + *
> + * This function can only be called on PF.
> + */
> +void xe_gt_sriov_pf_restart(struct xe_gt *gt)
> +{
> +       pf_queue_restart(gt);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> index 0426b1a77069..a64a6835ad65 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
> @@ -35,8 +35,17 @@ struct xe_gt_sriov_metadata {
>         struct xe_gt_sriov_state_snapshot snapshot;
>  };
>  
> +/**
> + * struct xe_gt_sriov_pf_workers - GT level workers used by the PF.
> + */
> +struct xe_gt_sriov_pf_workers {
> +       /** @restart: worker that executes actions post GT reset */
> +       struct work_struct restart;
> +};
> +
>  /**
>   * struct xe_gt_sriov_pf - GT level PF virtualization data.
> + * @workers: workers data.
>   * @service: service data.
>   * @control: control data.
>   * @policy: policy data.
> @@ -45,6 +54,7 @@ struct xe_gt_sriov_metadata {
>   * @vfs: metadata for all VFs.
>   */
>  struct xe_gt_sriov_pf {
> +       struct xe_gt_sriov_pf_workers workers;
>         struct xe_gt_sriov_pf_service service;
>         struct xe_gt_sriov_pf_control control;
>         struct xe_gt_sriov_pf_policy policy;


  parent reply	other threads:[~2025-01-27 17:08 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-25 21:55 [PATCH] drm/xe/pf: Move VFs reprovisioning to worker Michal Wajdeczko
2025-01-25 22:45 ` ✓ CI.Patch_applied: success for " Patchwork
2025-01-25 22:45 ` ✓ CI.checkpatch: " Patchwork
2025-01-25 22:46 ` ✓ CI.KUnit: " Patchwork
2025-01-25 23:02 ` ✓ CI.Build: " Patchwork
2025-01-25 23:05 ` ✓ CI.Hooks: " Patchwork
2025-01-25 23:06 ` ✓ CI.checksparse: " Patchwork
2025-01-25 23:33 ` ✓ Xe.CI.BAT: " Patchwork
2025-01-26  0:41 ` ✗ Xe.CI.Full: failure " Patchwork
2025-01-27 17:07   ` Michal Wajdeczko
2025-01-27 14:23 ` [PATCH] " Michał Winiarski
2025-01-27 17:07 ` Summers, Stuart [this message]
2025-01-27 18:05   ` Michal Wajdeczko
2025-01-27 18:28     ` Summers, Stuart
2025-01-27 18:29 ` Matthew Brost

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=838c976cbff003ce7b85d3dbf127f26d0d3e0979.camel@intel.com \
    --to=stuart.summers@intel.com \
    --cc=Michal.Wajdeczko@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=matthew.brost@intel.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox