From: Matthew Brost <matthew.brost@intel.com>
To: "Michał Winiarski" <michal.winiarski@intel.com>
Cc: "Alex Williamson" <alex@shazbot.org>,
"Lucas De Marchi" <lucas.demarchi@intel.com>,
"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Rodrigo Vivi" <rodrigo.vivi@intel.com>,
"Jason Gunthorpe" <jgg@ziepe.ca>,
"Yishai Hadas" <yishaih@nvidia.com>,
"Kevin Tian" <kevin.tian@intel.com>,
"Shameer Kolothum" <skolothumtho@nvidia.com>,
intel-xe@lists.freedesktop.org, linux-kernel@vger.kernel.org,
kvm@vger.kernel.org,
"Michal Wajdeczko" <michal.wajdeczko@intel.com>,
dri-devel@lists.freedesktop.org,
"Jani Nikula" <jani.nikula@linux.intel.com>,
"Joonas Lahtinen" <joonas.lahtinen@linux.intel.com>,
"Tvrtko Ursulin" <tursulin@ursulin.net>,
"David Airlie" <airlied@gmail.com>,
"Simona Vetter" <simona@ffwll.ch>,
"Lukasz Laguna" <lukasz.laguna@intel.com>,
"Christoph Hellwig" <hch@infradead.org>
Subject: Re: [PATCH v4 22/28] drm/xe/pf: Handle VRAM migration data as part of PF control
Date: Fri, 7 Nov 2025 20:31:28 -0800 [thread overview]
Message-ID: <aQ7HoF9SlR6T4BEz@lstrano-desk.jf.intel.com> (raw)
In-Reply-To: <20251105151027.540712-23-michal.winiarski@intel.com>
On Wed, Nov 05, 2025 at 04:10:20PM +0100, Michał Winiarski wrote:
> Connect the helpers to allow save and restore of VRAM migration data in
> stop_copy / resume device state.
>
> Co-developed-by: Lukasz Laguna <lukasz.laguna@intel.com>
> Signed-off-by: Lukasz Laguna <lukasz.laguna@intel.com>
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 17 ++
> drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c | 211 ++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h | 3 +
> .../drm/xe/xe_gt_sriov_pf_migration_types.h | 2 +
> drivers/gpu/drm/xe/xe_sriov_pf_control.c | 3 +
> 5 files changed, 236 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> index abc2bd09288ea..aae0c98657408 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
> @@ -891,6 +891,20 @@ static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid)
> return -EAGAIN;
> }
>
> + if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
> + XE_SRIOV_PACKET_TYPE_VRAM)) {
> + ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid);
> + if (ret == -EAGAIN)
> + return -EAGAIN;
> + else if (ret)
> + return ret;
> +
> + xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
> + XE_SRIOV_PACKET_TYPE_VRAM);
> +
> + return -EAGAIN;
> + }
> +
> return 0;
> }
>
> @@ -1129,6 +1143,9 @@ static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid)
> case XE_SRIOV_PACKET_TYPE_GUC:
> ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data);
> break;
> + case XE_SRIOV_PACKET_TYPE_VRAM:
> + ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data);
> + break;
> default:
> xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n", vfid, data->type);
> break;
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> index 22f471f269cfa..c62bb67c20a6b 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
> @@ -19,6 +19,7 @@
> #include "xe_gt_sriov_printk.h"
> #include "xe_guc_buf.h"
> #include "xe_guc_ct.h"
> +#include "xe_migrate.h"
> #include "xe_mmio.h"
> #include "xe_sriov.h"
> #include "xe_sriov_packet.h"
> @@ -501,6 +502,205 @@ int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
> return pf_restore_vf_mmio_mig_data(gt, vfid, data);
> }
>
> +static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid)
> +{
> + if (!xe_gt_is_main_type(gt))
> + return 0;
> +
> + return xe_gt_sriov_pf_config_get_lmem(gt, vfid);
> +}
> +
> +static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid,
> + struct xe_bo *vram, u64 vram_offset,
> + struct xe_bo *sysmem, u64 sysmem_offset,
> + size_t size, bool save)
> +{
> + struct dma_fence *ret = NULL;
> + struct drm_exec exec;
> + int err;
> +
> + drm_exec_init(&exec, 0, 0);
> + drm_exec_until_all_locked(&exec) {
> + err = drm_exec_lock_obj(&exec, &vram->ttm.base);
> + drm_exec_retry_on_contention(&exec);
> + if (err) {
> + ret = ERR_PTR(err);
> + goto err;
> + }
> +
> + err = drm_exec_lock_obj(&exec, &sysmem->ttm.base);
> + drm_exec_retry_on_contention(&exec);
> + if (err) {
> + ret = ERR_PTR(err);
> + goto err;
> + }
> + }
> +
> + ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size,
> + save ? XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM);
> +
> +err:
> + drm_exec_fini(&exec);
> +
> + return ret;
> +}
> +
> +#define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ)
> +static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid,
> + struct xe_bo *src_vram, u64 src_vram_offset,
> + size_t size)
> +{
> + struct xe_sriov_packet *data;
> + struct dma_fence *fence;
> + int ret;
> +
> + data = xe_sriov_packet_alloc(gt_to_xe(gt));
> + if (!data)
> + return -ENOMEM;
> +
> + ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id,
> + XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset,
> + size);
> + if (ret)
> + goto fail;
> +
> + fence = __pf_save_restore_vram(gt, vfid,
> + src_vram, src_vram_offset,
> + data->bo, 0, size, true);
> +
> + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT);
> + dma_fence_put(fence);
> + if (!ret) {
> + ret = -ETIME;
> + goto fail;
> + }
> +
> + pf_dump_mig_data(gt, vfid, data, "VRAM data save");
> +
> + ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data);
> + if (ret)
> + goto fail;
> +
> + return 0;
> +
> +fail:
> + xe_sriov_packet_free(data);
> + return ret;
> +}
> +
> +#define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M
> +static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid)
> +{
> + struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
> + loff_t *offset = &migration->save.vram_offset;
> + struct xe_bo *vram;
> + size_t vram_size, chunk_size;
> + int ret;
> +
> + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid);
> + if (!vram)
> + return -ENXIO;
> +
> + vram_size = xe_bo_size(vram);
> +
> + xe_gt_assert(gt, *offset < vram_size);
> +
> + chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE);
> +
> + ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size);
> + if (ret)
> + goto fail;
> +
> + *offset += chunk_size;
> +
> + xe_bo_put(vram);
> +
> + if (*offset < vram_size)
> + return -EAGAIN;
> +
> + return 0;
> +
> +fail:
> + xe_bo_put(vram);
> + xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret));
> + return ret;
> +}
> +
> +static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid,
> + struct xe_sriov_packet *data)
> +{
> + u64 end = data->hdr.offset + data->hdr.size;
> + struct dma_fence *fence;
> + struct xe_bo *vram;
> + size_t size;
> + int ret = 0;
> +
> + vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid);
> + if (!vram)
> + return -ENXIO;
> +
> + size = xe_bo_size(vram);
> +
> + if (end > size || end < data->hdr.size) {
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + pf_dump_mig_data(gt, vfid, data, "VRAM data restore");
> +
> + fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset,
> + data->bo, 0, data->hdr.size, false);
> + ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT);
> + dma_fence_put(fence);
> + if (!ret) {
> + ret = -ETIME;
> + goto err;
> + }
> +
> + return 0;
> +err:
> + xe_bo_put(vram);
> + xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret));
> + return ret;
> +}
> +
> +/**
> + * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 when done, -EAGAIN if more VRAM chunks remain, or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid)
> +{
> + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
> + xe_gt_assert(gt, vfid != PFID);
> + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
> +
> + return pf_save_vf_vram_mig_data(gt, vfid);
> +}
> +
> +/**
> + * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data.
> + * @gt: the &xe_gt
> + * @vfid: the VF identifier (can't be 0)
> + *
> + * This function is for PF only.
> + *
> + * Return: 0 on success or a negative error code on failure.
> + */
> +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid,
> + struct xe_sriov_packet *data)
> +{
> + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
> + xe_gt_assert(gt, vfid != PFID);
> + xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
> +
> + return pf_restore_vf_vram_mig_data(gt, vfid, data);
> +}
> +
> /**
> * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT.
> * @gt: the &xe_gt
> @@ -540,6 +740,13 @@ ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid)
> size += sizeof(struct xe_sriov_pf_migration_hdr);
> total += size;
>
> + size = pf_migration_vram_size(gt, vfid);
> + if (size < 0)
> + return size;
> + if (size > 0)
> + size += sizeof(struct xe_sriov_pf_migration_hdr);
> + total += size;
> +
> return total;
> }
>
> @@ -602,6 +809,7 @@ void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid)
> struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
>
> migration->save.data_remaining = 0;
> + migration->save.vram_offset = 0;
>
> xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0);
> pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC);
> @@ -611,6 +819,9 @@ void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid)
>
> xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0);
> pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO);
> +
> + if (pf_migration_vram_size(gt, vfid) > 0)
> + pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_VRAM);
> }
>
> /**
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> index 04b3ed0d2aa23..181207a637b93 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
> @@ -25,6 +25,9 @@ int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid,
> int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid);
> int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
> struct xe_sriov_packet *data);
> +int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid);
> +int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid,
> + struct xe_sriov_packet *data);
>
> ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid);
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
> index 9f24878690d9c..f50c64241e9c0 100644
> --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
> +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
> @@ -20,6 +20,8 @@ struct xe_gt_sriov_migration_data {
> struct {
> /** @save.data_remaining: bitmap of migration types that need to be saved */
> unsigned long data_remaining;
> + /** @save.vram_offset: offset of the next VRAM chunk to save, used for chunked VRAM save */
> + loff_t vram_offset;
> } save;
> };
>
> diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
> index 87205f0505ad0..eec218c710278 100644
> --- a/drivers/gpu/drm/xe/xe_sriov_pf_control.c
> +++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
> @@ -5,6 +5,7 @@
>
> #include "xe_device.h"
> #include "xe_gt_sriov_pf_control.h"
> +#include "xe_gt_sriov_pf_migration.h"
> #include "xe_sriov_packet.h"
> #include "xe_sriov_pf_control.h"
> #include "xe_sriov_printk.h"
> @@ -171,6 +172,8 @@ int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid)
> return ret;
>
> for_each_gt(gt, xe, id) {
> + xe_gt_sriov_pf_migration_save_init(gt, vfid);
> +
> ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
> if (ret)
> return ret;
> --
> 2.51.2
>
next prev parent reply other threads:[~2025-11-08 4:31 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-05 15:09 [PATCH v4 00/28] vfio/xe: Add driver variant for Xe VF migration Michał Winiarski
2025-11-05 15:09 ` [PATCH v4 01/28] drm/xe/pf: Remove GuC version check for migration support Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 02/28] drm/xe: Move migration support to device-level struct Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 03/28] drm/xe/pf: Convert control state to bitmap Michał Winiarski
2025-11-05 18:51 ` Michal Wajdeczko
2025-11-05 15:10 ` [PATCH v4 04/28] drm/xe/pf: Add save/restore control state stubs and connect to debugfs Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 05/28] drm/xe/pf: Add data structures and handlers for migration rings Michał Winiarski
2025-11-05 20:17 ` Michal Wajdeczko
2025-11-06 11:24 ` Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 06/28] drm/xe/pf: Add helpers for migration data packet allocation / free Michał Winiarski
2025-11-05 21:12 ` Michal Wajdeczko
2025-11-06 11:30 ` Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 07/28] drm/xe/pf: Add support for encap/decap of bitstream to/from packet Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 08/28] drm/xe/pf: Add minimalistic migration descriptor Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 09/28] drm/xe/pf: Expose VF migration data size over debugfs Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 10/28] drm/xe: Add sa/guc_buf_cache sync interface Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 11/28] drm/xe: Allow the caller to pass guc_buf_cache size Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 12/28] drm/xe/pf: Increase PF GuC Buffer Cache size and use it for VF migration Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 13/28] drm/xe/pf: Remove GuC migration data save/restore from GT debugfs Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 14/28] drm/xe/pf: Don't save GuC VF migration data on pause Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 15/28] drm/xe/pf: Switch VF migration GuC save/restore to struct migration data Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 16/28] drm/xe/pf: Handle GuC migration data as part of PF control Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 17/28] drm/xe/pf: Add helpers for VF GGTT migration data handling Michał Winiarski
2025-11-05 21:45 ` Michal Wajdeczko
2025-11-06 11:31 ` Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 18/28] drm/xe/pf: Handle GGTT migration data as part of PF control Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 19/28] drm/xe/pf: Handle MMIO " Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 20/28] drm/xe/pf: Add helper to retrieve VF's LMEM object Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 21/28] drm/xe/migrate: Add function to copy of VRAM data in chunks Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 22/28] drm/xe/pf: Handle VRAM migration data as part of PF control Michał Winiarski
2025-11-08 4:31 ` Matthew Brost [this message]
2025-11-05 15:10 ` [PATCH v4 23/28] drm/xe/pf: Add wait helper for VF FLR Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 24/28] drm/xe/pf: Enable SR-IOV VF migration Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 25/28] drm/xe/pci: Introduce a helper to allow VF access to PF xe_device Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 26/28] drm/xe/pf: Export helpers for VFIO Michał Winiarski
2025-11-05 15:10 ` [PATCH v4 27/28] drm/intel/bmg: Allow device ID usage with single-argument macros Michał Winiarski
2025-11-07 14:51 ` Lucas De Marchi
2025-11-05 15:10 ` [PATCH v4 28/28] vfio/xe: Add device specific vfio_pci driver variant for Intel graphics Michał Winiarski
2025-11-06 8:20 ` Tian, Kevin
2025-11-06 10:55 ` Winiarski, Michal
2025-11-07 3:10 ` Tian, Kevin
2025-11-08 0:47 ` Jason Gunthorpe
2025-11-08 1:05 ` Tian, Kevin
2025-11-08 1:11 ` Jason Gunthorpe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aQ7HoF9SlR6T4BEz@lstrano-desk.jf.intel.com \
--to=matthew.brost@intel.com \
--cc=airlied@gmail.com \
--cc=alex@shazbot.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=hch@infradead.org \
--cc=intel-xe@lists.freedesktop.org \
--cc=jani.nikula@linux.intel.com \
--cc=jgg@ziepe.ca \
--cc=joonas.lahtinen@linux.intel.com \
--cc=kevin.tian@intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=lucas.demarchi@intel.com \
--cc=lukasz.laguna@intel.com \
--cc=michal.wajdeczko@intel.com \
--cc=michal.winiarski@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=simona@ffwll.ch \
--cc=skolothumtho@nvidia.com \
--cc=thomas.hellstrom@linux.intel.com \
--cc=tursulin@ursulin.net \
--cc=yishaih@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox