On 9/29/2025 4:55 AM, Matthew Brost wrote: > Be cautious and ensure the VF post-migration worker is not running > during driver unload. > > v3: > - More teardown later in driver init, use devm (Tomasz) there is no other teardown, so you probably meant "Move". There is no real need to check `xe_sriov_vf_migration_supported()` within `xe_gt_sriov_vf_init()`, at least as long as the teardown is just setting a flag. So this is ok (though you can add the condition if you prefer, to avoid confusion on future modification). Either way, this is ok: Reviewed-by: Tomasz Lis > > Signed-off-by: Matthew Brost > --- > drivers/gpu/drm/xe/xe_gt.c | 6 +++++ > drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 31 ++++++++++++++++++++++- > drivers/gpu/drm/xe/xe_gt_sriov_vf.h | 1 + > drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 4 ++- > 4 files changed, 40 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c > index 5f9ba4caf837..82be38c99205 100644 > --- a/drivers/gpu/drm/xe/xe_gt.c > +++ b/drivers/gpu/drm/xe/xe_gt.c > @@ -663,6 +663,12 @@ int xe_gt_init(struct xe_gt *gt) > if (err) > return err; > > + if (IS_SRIOV_VF(gt_to_xe(gt))) { > + err = xe_gt_sriov_vf_init(gt); > + if (err) > + return err; > + } > + > return 0; > } > > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c > index ad1d63b5b8d1..cc5af19c1911 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c > @@ -811,7 +811,8 @@ static void vf_start_migration_recovery(struct xe_gt *gt) > > spin_lock(>->sriov.vf.migration.lock); > > - if (!gt->sriov.vf.migration.recovery_queued) { > + if (!gt->sriov.vf.migration.recovery_queued && > + !gt->sriov.vf.migration.recovery_teardown) { > gt->sriov.vf.migration.recovery_queued = true; > WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true); > > @@ -1283,6 +1284,17 @@ static void migration_worker_func(struct work_struct *w) > vf_post_migration_recovery(gt); > } > >
+static void vf_migration_fini(void *arg) > +{ > + struct xe_gt *gt = arg; > + > + spin_lock_irq(>->sriov.vf.migration.lock); > + gt->sriov.vf.migration.recovery_teardown = true; > + spin_unlock_irq(>->sriov.vf.migration.lock); > + > + cancel_work_sync(>->sriov.vf.migration.worker); > +} > + > /** > * xe_gt_sriov_vf_init_early() - GT VF init early > * @gt: the &xe_gt > @@ -1314,6 +1326,23 @@ int xe_gt_sriov_vf_init_early(struct xe_gt *gt) > return 0; > } > > +/** > + * xe_gt_sriov_vf_init() - GT VF init > + * @gt: the &xe_gt > + * > + * Return 0 on success, errno on failure > + */ > +int xe_gt_sriov_vf_init(struct xe_gt *gt) > +{ > + /* > + * We want to tear down the VF post-migration worker early during > + * driver unload; therefore, we add this finalization action later > + * during driver load. > + */ > + return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev, > + vf_migration_fini, gt); > +} > + > /** > * xe_gt_sriov_vf_recovery_inprogress() - VF post migration recovery in progress > * @gt: the &xe_gt > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h > index ff3a0ce608cd..71e1d566da81 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h > @@ -24,6 +24,7 @@ int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); > void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); > > int xe_gt_sriov_vf_init_early(struct xe_gt *gt); > +int xe_gt_sriov_vf_init(struct xe_gt *gt); > bool xe_gt_sriov_vf_recovery_inprogress(struct xe_gt *gt); > > u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); > diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h > index 6cbf8291a5ab..e135018cba1e 100644 > --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h > +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h > @@ -59,10 +59,12 @@ struct xe_gt_sriov_vf_runtime { > struct xe_gt_sriov_vf_migration { > /** @migration: VF migration recovery worker */ > struct work_struct worker; > - /**
@lock: Protects recovery_queued */ > + /** @lock: Protects recovery_queued, recovery_teardown */ > spinlock_t lock; > /** @scratch: Scratch memory for VF recovery */ > void *scratch; > + /** @recovery_teardown: VF post migration recovery is being torn down */ > + bool recovery_teardown; > /** @recovery_queued: VF post migration recovery in queued */ > bool recovery_queued; > /** @recovery_inprogress: VF post migration recovery in progress */