* [PATCH v2] crypto: qat - fix VF2PF work teardown race in adf_disable_sriov()
@ 2026-05-13 14:47 Giovanni Cabiddu
0 siblings, 0 replies; only message in thread
From: Giovanni Cabiddu @ 2026-05-13 14:47 UTC (permalink / raw)
To: herbert; +Cc: linux-crypto, qat-linux, Giovanni Cabiddu, stable, Ahsan Atta
The VF2PF interrupt handler queues PF-side response work that stores a
raw pointer to per-VF state (struct adf_accel_vf_info). Currently,
adf_disable_sriov() destroys per-VF mutexes and frees vf_info without
stopping new VF2PF work or waiting for in-flight workers to complete. A
concurrently scheduled or already queued worker can then dereference
freed memory.
Depending on timing and on whether the freed memory has been reused,
this manifests as a use-after-free or, as in the report below, a NULL
pointer dereference when KASAN is enabled:
BUG: KASAN: null-ptr-deref in mutex_lock+0x76/0xe0
Write of size 8 at addr 0000000000000260 by task kworker/24:2/...
Workqueue: qat_pf2vf_resp_wq adf_iov_send_resp [intel_qat]
Call Trace:
kasan_report+0x119/0x140
mutex_lock+0x76/0xe0
adf_gen4_pfvf_send+0xd4/0x1f0 [intel_qat]
adf_recv_and_handle_vf2pf_msg+0x290/0x360 [intel_qat]
adf_iov_send_resp+0x8c/0xe0 [intel_qat]
process_one_work+0x6ac/0xfd0
worker_thread+0x4dd/0xd30
kthread+0x326/0x410
ret_from_fork+0x33b/0x670
Add a PF-local flag, vf2pf_disabled, that gates work queueing, worker
processing, and interrupt re-enabling during teardown. Set this flag
atomically with the hardware interrupt mask inside
adf_disable_all_vf2pf_interrupts(). After masking, synchronize the AE
cluster MSI-X interrupt and flush the PF response workqueue before
tearing down per-VF locks and state so all in-flight work completes
before vf_info is destroyed.
Introduce adf_enable_all_vf2pf_interrupts() to clear the flag and
unmask all VF2PF interrupts under the same lock when SR-IOV is
re-enabled. This ensures the software flag and hardware state transition
atomically on both the enable and disable paths.
Cc: stable@vger.kernel.org
Fixes: ed8ccaef52fa ("crypto: qat - Add support for SRIOV")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
---
Changes since v1:
- Reworked the bail-out check in adf_enable_all_vf2pf_interrupts() to
compute vf_mask first and check it instead of num_vfs.
- Removed the unreachable kfree() fallback in adf_schedule_vf2pf_handler().
Since pf2vf_resp is freshly allocated and initialized via INIT_WORK(),
queue_work() is guaranteed to return true for a work_struct that has
never been queued.
- Replaced '>= 0' with '> 0' in the check after pci_irq_vector() so that
only strictly positive IRQ vectors are accepted.
.../intel/qat/qat_common/adf_accel_devices.h | 2 +
.../intel/qat/qat_common/adf_common_drv.h | 2 +
drivers/crypto/intel/qat/qat_common/adf_isr.c | 39 +++++++++++++++++++
.../crypto/intel/qat/qat_common/adf_sriov.c | 20 +++++++++-
4 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index 03a4e9690208..d9b2a1cf474e 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -480,6 +480,8 @@ struct adf_accel_dev {
struct {
/* protects VF2PF interrupts access */
spinlock_t vf2pf_ints_lock;
+ /* prevents VF2PF handling from racing with VF state teardown */
+ bool vf2pf_disabled;
/* vf_info is non-zero when SR-IOV is init'ed */
struct adf_accel_vf_info *vf_info;
} pf;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index a05d149423b0..b9188ea9aa72 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -110,6 +110,7 @@ void qat_comp_alg_callback(void *resp);
int adf_isr_resource_alloc(struct adf_accel_dev *accel_dev);
void adf_isr_resource_free(struct adf_accel_dev *accel_dev);
+void adf_isr_sync_ae_cluster(struct adf_accel_dev *accel_dev);
int adf_vf_isr_resource_alloc(struct adf_accel_dev *accel_dev);
void adf_vf_isr_resource_free(struct adf_accel_dev *accel_dev);
@@ -183,6 +184,7 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
void adf_reenable_sriov(struct adf_accel_dev *accel_dev);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask);
+void adf_enable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 num_vfs);
void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u32 vf_nr);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c
index 4639d7fd93e6..159e91a50106 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c
@@ -62,6 +62,23 @@ void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask)
unsigned long flags;
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ if (!READ_ONCE(accel_dev->pf.vf2pf_disabled))
+ GET_PFVF_OPS(accel_dev)->enable_vf2pf_interrupts(pmisc_addr, vf_mask);
+ spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
+}
+
+void adf_enable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 num_vfs)
+{
+ void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev);
+ unsigned long flags;
+ u32 vf_mask;
+
+ vf_mask = BIT_ULL(num_vfs) - 1;
+ if (!vf_mask)
+ return;
+
+ spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ WRITE_ONCE(accel_dev->pf.vf2pf_disabled, false);
GET_PFVF_OPS(accel_dev)->enable_vf2pf_interrupts(pmisc_addr, vf_mask);
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
@@ -72,6 +89,7 @@ void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev)
unsigned long flags;
spin_lock_irqsave(&accel_dev->pf.vf2pf_ints_lock, flags);
+ WRITE_ONCE(accel_dev->pf.vf2pf_disabled, true);
GET_PFVF_OPS(accel_dev)->disable_all_vf2pf_interrupts(pmisc_addr);
spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags);
}
@@ -174,6 +192,27 @@ static irqreturn_t adf_msix_isr_ae(int irq, void *dev_ptr)
return IRQ_NONE;
}
+void adf_isr_sync_ae_cluster(struct adf_accel_dev *accel_dev)
+{
+ struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ u32 num_entries = pci_dev_info->msix_entries.num_entries;
+ struct adf_irq *irqs = pci_dev_info->msix_entries.irqs;
+ u32 irq_idx;
+ int irq;
+
+ if (!test_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status) || !irqs)
+ return;
+
+ irq_idx = num_entries > 1 ? hw_data->num_banks : 0;
+ if (irq_idx >= num_entries || !irqs[irq_idx].enabled)
+ return;
+
+ irq = pci_irq_vector(pci_dev_info->pci_dev, irq_idx); /* irq_idx, not num_banks: single-vector case must sync vector 0 */
+ if (irq > 0)
+ synchronize_irq(irq);
+}
+
static void adf_free_irqs(struct adf_accel_dev *accel_dev)
{
struct adf_accel_pci *pci_dev_info = &accel_dev->accel_pci_dev;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c
index 8bf0fe1fcb4d..96939572109e 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c
@@ -26,6 +26,9 @@ static void adf_iov_send_resp(struct work_struct *work)
u32 vf_nr = vf_info->vf_nr;
bool ret;
+ if (READ_ONCE(accel_dev->pf.vf2pf_disabled))
+ goto out;
+
mutex_lock(&vf_info->pfvf_mig_lock);
ret = adf_recv_and_handle_vf2pf_msg(accel_dev, vf_nr);
if (ret)
@@ -33,13 +36,18 @@ static void adf_iov_send_resp(struct work_struct *work)
adf_enable_vf2pf_interrupts(accel_dev, 1 << vf_nr);
mutex_unlock(&vf_info->pfvf_mig_lock);
+out:
kfree(pf2vf_resp);
}
void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
{
+ struct adf_accel_dev *accel_dev = vf_info->accel_dev;
struct adf_pf2vf_resp *pf2vf_resp;
+ if (READ_ONCE(accel_dev->pf.vf2pf_disabled))
+ return;
+
pf2vf_resp = kzalloc_obj(*pf2vf_resp, GFP_ATOMIC);
if (!pf2vf_resp)
return;
@@ -49,6 +57,12 @@ void adf_schedule_vf2pf_handler(struct adf_accel_vf_info *vf_info)
queue_work(pf2vf_resp_wq, &pf2vf_resp->pf2vf_resp_work);
}
+static void adf_flush_pf2vf_resp_wq(void)
+{
+ if (pf2vf_resp_wq)
+ flush_workqueue(pf2vf_resp_wq);
+}
+
static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
{
struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
@@ -75,7 +89,7 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
hw_data->configure_iov_threads(accel_dev, true);
/* Enable VF to PF interrupts for all VFs */
- adf_enable_vf2pf_interrupts(accel_dev, BIT_ULL(totalvfs) - 1);
+ adf_enable_all_vf2pf_interrupts(accel_dev, totalvfs);
/*
* Due to the hardware design, when SR-IOV and the ring arbiter
@@ -248,8 +262,10 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
adf_pf2vf_wait_for_restarting_complete(accel_dev);
pci_disable_sriov(accel_to_pci_dev(accel_dev));
- /* Disable VF to PF interrupts */
+ /* Block VF2PF work and disable VF to PF interrupts */
adf_disable_all_vf2pf_interrupts(accel_dev);
+ adf_isr_sync_ae_cluster(accel_dev);
+ adf_flush_pf2vf_resp_wq();
/* Clear Valid bits in AE Thread to PCIe Function Mapping */
if (hw_data->configure_iov_threads)
base-commit: 1b0b8d04c100e162957c4615f6c37da0efbb6f91
--
2.54.0
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2026-05-13 14:49 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-13 14:47 [PATCH v2] crypto: qat - fix VF2PF work teardown race in adf_disable_sriov() Giovanni Cabiddu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox