* [PATCH 1/9] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config()
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:02 ` Dave Jiang
2025-08-05 1:27 ` [PATCH 2/9] dmaengine: idxd: Fix crash when the event log is disabled Vinicius Costa Gomes
` (7 subsequent siblings)
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
idxd_device_config() should only be called with idxd->dev_lock held.
Hold the lock to the calls that were missing.
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/init.c | 2 ++
drivers/dma/idxd/irq.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 35bdefd3728bb851beb0f235fae7c6d71bd59239..d828d352ab008127e5e442e7072c9d5df0f2c6cf 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -1091,7 +1091,9 @@ static void idxd_reset_done(struct pci_dev *pdev)
/* Re-configure IDXD device if allowed. */
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ spin_lock(&idxd->dev_lock);
rc = idxd_device_config(idxd);
+ spin_unlock(&idxd->dev_lock);
if (rc < 0) {
dev_err(dev, "HALT: %s config fails\n", idxd_name);
goto out;
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 1107db3ce0a3a65246bd0d9b1f96e99c9fa3def6..74059fe43fafeb930f58db21d3824f62b095b968 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -36,7 +36,9 @@ static void idxd_device_reinit(struct work_struct *work)
int rc, i;
idxd_device_reset(idxd);
+ spin_lock(&idxd->dev_lock);
rc = idxd_device_config(idxd);
+ spin_unlock(&idxd->dev_lock);
if (rc < 0)
goto out;
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 1/9] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config()
2025-08-05 1:27 ` [PATCH 1/9] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config() Vinicius Costa Gomes
@ 2025-08-06 17:02 ` Dave Jiang
2025-08-06 20:25 ` Vinicius Costa Gomes
0 siblings, 1 reply; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:02 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> idxd_device_config() should only be called with idxd->dev_lock held.
> Hold the lock to the calls that were missing.
>
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Patch looks fine. What about doing something like this:
---
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 5cf419fe6b46..06c182ec3c04 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -1103,11 +1103,15 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
return 0;
}
-int idxd_device_config(struct idxd_device *idxd)
+int idxd_device_config_locked(struct idxd_device *idxd)
{
int rc;
lockdep_assert_held(&idxd->dev_lock);
+
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ return -EPERM;
+
rc = idxd_wqs_setup(idxd);
if (rc < 0)
return rc;
@@ -1129,6 +1133,12 @@ int idxd_device_config(struct idxd_device *idxd)
return 0;
}
+int idxd_device_config(struct idxd_device *idxd)
+{
+ guard(spinlock)(&idxd->dev_lock);
+ return idxd_device_config_locked(idxd);
+}
+
static int idxd_wq_load_config(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
@@ -1434,11 +1444,7 @@ int idxd_drv_enable_wq(struct idxd_wq *wq)
}
}
- rc = 0;
- spin_lock(&idxd->dev_lock);
- if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
- rc = idxd_device_config(idxd);
- spin_unlock(&idxd->dev_lock);
+ rc = idxd_device_config(idxd);
if (rc < 0) {
dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
goto err;
@@ -1521,7 +1527,7 @@ EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD");
int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
{
struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
- int rc = 0;
+ int rc;
/*
* Device should be in disabled state for the idxd_drv to load. If it's in
@@ -1534,10 +1540,7 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
}
/* Device configuration */
- spin_lock(&idxd->dev_lock);
- if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
- rc = idxd_device_config(idxd);
- spin_unlock(&idxd->dev_lock);
+ rc = idxd_device_config(idxd);
if (rc < 0)
return -ENXIO;
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 74e6695881e6..f15bc2281c6b 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -760,6 +760,7 @@ int idxd_device_disable(struct idxd_device *idxd);
void idxd_device_reset(struct idxd_device *idxd);
void idxd_device_clear_state(struct idxd_device *idxd);
int idxd_device_config(struct idxd_device *idxd);
+int idxd_device_config_locked(struct idxd_device *idxd);
void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
int idxd_device_load_config(struct idxd_device *idxd);
int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 80355d03004d..193b9282e30f 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -1091,12 +1091,10 @@ static void idxd_reset_done(struct pci_dev *pdev)
idxd_device_config_restore(idxd, idxd->idxd_saved);
/* Re-configure IDXD device if allowed. */
- if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
- rc = idxd_device_config(idxd);
- if (rc < 0) {
- dev_err(dev, "HALT: %s config fails\n", idxd_name);
- goto out;
- }
+ rc = idxd_device_config(idxd);
+ if (rc < 0) {
+ dev_err(dev, "HALT: %s config fails\n", idxd_name);
+ goto out;
}
/* Bind IDXD device to driver. */
> ---
> drivers/dma/idxd/init.c | 2 ++
> drivers/dma/idxd/irq.c | 2 ++
> 2 files changed, 4 insertions(+)
>
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index 35bdefd3728bb851beb0f235fae7c6d71bd59239..d828d352ab008127e5e442e7072c9d5df0f2c6cf 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -1091,7 +1091,9 @@ static void idxd_reset_done(struct pci_dev *pdev)
>
> /* Re-configure IDXD device if allowed. */
> if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
> + spin_lock(&idxd->dev_lock);
> rc = idxd_device_config(idxd);
> + spin_unlock(&idxd->dev_lock);
> if (rc < 0) {
> dev_err(dev, "HALT: %s config fails\n", idxd_name);
> goto out;
> diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
> index 1107db3ce0a3a65246bd0d9b1f96e99c9fa3def6..74059fe43fafeb930f58db21d3824f62b095b968 100644
> --- a/drivers/dma/idxd/irq.c
> +++ b/drivers/dma/idxd/irq.c
> @@ -36,7 +36,9 @@ static void idxd_device_reinit(struct work_struct *work)
> int rc, i;
>
> idxd_device_reset(idxd);
> + spin_lock(&idxd->dev_lock);
> rc = idxd_device_config(idxd);
> + spin_unlock(&idxd->dev_lock);
> if (rc < 0)
> goto out;
>
>
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 1/9] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config()
2025-08-06 17:02 ` Dave Jiang
@ 2025-08-06 20:25 ` Vinicius Costa Gomes
0 siblings, 0 replies; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-06 20:25 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams; +Cc: dmaengine, linux-kernel
Dave Jiang <dave.jiang@intel.com> writes:
> On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
>> idxd_device_config() should only be called with idxd->dev_lock held.
>> Hold the lock to the calls that were missing.
>>
>> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
>
> Patch looks fine. What about doing something like this:
>
> ---
>
> diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
> index 5cf419fe6b46..06c182ec3c04 100644
> --- a/drivers/dma/idxd/device.c
> +++ b/drivers/dma/idxd/device.c
> @@ -1103,11 +1103,15 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
> return 0;
> }
>
> -int idxd_device_config(struct idxd_device *idxd)
> +int idxd_device_config_locked(struct idxd_device *idxd)
> {
> int rc;
>
> lockdep_assert_held(&idxd->dev_lock);
> +
> + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
> + return -EPERM;
> +
> rc = idxd_wqs_setup(idxd);
> if (rc < 0)
> return rc;
> @@ -1129,6 +1133,12 @@ int idxd_device_config(struct idxd_device *idxd)
> return 0;
> }
>
> +int idxd_device_config(struct idxd_device *idxd)
> +{
> + guard(spinlock)(&idxd->dev_lock);
> + return idxd_device_config_locked(idxd);
> +}
> +
This gave me the idea that moving the lock to idxd_device_config() might
be a better idea. As the _locked() variant doesn't seem to be used, I
don't see why we should keep it around. Will give it a try and see how
it looks.
Thanks.
> static int idxd_wq_load_config(struct idxd_wq *wq)
> {
> struct idxd_device *idxd = wq->idxd;
> @@ -1434,11 +1444,7 @@ int idxd_drv_enable_wq(struct idxd_wq *wq)
> }
> }
>
> - rc = 0;
> - spin_lock(&idxd->dev_lock);
> - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
> - rc = idxd_device_config(idxd);
> - spin_unlock(&idxd->dev_lock);
> + rc = idxd_device_config(idxd);
> if (rc < 0) {
> dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
> goto err;
> @@ -1521,7 +1527,7 @@ EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD");
> int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
> {
> struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
> - int rc = 0;
> + int rc;
>
> /*
> * Device should be in disabled state for the idxd_drv to load. If it's in
> @@ -1534,10 +1540,7 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
> }
>
> /* Device configuration */
> - spin_lock(&idxd->dev_lock);
> - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
> - rc = idxd_device_config(idxd);
> - spin_unlock(&idxd->dev_lock);
> + rc = idxd_device_config(idxd);
> if (rc < 0)
> return -ENXIO;
>
> diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
> index 74e6695881e6..f15bc2281c6b 100644
> --- a/drivers/dma/idxd/idxd.h
> +++ b/drivers/dma/idxd/idxd.h
> @@ -760,6 +760,7 @@ int idxd_device_disable(struct idxd_device *idxd);
> void idxd_device_reset(struct idxd_device *idxd);
> void idxd_device_clear_state(struct idxd_device *idxd);
> int idxd_device_config(struct idxd_device *idxd);
> +int idxd_device_config_locked(struct idxd_device *idxd);
> void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
> int idxd_device_load_config(struct idxd_device *idxd);
> int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index 80355d03004d..193b9282e30f 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -1091,12 +1091,10 @@ static void idxd_reset_done(struct pci_dev *pdev)
> idxd_device_config_restore(idxd, idxd->idxd_saved);
>
> /* Re-configure IDXD device if allowed. */
> - if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
> - rc = idxd_device_config(idxd);
> - if (rc < 0) {
> - dev_err(dev, "HALT: %s config fails\n", idxd_name);
> - goto out;
> - }
> + rc = idxd_device_config(idxd);
> + if (rc < 0) {
> + dev_err(dev, "HALT: %s config fails\n", idxd_name);
> + goto out;
> }
>
> /* Bind IDXD device to driver. */
>
>
>> ---
>> drivers/dma/idxd/init.c | 2 ++
>> drivers/dma/idxd/irq.c | 2 ++
>> 2 files changed, 4 insertions(+)
>>
>> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
>> index 35bdefd3728bb851beb0f235fae7c6d71bd59239..d828d352ab008127e5e442e7072c9d5df0f2c6cf 100644
>> --- a/drivers/dma/idxd/init.c
>> +++ b/drivers/dma/idxd/init.c
>> @@ -1091,7 +1091,9 @@ static void idxd_reset_done(struct pci_dev *pdev)
>>
>> /* Re-configure IDXD device if allowed. */
>> if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
>> + spin_lock(&idxd->dev_lock);
>> rc = idxd_device_config(idxd);
>> + spin_unlock(&idxd->dev_lock);
>> if (rc < 0) {
>> dev_err(dev, "HALT: %s config fails\n", idxd_name);
>> goto out;
>> diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
>> index 1107db3ce0a3a65246bd0d9b1f96e99c9fa3def6..74059fe43fafeb930f58db21d3824f62b095b968 100644
>> --- a/drivers/dma/idxd/irq.c
>> +++ b/drivers/dma/idxd/irq.c
>> @@ -36,7 +36,9 @@ static void idxd_device_reinit(struct work_struct *work)
>> int rc, i;
>>
>> idxd_device_reset(idxd);
>> + spin_lock(&idxd->dev_lock);
>> rc = idxd_device_config(idxd);
>> + spin_unlock(&idxd->dev_lock);
>> if (rc < 0)
>> goto out;
>>
>>
>
--
Vinicius
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 2/9] dmaengine: idxd: Fix crash when the event log is disabled
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
2025-08-05 1:27 ` [PATCH 1/9] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config() Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:07 ` Dave Jiang
2025-08-05 1:27 ` [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR Vinicius Costa Gomes
` (6 subsequent siblings)
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
If reporting errors to the event log is not supported by the hardware,
and an error that causes Field Level Reset (FLR) is received, the
driver will try to restore the event log even if it was not allocated.
Also, only try to free the event log if it was properly allocated.
Fixes: 6078a315aec1 ("dmaengine: idxd: Add idxd_device_config_save() and idxd_device_config_restore() helpers")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/device.c | 3 +++
drivers/dma/idxd/init.c | 3 ++-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 5cf419fe6b4645337cf361305ca066d34763b3c2..c599a902767ee9904d75a0510a911596e35a259b 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -815,6 +815,9 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
struct device *dev = &idxd->pdev->dev;
struct idxd_evl *evl = idxd->evl;
+ if (!evl)
+ return;
+
gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
if (!gencfg.evl_en)
return;
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index d828d352ab008127e5e442e7072c9d5df0f2c6cf..a58b8cdbfa60ba9f00b91a737df01517885bc41a 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -959,7 +959,8 @@ static void idxd_device_config_restore(struct idxd_device *idxd,
idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit;
- idxd->evl->size = saved_evl->size;
+ if (idxd->evl)
+ idxd->evl->size = saved_evl->size;
for (i = 0; i < idxd->max_groups; i++) {
struct idxd_group *saved_group, *group;
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 2/9] dmaengine: idxd: Fix crash when the event log is disabled
2025-08-05 1:27 ` [PATCH 2/9] dmaengine: idxd: Fix crash when the event log is disabled Vinicius Costa Gomes
@ 2025-08-06 17:07 ` Dave Jiang
0 siblings, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:07 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> If reporting errors to the event log is not supported by the hardware,
> and an error that causes Field Level Reset (FLR) is received, the
> driver will try to restore the event log even if it was not allocated.
>
> Also, only try to free the event log if it was properly allocated.
>
> Fixes: 6078a315aec1 ("dmaengine: idxd: Add idxd_device_config_save() and idxd_device_config_restore() helpers")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/device.c | 3 +++
> drivers/dma/idxd/init.c | 3 ++-
> 2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
> index 5cf419fe6b4645337cf361305ca066d34763b3c2..c599a902767ee9904d75a0510a911596e35a259b 100644
> --- a/drivers/dma/idxd/device.c
> +++ b/drivers/dma/idxd/device.c
> @@ -815,6 +815,9 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
> struct device *dev = &idxd->pdev->dev;
> struct idxd_evl *evl = idxd->evl;
>
> + if (!evl)
> + return;
> +
> gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
> if (!gencfg.evl_en)
> return;
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index d828d352ab008127e5e442e7072c9d5df0f2c6cf..a58b8cdbfa60ba9f00b91a737df01517885bc41a 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -959,7 +959,8 @@ static void idxd_device_config_restore(struct idxd_device *idxd,
>
> idxd->rdbuf_limit = idxd_saved->saved_idxd.rdbuf_limit;
>
> - idxd->evl->size = saved_evl->size;
> + if (idxd->evl)
> + idxd->evl->size = saved_evl->size;
>
> for (i = 0; i < idxd->max_groups; i++) {
> struct idxd_group *saved_group, *group;
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
2025-08-05 1:27 ` [PATCH 1/9] dmaengine: idxd: Fix lockdep warnings when calling idxd_device_config() Vinicius Costa Gomes
2025-08-05 1:27 ` [PATCH 2/9] dmaengine: idxd: Fix crash when the event log is disabled Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:09 ` Dave Jiang
2025-08-15 19:26 ` Nathan Lynch
2025-08-05 1:27 ` [PATCH 4/9] dmaengine: idxd: Flush kernel workqueues on Field Level Reset Vinicius Costa Gomes
` (5 subsequent siblings)
8 siblings, 2 replies; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
In the case that the first Field Level Reset (FLR) concludes
correctly, but in the second FLR the scratch area for the saved
configuration cannot be allocated, it's possible for a invalid memory
access to happen.
Always set the deallocated scratch area to NULL after FLR completes.
Fixes: 98d187a98903 ("dmaengine: idxd: Enable Function Level Reset (FLR) for halt")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/init.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index a58b8cdbfa60ba9f00b91a737df01517885bc41a..31e00af136a7e13887d3ffd00efbb05864712a80 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -1136,6 +1136,7 @@ static void idxd_reset_done(struct pci_dev *pdev)
}
out:
kfree(idxd->idxd_saved);
+ idxd->idxd_saved = NULL;
}
static const struct pci_error_handlers idxd_error_handler = {
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR
2025-08-05 1:27 ` [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR Vinicius Costa Gomes
@ 2025-08-06 17:09 ` Dave Jiang
2025-08-15 19:26 ` Nathan Lynch
1 sibling, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:09 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> In the case that the first Field Level Reset (FLR) concludes
> correctly, but in the second FLR the scratch area for the saved
> configuration cannot be allocated, it's possible for a invalid memory
> access to happen.
>
> Always set the deallocated scratch area to NULL after FLR completes.
>
> Fixes: 98d187a98903 ("dmaengine: idxd: Enable Function Level Reset (FLR) for halt")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/init.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index a58b8cdbfa60ba9f00b91a737df01517885bc41a..31e00af136a7e13887d3ffd00efbb05864712a80 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -1136,6 +1136,7 @@ static void idxd_reset_done(struct pci_dev *pdev)
> }
> out:
> kfree(idxd->idxd_saved);
> + idxd->idxd_saved = NULL;
> }
>
> static const struct pci_error_handlers idxd_error_handler = {
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR
2025-08-05 1:27 ` [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR Vinicius Costa Gomes
2025-08-06 17:09 ` Dave Jiang
@ 2025-08-15 19:26 ` Nathan Lynch
2025-08-15 22:45 ` Vinicius Costa Gomes
1 sibling, 1 reply; 23+ messages in thread
From: Nathan Lynch @ 2025-08-15 19:26 UTC (permalink / raw)
To: Vinicius Costa Gomes, Dave Jiang, Vinod Koul, Fenghua Yu,
Dan Williams
Cc: dmaengine, linux-kernel
Vinicius Costa Gomes <vinicius.gomes@intel.com> writes:
> In the case that the first Field Level Reset (FLR) concludes
I think you mean Function Level Reset? (here and in other changes in the
series)
> correctly, but in the second FLR the scratch area for the saved
> configuration cannot be allocated, it's possible for a invalid memory
> access to happen.
>
> Always set the deallocated scratch area to NULL after FLR completes.
>
> Fixes: 98d187a98903 ("dmaengine: idxd: Enable Function Level Reset (FLR) for halt")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
> ---
> drivers/dma/idxd/init.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index a58b8cdbfa60ba9f00b91a737df01517885bc41a..31e00af136a7e13887d3ffd00efbb05864712a80 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -1136,6 +1136,7 @@ static void idxd_reset_done(struct pci_dev *pdev)
> }
> out:
> kfree(idxd->idxd_saved);
> + idxd->idxd_saved = NULL;
> }
>
> static const struct pci_error_handlers idxd_error_handler = {
>
> --
> 2.50.1
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR
2025-08-15 19:26 ` Nathan Lynch
@ 2025-08-15 22:45 ` Vinicius Costa Gomes
0 siblings, 0 replies; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-15 22:45 UTC (permalink / raw)
To: Nathan Lynch, Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel
Nathan Lynch <nathan.lynch@amd.com> writes:
> Vinicius Costa Gomes <vinicius.gomes@intel.com> writes:
>
>> In the case that the first Field Level Reset (FLR) concludes
>
> I think you mean Function Level Reset? (here and in other changes in the
> series)
>
Ugh, thanks for noticing that. Don't know what happened that I messed
that up. Will fix.
Cheers,
--
Vinicius
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 4/9] dmaengine: idxd: Flush kernel workqueues on Field Level Reset
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
` (2 preceding siblings ...)
2025-08-05 1:27 ` [PATCH 3/9] dmaengine: idxd: Fix possible invalid memory access after FLR Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:12 ` Dave Jiang
2025-08-05 1:27 ` [PATCH 5/9] dmaengine: idxd: Allow DMA clients to empty the pending queue Vinicius Costa Gomes
` (4 subsequent siblings)
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
When a Field Level Reset (FLR) happens terminate the pending
descriptors that were issued by in-kernel users and disable the
interrupts associated with those. They will be re-enabled after FLR
finishes.
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/device.c | 24 ++++++++++++++++++++++++
drivers/dma/idxd/idxd.h | 1 +
drivers/dma/idxd/irq.c | 5 +++++
3 files changed, 30 insertions(+)
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index c599a902767ee9904d75a0510a911596e35a259b..287cf3bf1f5a2efdc9037968e9a4eed506e489c3 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -1315,6 +1315,11 @@ void idxd_wq_free_irq(struct idxd_wq *wq)
free_irq(ie->vector, ie);
idxd_flush_pending_descs(ie);
+
+ /* The interrupt might have been already released by FLR */
+ if (ie->int_handle == INVALID_INT_HANDLE)
+ return;
+
if (idxd->request_int_handles)
idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
idxd_device_clear_perm_entry(idxd, ie);
@@ -1323,6 +1328,25 @@ void idxd_wq_free_irq(struct idxd_wq *wq)
ie->pasid = IOMMU_PASID_INVALID;
}
+void idxd_wqs_flush_descs(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL) {
+ struct idxd_irq_entry *ie = &wq->ie;
+
+ idxd_flush_pending_descs(ie);
+ if (idxd->request_int_handles)
+ idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
+ idxd_device_clear_perm_entry(idxd, ie);
+ ie->int_handle = INVALID_INT_HANDLE;
+ }
+ }
+}
+
int idxd_wq_request_irq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 74e6695881e6f1684512601ca2c2ee241aaf0a78..6ccca3c56556dbffe0a7c983a2f11f6c73ff2bfd 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -737,6 +737,7 @@ static inline void idxd_desc_complete(struct idxd_desc *desc,
int idxd_register_devices(struct idxd_device *idxd);
void idxd_unregister_devices(struct idxd_device *idxd);
void idxd_wqs_quiesce(struct idxd_device *idxd);
+void idxd_wqs_flush_descs(struct idxd_device *idxd);
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count);
int idxd_load_iaa_device_defaults(struct idxd_device *idxd);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 74059fe43fafeb930f58db21d3824f62b095b968..26547586fcfaa1b9d244b678bf8e209b7b14d35a 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -417,6 +417,11 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd)
} else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) {
idxd->state = IDXD_DEV_HALTED;
idxd_mask_error_interrupts(idxd);
+ /* Flush all pending descriptors, and disable
+ * interrupts, they will be re-enabled when FLR
+ * concludes.
+ */
+ idxd_wqs_flush_descs(idxd);
dev_dbg(&idxd->pdev->dev,
"idxd halted, doing FLR. After FLR, configs are restored\n");
INIT_WORK(&idxd->work, idxd_device_flr);
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 4/9] dmaengine: idxd: Flush kernel workqueues on Field Level Reset
2025-08-05 1:27 ` [PATCH 4/9] dmaengine: idxd: Flush kernel workqueues on Field Level Reset Vinicius Costa Gomes
@ 2025-08-06 17:12 ` Dave Jiang
0 siblings, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:12 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> When a Field Level Reset (FLR) happens terminate the pending
> descriptors that were issued by in-kernel users and disable the
> interrupts associated with those. They will be re-enabled after FLR
> finishes.
>
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/device.c | 24 ++++++++++++++++++++++++
> drivers/dma/idxd/idxd.h | 1 +
> drivers/dma/idxd/irq.c | 5 +++++
> 3 files changed, 30 insertions(+)
>
> diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
> index c599a902767ee9904d75a0510a911596e35a259b..287cf3bf1f5a2efdc9037968e9a4eed506e489c3 100644
> --- a/drivers/dma/idxd/device.c
> +++ b/drivers/dma/idxd/device.c
> @@ -1315,6 +1315,11 @@ void idxd_wq_free_irq(struct idxd_wq *wq)
>
> free_irq(ie->vector, ie);
> idxd_flush_pending_descs(ie);
> +
> + /* The interrupt might have been already released by FLR */
> + if (ie->int_handle == INVALID_INT_HANDLE)
> + return;
> +
> if (idxd->request_int_handles)
> idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
> idxd_device_clear_perm_entry(idxd, ie);
> @@ -1323,6 +1328,25 @@ void idxd_wq_free_irq(struct idxd_wq *wq)
> ie->pasid = IOMMU_PASID_INVALID;
> }
>
> +void idxd_wqs_flush_descs(struct idxd_device *idxd)
> +{
> + struct idxd_wq *wq;
> + int i;
> +
> + for (i = 0; i < idxd->max_wqs; i++) {
> + wq = idxd->wqs[i];
> + if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL) {
> + struct idxd_irq_entry *ie = &wq->ie;
> +
> + idxd_flush_pending_descs(ie);
> + if (idxd->request_int_handles)
> + idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
> + idxd_device_clear_perm_entry(idxd, ie);
> + ie->int_handle = INVALID_INT_HANDLE;
> + }
> + }
> +}
> +
> int idxd_wq_request_irq(struct idxd_wq *wq)
> {
> struct idxd_device *idxd = wq->idxd;
> diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
> index 74e6695881e6f1684512601ca2c2ee241aaf0a78..6ccca3c56556dbffe0a7c983a2f11f6c73ff2bfd 100644
> --- a/drivers/dma/idxd/idxd.h
> +++ b/drivers/dma/idxd/idxd.h
> @@ -737,6 +737,7 @@ static inline void idxd_desc_complete(struct idxd_desc *desc,
> int idxd_register_devices(struct idxd_device *idxd);
> void idxd_unregister_devices(struct idxd_device *idxd);
> void idxd_wqs_quiesce(struct idxd_device *idxd);
> +void idxd_wqs_flush_descs(struct idxd_device *idxd);
> bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
> void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count);
> int idxd_load_iaa_device_defaults(struct idxd_device *idxd);
> diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
> index 74059fe43fafeb930f58db21d3824f62b095b968..26547586fcfaa1b9d244b678bf8e209b7b14d35a 100644
> --- a/drivers/dma/idxd/irq.c
> +++ b/drivers/dma/idxd/irq.c
> @@ -417,6 +417,11 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd)
> } else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) {
> idxd->state = IDXD_DEV_HALTED;
> idxd_mask_error_interrupts(idxd);
> + /* Flush all pending descriptors, and disable
> + * interrupts, they will be re-enabled when FLR
> + * concludes.
> + */
> + idxd_wqs_flush_descs(idxd);
> dev_dbg(&idxd->pdev->dev,
> "idxd halted, doing FLR. After FLR, configs are restored\n");
> INIT_WORK(&idxd->work, idxd_device_flr);
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 5/9] dmaengine: idxd: Allow DMA clients to empty the pending queue
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
` (3 preceding siblings ...)
2025-08-05 1:27 ` [PATCH 4/9] dmaengine: idxd: Flush kernel workqueues on Field Level Reset Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:17 ` Dave Jiang
2025-08-05 1:27 ` [PATCH 6/9] dmaengine: idxd: Fix not releasing workqueue on .release() Vinicius Costa Gomes
` (3 subsequent siblings)
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
Send a request to drain all pending commands from the hardware queue
when the DMA clients request.
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/dma.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index dbecd699237e3ac5a73b49ed2097a897abc9a043..10356a00cbdfc2ddfeea629aa749c40e7eec0a56 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -194,6 +194,15 @@ static void idxd_dma_release(struct dma_device *device)
kfree(idxd_dma);
}
+static int idxd_dma_terminate_all(struct dma_chan *c)
+{
+ struct idxd_wq *wq = to_idxd_wq(c);
+
+ idxd_wq_drain(wq);
+
+ return 0;
+}
+
int idxd_register_dma_device(struct idxd_device *idxd)
{
struct idxd_dma_dev *idxd_dma;
@@ -224,6 +233,7 @@ int idxd_register_dma_device(struct idxd_device *idxd)
dma->device_issue_pending = idxd_dma_issue_pending;
dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
dma->device_free_chan_resources = idxd_dma_free_chan_resources;
+ dma->device_terminate_all = idxd_dma_terminate_all;
rc = dma_async_device_register(dma);
if (rc < 0) {
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 5/9] dmaengine: idxd: Allow DMA clients to empty the pending queue
2025-08-05 1:27 ` [PATCH 5/9] dmaengine: idxd: Allow DMA clients to empty the pending queue Vinicius Costa Gomes
@ 2025-08-06 17:17 ` Dave Jiang
2025-08-06 20:30 ` Vinicius Costa Gomes
0 siblings, 1 reply; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:17 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> Send a request to drain all pending commands from the hardware queue
> when the DMA clients request.
>
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
> ---
> drivers/dma/idxd/dma.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
> index dbecd699237e3ac5a73b49ed2097a897abc9a043..10356a00cbdfc2ddfeea629aa749c40e7eec0a56 100644
> --- a/drivers/dma/idxd/dma.c
> +++ b/drivers/dma/idxd/dma.c
> @@ -194,6 +194,15 @@ static void idxd_dma_release(struct dma_device *device)
> kfree(idxd_dma);
> }
>
> +static int idxd_dma_terminate_all(struct dma_chan *c)
> +{
> + struct idxd_wq *wq = to_idxd_wq(c);
> +
> + idxd_wq_drain(wq);
Definition in include/linux/dmaengine.h is "Aborts all transfers on a channel." So instead of drain, I think we need to abort and clean up all the pending descriptors on the irq list as well. Perhaps drain may be only for ->device_synchronize()?
DJ
> +
> + return 0;
> +}
> +
> int idxd_register_dma_device(struct idxd_device *idxd)
> {
> struct idxd_dma_dev *idxd_dma;
> @@ -224,6 +233,7 @@ int idxd_register_dma_device(struct idxd_device *idxd)
> dma->device_issue_pending = idxd_dma_issue_pending;
> dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
> dma->device_free_chan_resources = idxd_dma_free_chan_resources;
> + dma->device_terminate_all = idxd_dma_terminate_all;
>
> rc = dma_async_device_register(dma);
> if (rc < 0) {
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/9] dmaengine: idxd: Allow DMA clients to empty the pending queue
2025-08-06 17:17 ` Dave Jiang
@ 2025-08-06 20:30 ` Vinicius Costa Gomes
0 siblings, 0 replies; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-06 20:30 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Dan Williams, Fenghua Yu; +Cc: dmaengine, linux-kernel
Dave Jiang <dave.jiang@intel.com> writes:
> On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
>> Send a request to drain all pending commands from the hardware queue
>> when the DMA clients request.
>>
>> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
>> ---
>> drivers/dma/idxd/dma.c | 10 ++++++++++
>> 1 file changed, 10 insertions(+)
>>
>> diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
>> index dbecd699237e3ac5a73b49ed2097a897abc9a043..10356a00cbdfc2ddfeea629aa749c40e7eec0a56 100644
>> --- a/drivers/dma/idxd/dma.c
>> +++ b/drivers/dma/idxd/dma.c
>> @@ -194,6 +194,15 @@ static void idxd_dma_release(struct dma_device *device)
>> kfree(idxd_dma);
>> }
>>
>> +static int idxd_dma_terminate_all(struct dma_chan *c)
>> +{
>> + struct idxd_wq *wq = to_idxd_wq(c);
>> +
>> + idxd_wq_drain(wq);
>
> Definition in include/linux/dmaengine.h is "Aborts all transfers on a
> channel." So instead of drain, I think we need to abort and clean up
> all the pending descriptors on the irq list as well. Perhaps drain may
> be only for ->device_synchronize()?
I was considering more the documentation in dmaengine_terminate_async().
But will check again for the expectations of the "core" dmaengine, I
could be missing something. Thanks for checking this.
>
> DJ
>
>> +
>> + return 0;
>> +}
>> +
>> int idxd_register_dma_device(struct idxd_device *idxd)
>> {
>> struct idxd_dma_dev *idxd_dma;
>> @@ -224,6 +233,7 @@ int idxd_register_dma_device(struct idxd_device *idxd)
>> dma->device_issue_pending = idxd_dma_issue_pending;
>> dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
>> dma->device_free_chan_resources = idxd_dma_free_chan_resources;
>> + dma->device_terminate_all = idxd_dma_terminate_all;
>>
>> rc = dma_async_device_register(dma);
>> if (rc < 0) {
>>
>
Cheers,
--
Vinicius
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 6/9] dmaengine: idxd: Fix not releasing workqueue on .release()
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
` (4 preceding siblings ...)
2025-08-05 1:27 ` [PATCH 5/9] dmaengine: idxd: Allow DMA clients to empty the pending queue Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:24 ` Dave Jiang
2025-08-05 1:27 ` [PATCH 7/9] dmaengine: idxd: Fix memory leak when a wq is reset Vinicius Costa Gomes
` (2 subsequent siblings)
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
The workqueue associated with an DSA/IAA device is not released when
the object is freed.
Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/sysfs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 9f0701021af0e6fbe3c3c04a0e336ee9cd094641..cdd7a59140d90c80f5837473962017114cd00b13 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1812,6 +1812,7 @@ static void idxd_conf_device_release(struct device *dev)
{
struct idxd_device *idxd = confdev_to_idxd(dev);
+ destroy_workqueue(idxd->wq);
kfree(idxd->groups);
bitmap_free(idxd->wq_enable_map);
kfree(idxd->wqs);
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 6/9] dmaengine: idxd: Fix not releasing workqueue on .release()
2025-08-05 1:27 ` [PATCH 6/9] dmaengine: idxd: Fix not releasing workqueue on .release() Vinicius Costa Gomes
@ 2025-08-06 17:24 ` Dave Jiang
0 siblings, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:24 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> The workqueue associated with an DSA/IAA device is not released when
> the object is freed.
>
> Fixes: 47c16ac27d4c ("dmaengine: idxd: fix idxd conf_dev 'struct device' lifetime")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/sysfs.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
> index 9f0701021af0e6fbe3c3c04a0e336ee9cd094641..cdd7a59140d90c80f5837473962017114cd00b13 100644
> --- a/drivers/dma/idxd/sysfs.c
> +++ b/drivers/dma/idxd/sysfs.c
> @@ -1812,6 +1812,7 @@ static void idxd_conf_device_release(struct device *dev)
> {
> struct idxd_device *idxd = confdev_to_idxd(dev);
>
> + destroy_workqueue(idxd->wq);
> kfree(idxd->groups);
> bitmap_free(idxd->wq_enable_map);
> kfree(idxd->wqs);
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 7/9] dmaengine: idxd: Fix memory leak when a wq is reset
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
` (5 preceding siblings ...)
2025-08-05 1:27 ` [PATCH 6/9] dmaengine: idxd: Fix not releasing workqueue on .release() Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:25 ` Dave Jiang
2025-08-05 1:27 ` [PATCH 8/9] dmaengine: idxd: Fix freeing the allocated ida too late Vinicius Costa Gomes
2025-08-05 1:28 ` [PATCH 9/9] dmaengine: idxd: Fix leaking event log memory Vinicius Costa Gomes
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
idxd_wq_disable_cleanup() which is called from the reset path for a
workqueue, sets the wq type to NONE, which for other parts of the
driver mean that the wq is empty (all its resources were released).
Only set the wq type to NONE after its resources are released.
Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/device.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 287cf3bf1f5a2efdc9037968e9a4eed506e489c3..8f6afcba840ed7128259ad6b58b2fd967b0c151c 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -174,6 +174,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq)
free_descs(wq);
dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
sbitmap_queue_free(&wq->sbq);
+ wq->type = IDXD_WQT_NONE;
}
EXPORT_SYMBOL_NS_GPL(idxd_wq_free_resources, "IDXD");
@@ -367,7 +368,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
lockdep_assert_held(&wq->wq_lock);
wq->state = IDXD_WQ_DISABLED;
memset(wq->wqcfg, 0, idxd->wqcfg_size);
- wq->type = IDXD_WQT_NONE;
wq->threshold = 0;
wq->priority = 0;
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
@@ -1540,7 +1540,6 @@ void idxd_drv_disable_wq(struct idxd_wq *wq)
idxd_wq_reset(wq);
idxd_wq_free_resources(wq);
percpu_ref_exit(&wq->wq_active);
- wq->type = IDXD_WQT_NONE;
wq->client_count = 0;
}
EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD");
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 7/9] dmaengine: idxd: Fix memory leak when a wq is reset
2025-08-05 1:27 ` [PATCH 7/9] dmaengine: idxd: Fix memory leak when a wq is reset Vinicius Costa Gomes
@ 2025-08-06 17:25 ` Dave Jiang
0 siblings, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:25 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> idxd_wq_disable_cleanup() which is called from the reset path for a
> workqueue, sets the wq type to NONE, which for other parts of the
> driver mean that the wq is empty (all its resources were released).
>
> Only set the wq type to NONE after its resources are released.
>
> Fixes: da32b28c95a7 ("dmaengine: idxd: cleanup workqueue config after disabling")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/device.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
> index 287cf3bf1f5a2efdc9037968e9a4eed506e489c3..8f6afcba840ed7128259ad6b58b2fd967b0c151c 100644
> --- a/drivers/dma/idxd/device.c
> +++ b/drivers/dma/idxd/device.c
> @@ -174,6 +174,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq)
> free_descs(wq);
> dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
> sbitmap_queue_free(&wq->sbq);
> + wq->type = IDXD_WQT_NONE;
> }
> EXPORT_SYMBOL_NS_GPL(idxd_wq_free_resources, "IDXD");
>
> @@ -367,7 +368,6 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
> lockdep_assert_held(&wq->wq_lock);
> wq->state = IDXD_WQ_DISABLED;
> memset(wq->wqcfg, 0, idxd->wqcfg_size);
> - wq->type = IDXD_WQT_NONE;
> wq->threshold = 0;
> wq->priority = 0;
> wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
> @@ -1540,7 +1540,6 @@ void idxd_drv_disable_wq(struct idxd_wq *wq)
> idxd_wq_reset(wq);
> idxd_wq_free_resources(wq);
> percpu_ref_exit(&wq->wq_active);
> - wq->type = IDXD_WQT_NONE;
> wq->client_count = 0;
> }
> EXPORT_SYMBOL_NS_GPL(idxd_drv_disable_wq, "IDXD");
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 8/9] dmaengine: idxd: Fix freeing the allocated ida too late
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
` (6 preceding siblings ...)
2025-08-05 1:27 ` [PATCH 7/9] dmaengine: idxd: Fix memory leak when a wq is reset Vinicius Costa Gomes
@ 2025-08-05 1:27 ` Vinicius Costa Gomes
2025-08-06 17:27 ` Dave Jiang
2025-08-05 1:28 ` [PATCH 9/9] dmaengine: idxd: Fix leaking event log memory Vinicius Costa Gomes
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:27 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
It can happen that when the cdev .release() is called, the driver
already called ida_destroy(). Move ida_free() to the _del() path.
We see with DEBUG_KOBJECT_RELEASE enabled and forcing an early PCI
unbind.
Fixes: 04922b7445a1 ("dmaengine: idxd: fix cdev setup and free device lifetime issues")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/cdev.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 7e4715f927732702416d917f34ab0a83f575d533..4105688cf3f060704b851ee17467c135c170326e 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -158,11 +158,7 @@ static const struct device_type idxd_cdev_file_type = {
static void idxd_cdev_dev_release(struct device *dev)
{
struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
- struct idxd_cdev_context *cdev_ctx;
- struct idxd_wq *wq = idxd_cdev->wq;
- cdev_ctx = &ictx[wq->idxd->data->type];
- ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
kfree(idxd_cdev);
}
@@ -582,11 +578,15 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
void idxd_wq_del_cdev(struct idxd_wq *wq)
{
+ struct idxd_cdev_context *cdev_ctx;
struct idxd_cdev *idxd_cdev;
idxd_cdev = wq->idxd_cdev;
wq->idxd_cdev = NULL;
cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
put_device(cdev_dev(idxd_cdev));
}
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 8/9] dmaengine: idxd: Fix freeing the allocated ida too late
2025-08-05 1:27 ` [PATCH 8/9] dmaengine: idxd: Fix freeing the allocated ida too late Vinicius Costa Gomes
@ 2025-08-06 17:27 ` Dave Jiang
0 siblings, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:27 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:27 PM, Vinicius Costa Gomes wrote:
> It can happen that when the cdev .release() is called, the driver
> already called ida_destroy(). Move ida_free() to the _del() path.
>
> We see with DEBUG_KOBJECT_RELEASE enabled and forcing an early PCI
> unbind.
>
> Fixes: 04922b7445a1 ("dmaengine: idxd: fix cdev setup and free device lifetime issues")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/cdev.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
> index 7e4715f927732702416d917f34ab0a83f575d533..4105688cf3f060704b851ee17467c135c170326e 100644
> --- a/drivers/dma/idxd/cdev.c
> +++ b/drivers/dma/idxd/cdev.c
> @@ -158,11 +158,7 @@ static const struct device_type idxd_cdev_file_type = {
> static void idxd_cdev_dev_release(struct device *dev)
> {
> struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
> - struct idxd_cdev_context *cdev_ctx;
> - struct idxd_wq *wq = idxd_cdev->wq;
>
> - cdev_ctx = &ictx[wq->idxd->data->type];
> - ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
> kfree(idxd_cdev);
> }
>
> @@ -582,11 +578,15 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
>
> void idxd_wq_del_cdev(struct idxd_wq *wq)
> {
> + struct idxd_cdev_context *cdev_ctx;
> struct idxd_cdev *idxd_cdev;
>
> idxd_cdev = wq->idxd_cdev;
> wq->idxd_cdev = NULL;
> cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
> +
> + cdev_ctx = &ictx[wq->idxd->data->type];
> + ida_free(&cdev_ctx->minor_ida, idxd_cdev->minor);
> put_device(cdev_dev(idxd_cdev));
> }
>
>
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 9/9] dmaengine: idxd: Fix leaking event log memory
2025-08-05 1:27 [PATCH 0/9] dmaengine: idxd: Memory leak and FLR fixes Vinicius Costa Gomes
` (7 preceding siblings ...)
2025-08-05 1:27 ` [PATCH 8/9] dmaengine: idxd: Fix freeing the allocated ida too late Vinicius Costa Gomes
@ 2025-08-05 1:28 ` Vinicius Costa Gomes
2025-08-06 17:29 ` Dave Jiang
8 siblings, 1 reply; 23+ messages in thread
From: Vinicius Costa Gomes @ 2025-08-05 1:28 UTC (permalink / raw)
To: Dave Jiang, Vinod Koul, Fenghua Yu, Dan Williams
Cc: dmaengine, linux-kernel, Vinicius Costa Gomes
During the device remove process, the device is reset, causing the
configuration registers to go back to their default state, which is
zero. As the driver is checking if the event log support was enabled
before deallocating, it will fail if a reset happened before.
Do not check if the support was enabled, the check for 'idxd->evl'
being valid (only allocated if the HW capability is available) is
enough.
Fixes: 244da66cda35 ("dmaengine: idxd: setup event log configuration")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
---
drivers/dma/idxd/device.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 8f6afcba840ed7128259ad6b58b2fd967b0c151c..288cfd85f3a91f40ce2f8d8150830ad0628eacbe 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -818,10 +818,6 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
if (!evl)
return;
- gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
- if (!gencfg.evl_en)
- return;
-
mutex_lock(&evl->lock);
gencfg.evl_en = 0;
iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
--
2.50.1
^ permalink raw reply related [flat|nested] 23+ messages in thread
* Re: [PATCH 9/9] dmaengine: idxd: Fix leaking event log memory
2025-08-05 1:28 ` [PATCH 9/9] dmaengine: idxd: Fix leaking event log memory Vinicius Costa Gomes
@ 2025-08-06 17:29 ` Dave Jiang
0 siblings, 0 replies; 23+ messages in thread
From: Dave Jiang @ 2025-08-06 17:29 UTC (permalink / raw)
To: Vinicius Costa Gomes, Vinod Koul, Dan Williams, Fenghua Yu
Cc: dmaengine, linux-kernel
On 8/4/25 6:28 PM, Vinicius Costa Gomes wrote:
> During the device remove process, the device is reset, causing the
> configuration registers to go back to their default state, which is
> zero. As the driver is checking if the event log support was enabled
> before deallocating, it will fail if a reset happened before.
>
> Do not check if the support was enabled, the check for 'idxd->evl'
> being valid (only allocated if the HW capability is available) is
> enough.
>
> Fixes: 244da66cda35 ("dmaengine: idxd: setup event log configuration")
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/dma/idxd/device.c | 4 ----
> 1 file changed, 4 deletions(-)
>
> diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
> index 8f6afcba840ed7128259ad6b58b2fd967b0c151c..288cfd85f3a91f40ce2f8d8150830ad0628eacbe 100644
> --- a/drivers/dma/idxd/device.c
> +++ b/drivers/dma/idxd/device.c
> @@ -818,10 +818,6 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
> if (!evl)
> return;
>
> - gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
> - if (!gencfg.evl_en)
> - return;
> -
> mutex_lock(&evl->lock);
> gencfg.evl_en = 0;
> iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
>
^ permalink raw reply [flat|nested] 23+ messages in thread