* [PATCH V2 1/5] nvdimm: virtio_pmem: always wake -ENOSPC waiters
2025-12-25 4:29 [PATCH V2 0/4] nvdimm: virtio_pmem: fix request lifetime and converge broken queue failures Li Chen
@ 2025-12-25 4:29 ` Li Chen
2026-01-02 12:29 ` Pankaj Gupta
2025-12-25 4:29 ` [PATCH V2 2/5] nvdimm: virtio_pmem: refcount requests for token lifetime Li Chen
` (3 subsequent siblings)
4 siblings, 1 reply; 8+ messages in thread
From: Li Chen @ 2025-12-25 4:29 UTC (permalink / raw)
To: Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny, Pankaj Gupta,
nvdimm, virtualization, linux-kernel
Cc: Li Chen
virtio_pmem_host_ack() reclaims virtqueue descriptors with
virtqueue_get_buf(). The -ENOSPC waiter wakeup is tied to completing the
returned token.
If token completion is skipped for any reason, reclaimed descriptors may
not wake a waiter and the submitter may sleep forever waiting for a free
slot.
Always wake one -ENOSPC waiter for each virtqueue completion before
touching the returned token.
Use READ_ONCE()/WRITE_ONCE() for the wait_event() flags (done and
wq_buf_avail). They are observed by waiters without pmem_lock, so make
the accesses explicit single loads/stores and avoid compiler
reordering/caching across the wait/wake paths.
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/nvdimm/nd_virtio.c | 35 +++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index c3f07be4aa22..6f9890361d0b 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -9,26 +9,33 @@
#include "virtio_pmem.h"
#include "nd.h"
+static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
+{
+ struct virtio_pmem_request *req_buf;
+
+ if (list_empty(&vpmem->req_list))
+ return;
+
+ req_buf = list_first_entry(&vpmem->req_list,
+ struct virtio_pmem_request, list);
+ list_del_init(&req_buf->list);
+ WRITE_ONCE(req_buf->wq_buf_avail, true);
+ wake_up(&req_buf->wq_buf);
+}
+
/* The interrupt handler */
void virtio_pmem_host_ack(struct virtqueue *vq)
{
struct virtio_pmem *vpmem = vq->vdev->priv;
- struct virtio_pmem_request *req_data, *req_buf;
+ struct virtio_pmem_request *req_data;
unsigned long flags;
unsigned int len;
spin_lock_irqsave(&vpmem->pmem_lock, flags);
while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
- req_data->done = true;
+ virtio_pmem_wake_one_waiter(vpmem);
+ WRITE_ONCE(req_data->done, true);
wake_up(&req_data->host_acked);
-
- if (!list_empty(&vpmem->req_list)) {
- req_buf = list_first_entry(&vpmem->req_list,
- struct virtio_pmem_request, list);
- req_buf->wq_buf_avail = true;
- wake_up(&req_buf->wq_buf);
- list_del(&req_buf->list);
- }
}
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
}
@@ -58,7 +65,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
if (!req_data)
return -ENOMEM;
- req_data->done = false;
+ WRITE_ONCE(req_data->done, false);
init_waitqueue_head(&req_data->host_acked);
init_waitqueue_head(&req_data->wq_buf);
INIT_LIST_HEAD(&req_data->list);
@@ -79,12 +86,12 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
GFP_ATOMIC)) == -ENOSPC) {
dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
- req_data->wq_buf_avail = false;
+ WRITE_ONCE(req_data->wq_buf_avail, false);
list_add_tail(&req_data->list, &vpmem->req_list);
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
/* A host response results in "host_ack" getting called */
- wait_event(req_data->wq_buf, req_data->wq_buf_avail);
+ wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail));
spin_lock_irqsave(&vpmem->pmem_lock, flags);
}
err1 = virtqueue_kick(vpmem->req_vq);
@@ -98,7 +105,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
err = -EIO;
} else {
/* A host response results in "host_ack" getting called */
- wait_event(req_data->host_acked, req_data->done);
+ wait_event(req_data->host_acked, READ_ONCE(req_data->done));
err = le32_to_cpu(req_data->resp.ret);
}
--
2.52.0
^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH V2 1/5] nvdimm: virtio_pmem: always wake -ENOSPC waiters
2025-12-25 4:29 ` [PATCH V2 1/5] nvdimm: virtio_pmem: always wake -ENOSPC waiters Li Chen
@ 2026-01-02 12:29 ` Pankaj Gupta
2026-02-25 11:48 ` Li Chen
0 siblings, 1 reply; 8+ messages in thread
From: Pankaj Gupta @ 2026-01-02 12:29 UTC (permalink / raw)
To: Li Chen
Cc: Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny, nvdimm,
virtualization, linux-kernel, Michael S . Tsirkin
+CC MST
> virtio_pmem_host_ack() reclaims virtqueue descriptors with
> virtqueue_get_buf(). The -ENOSPC waiter wakeup is tied to completing the
> returned token.
>
> If token completion is skipped for any reason, reclaimed descriptors may
> not wake a waiter and the submitter may sleep forever waiting for a free
> slot.
>
> Always wake one -ENOSPC waiter for each virtqueue completion before
> touching the returned token.
>
> Use READ_ONCE()/WRITE_ONCE() for the wait_event() flags (done and
> wq_buf_avail). They are observed by waiters without pmem_lock, so make
> the accesses explicit single loads/stores and avoid compiler
> reordering/caching across the wait/wake paths.
>
> Signed-off-by: Li Chen <me@linux.beauty>
> ---
> drivers/nvdimm/nd_virtio.c | 35 +++++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> index c3f07be4aa22..6f9890361d0b 100644
> --- a/drivers/nvdimm/nd_virtio.c
> +++ b/drivers/nvdimm/nd_virtio.c
> @@ -9,26 +9,33 @@
> #include "virtio_pmem.h"
> #include "nd.h"
>
> +static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
> +{
> + struct virtio_pmem_request *req_buf;
> +
> + if (list_empty(&vpmem->req_list))
> + return;
> +
> + req_buf = list_first_entry(&vpmem->req_list,
> + struct virtio_pmem_request, list);
[...]
> + list_del_init(&req_buf->list);
> + WRITE_ONCE(req_buf->wq_buf_avail, true);
> + wake_up(&req_buf->wq_buf);
It seems that with the above change (the 3-line fix), you are allowing a waiter
to wake up before the token is accessed. Maybe simplify the patch by
keeping just this change in a single patch and moving the other changes
(READ_ONCE/WRITE_ONCE) into a separate patch with a corresponding commit log.
Thanks,
Pankaj
> +}
> +
> /* The interrupt handler */
> void virtio_pmem_host_ack(struct virtqueue *vq)
> {
> struct virtio_pmem *vpmem = vq->vdev->priv;
> - struct virtio_pmem_request *req_data, *req_buf;
> + struct virtio_pmem_request *req_data;
> unsigned long flags;
> unsigned int len;
>
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
> - req_data->done = true;
> + virtio_pmem_wake_one_waiter(vpmem);
> + WRITE_ONCE(req_data->done, true);
> wake_up(&req_data->host_acked);
> -
> - if (!list_empty(&vpmem->req_list)) {
> - req_buf = list_first_entry(&vpmem->req_list,
> - struct virtio_pmem_request, list);
> - req_buf->wq_buf_avail = true;
> - wake_up(&req_buf->wq_buf);
> - list_del(&req_buf->list);
> - }
> }
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> }
> @@ -58,7 +65,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> if (!req_data)
> return -ENOMEM;
>
> - req_data->done = false;
> + WRITE_ONCE(req_data->done, false);
> init_waitqueue_head(&req_data->host_acked);
> init_waitqueue_head(&req_data->wq_buf);
> INIT_LIST_HEAD(&req_data->list);
> @@ -79,12 +86,12 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> GFP_ATOMIC)) == -ENOSPC) {
>
> dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
> - req_data->wq_buf_avail = false;
> + WRITE_ONCE(req_data->wq_buf_avail, false);
> list_add_tail(&req_data->list, &vpmem->req_list);
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
>
> /* A host response results in "host_ack" getting called */
> - wait_event(req_data->wq_buf, req_data->wq_buf_avail);
> + wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail));
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> }
> err1 = virtqueue_kick(vpmem->req_vq);
> @@ -98,7 +105,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> err = -EIO;
> } else {
> /* A host response results in "host_ack" getting called */
> - wait_event(req_data->host_acked, req_data->done);
> + wait_event(req_data->host_acked, READ_ONCE(req_data->done));
> err = le32_to_cpu(req_data->resp.ret);
> }
>
> --
> 2.52.0
>
^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH V2 1/5] nvdimm: virtio_pmem: always wake -ENOSPC waiters
2026-01-02 12:29 ` Pankaj Gupta
@ 2026-02-25 11:48 ` Li Chen
0 siblings, 0 replies; 8+ messages in thread
From: Li Chen @ 2026-02-25 11:48 UTC (permalink / raw)
To: Pankaj Gupta
Cc: Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny, nvdimm,
virtualization, linux-kernel, Michael S . Tsirkin
Hi Pankaj,
---- On Fri, 02 Jan 2026 20:29:59 +0800 Pankaj Gupta <pankaj.gupta.linux@gmail.com> wrote ---
> +CC MST
> > virtio_pmem_host_ack() reclaims virtqueue descriptors with
> > virtqueue_get_buf(). The -ENOSPC waiter wakeup is tied to completing the
> > returned token.
> >
> > If token completion is skipped for any reason, reclaimed descriptors may
> > not wake a waiter and the submitter may sleep forever waiting for a free
> > slot.
> >
> > Always wake one -ENOSPC waiter for each virtqueue completion before
> > touching the returned token.
> >
> > Use READ_ONCE()/WRITE_ONCE() for the wait_event() flags (done and
> > wq_buf_avail). They are observed by waiters without pmem_lock, so make
> > the accesses explicit single loads/stores and avoid compiler
> > reordering/caching across the wait/wake paths.
> >
> > Signed-off-by: Li Chen <me@linux.beauty>
> > ---
> > drivers/nvdimm/nd_virtio.c | 35 +++++++++++++++++++++--------------
> > 1 file changed, 21 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> > index c3f07be4aa22..6f9890361d0b 100644
> > --- a/drivers/nvdimm/nd_virtio.c
> > +++ b/drivers/nvdimm/nd_virtio.c
> > @@ -9,26 +9,33 @@
> > #include "virtio_pmem.h"
> > #include "nd.h"
> >
> > +static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
> > +{
> > + struct virtio_pmem_request *req_buf;
> > +
> > + if (list_empty(&vpmem->req_list))
> > + return;
> > +
> > + req_buf = list_first_entry(&vpmem->req_list,
> > + struct virtio_pmem_request, list);
>
> [...]
> > + list_del_init(&req_buf->list);
> > + WRITE_ONCE(req_buf->wq_buf_avail, true);
> > + wake_up(&req_buf->wq_buf);
>
> Seems with the above change (3 line fix), you are allowing to wakeup a waiter
> before accessing the token. Maybe simplify the patch by just
> keeping this change in the single patch & other changes (READ_ONCE/WRITE_ONCE)
> onto separate patch with corresponding commit log.
Sorry for the late reply, I just realized I somehow missed this email :-p
Thanks for the suggestion. I'll do it in v3.
Regards,
Li
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH V2 2/5] nvdimm: virtio_pmem: refcount requests for token lifetime
2025-12-25 4:29 [PATCH V2 0/4] nvdimm: virtio_pmem: fix request lifetime and converge broken queue failures Li Chen
2025-12-25 4:29 ` [PATCH V2 1/5] nvdimm: virtio_pmem: always wake -ENOSPC waiters Li Chen
@ 2025-12-25 4:29 ` Li Chen
2025-12-25 4:29 ` [PATCH V2 3/5] nvdimm: virtio_pmem: converge broken virtqueue to -EIO Li Chen
` (2 subsequent siblings)
4 siblings, 0 replies; 8+ messages in thread
From: Li Chen @ 2025-12-25 4:29 UTC (permalink / raw)
To: Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny, Pankaj Gupta,
nvdimm, virtualization, linux-kernel
Cc: Li Chen
KASAN reports a slab use-after-free from virtio_pmem_host_ack(). It happens
when it wakes a request that has already been freed by the submitter.
This happens when the request token is still reachable via the virtqueue,
but virtio_pmem_flush() returns and frees it.
Fix the token lifetime by refcounting struct virtio_pmem_request.
virtio_pmem_flush() holds a submitter reference, and the virtqueue holds an
extra reference once the request is queued. The completion path drops the
virtqueue reference, and the submitter drops its reference before
returning.
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/nvdimm/nd_virtio.c | 34 +++++++++++++++++++++++++++++-----
drivers/nvdimm/virtio_pmem.h | 2 ++
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 6f9890361d0b..d0385d4646f2 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -9,6 +9,14 @@
#include "virtio_pmem.h"
#include "nd.h"
+static void virtio_pmem_req_release(struct kref *kref)
+{
+ struct virtio_pmem_request *req;
+
+ req = container_of(kref, struct virtio_pmem_request, kref);
+ kfree(req);
+}
+
static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
{
struct virtio_pmem_request *req_buf;
@@ -36,6 +44,7 @@ void virtio_pmem_host_ack(struct virtqueue *vq)
virtio_pmem_wake_one_waiter(vpmem);
WRITE_ONCE(req_data->done, true);
wake_up(&req_data->host_acked);
+ kref_put(&req_data->kref, virtio_pmem_req_release);
}
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
}
@@ -65,6 +74,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
if (!req_data)
return -ENOMEM;
+ kref_init(&req_data->kref);
WRITE_ONCE(req_data->done, false);
init_waitqueue_head(&req_data->host_acked);
init_waitqueue_head(&req_data->wq_buf);
@@ -82,10 +92,23 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
* to req_list and wait for host_ack to wake us up when free
* slots are available.
*/
- while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
- GFP_ATOMIC)) == -ENOSPC) {
-
- dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
+ for (;;) {
+ err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
+ GFP_ATOMIC);
+ if (!err) {
+ /*
+ * Take the virtqueue reference while @pmem_lock is
+ * held so completion cannot run concurrently.
+ */
+ kref_get(&req_data->kref);
+ break;
+ }
+
+ if (err != -ENOSPC)
+ break;
+
+ dev_info_ratelimited(&vdev->dev,
+ "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
WRITE_ONCE(req_data->wq_buf_avail, false);
list_add_tail(&req_data->list, &vpmem->req_list);
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
@@ -94,6 +117,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail));
spin_lock_irqsave(&vpmem->pmem_lock, flags);
}
+
err1 = virtqueue_kick(vpmem->req_vq);
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
/*
@@ -109,7 +133,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
err = le32_to_cpu(req_data->resp.ret);
}
- kfree(req_data);
+ kref_put(&req_data->kref, virtio_pmem_req_release);
return err;
};
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index 0dddefe594c4..fc8f613f8f28 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -12,10 +12,12 @@
#include <linux/module.h>
#include <uapi/linux/virtio_pmem.h>
+#include <linux/kref.h>
#include <linux/libnvdimm.h>
#include <linux/spinlock.h>
struct virtio_pmem_request {
+ struct kref kref;
struct virtio_pmem_req req;
struct virtio_pmem_resp resp;
--
2.52.0
^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH V2 3/5] nvdimm: virtio_pmem: converge broken virtqueue to -EIO
2025-12-25 4:29 [PATCH V2 0/4] nvdimm: virtio_pmem: fix request lifetime and converge broken queue failures Li Chen
2025-12-25 4:29 ` [PATCH V2 1/5] nvdimm: virtio_pmem: always wake -ENOSPC waiters Li Chen
2025-12-25 4:29 ` [PATCH V2 2/5] nvdimm: virtio_pmem: refcount requests for token lifetime Li Chen
@ 2025-12-25 4:29 ` Li Chen
2025-12-25 4:29 ` [PATCH V2 4/5] nvdimm: virtio_pmem: drain requests in freeze Li Chen
2025-12-25 4:29 ` [PATCH 5/5] nvdimm: nd_virtio: export virtio_pmem_mark_broken_and_drain Li Chen
4 siblings, 0 replies; 8+ messages in thread
From: Li Chen @ 2025-12-25 4:29 UTC (permalink / raw)
To: Pankaj Gupta, Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny,
virtualization, nvdimm, linux-kernel
Cc: Li Chen
virtio_pmem_flush() waits for either a free virtqueue descriptor (-ENOSPC)
or a host completion. If the request virtqueue becomes broken (e.g.
virtqueue_kick() notify failure), those waiters may never make progress.
Track a device-level broken state and converge all error paths to -EIO.
Fail fast for new requests, wake all -ENOSPC waiters, and drain/detach
outstanding request tokens to complete them with an error.
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/nvdimm/nd_virtio.c | 73 +++++++++++++++++++++++++++++++++---
drivers/nvdimm/virtio_pmem.c | 7 ++++
drivers/nvdimm/virtio_pmem.h | 4 ++
3 files changed, 78 insertions(+), 6 deletions(-)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index d0385d4646f2..de1e3dde85eb 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -17,6 +17,18 @@ static void virtio_pmem_req_release(struct kref *kref)
kfree(req);
}
+static void virtio_pmem_signal_done(struct virtio_pmem_request *req)
+{
+ WRITE_ONCE(req->done, true);
+ wake_up(&req->host_acked);
+}
+
+static void virtio_pmem_complete_err(struct virtio_pmem_request *req)
+{
+ req->resp.ret = cpu_to_le32(1);
+ virtio_pmem_signal_done(req);
+}
+
static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
{
struct virtio_pmem_request *req_buf;
@@ -31,6 +43,40 @@ static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
wake_up(&req_buf->wq_buf);
}
+static void virtio_pmem_wake_all_waiters(struct virtio_pmem *vpmem)
+{
+ struct virtio_pmem_request *req, *tmp;
+
+ list_for_each_entry_safe(req, tmp, &vpmem->req_list, list) {
+ WRITE_ONCE(req->wq_buf_avail, true);
+ wake_up(&req->wq_buf);
+ list_del_init(&req->list);
+ }
+}
+
+void virtio_pmem_mark_broken_and_drain(struct virtio_pmem *vpmem)
+{
+ struct virtio_pmem_request *req;
+ unsigned int len;
+
+ if (READ_ONCE(vpmem->broken))
+ return;
+
+ WRITE_ONCE(vpmem->broken, true);
+ dev_err_once(&vpmem->vdev->dev, "virtqueue is broken\n");
+ virtio_pmem_wake_all_waiters(vpmem);
+
+ while ((req = virtqueue_get_buf(vpmem->req_vq, &len)) != NULL) {
+ virtio_pmem_complete_err(req);
+ kref_put(&req->kref, virtio_pmem_req_release);
+ }
+
+ while ((req = virtqueue_detach_unused_buf(vpmem->req_vq)) != NULL) {
+ virtio_pmem_complete_err(req);
+ kref_put(&req->kref, virtio_pmem_req_release);
+ }
+}
+
/* The interrupt handler */
void virtio_pmem_host_ack(struct virtqueue *vq)
{
@@ -42,8 +88,7 @@ void virtio_pmem_host_ack(struct virtqueue *vq)
spin_lock_irqsave(&vpmem->pmem_lock, flags);
while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
virtio_pmem_wake_one_waiter(vpmem);
- WRITE_ONCE(req_data->done, true);
- wake_up(&req_data->host_acked);
+ virtio_pmem_signal_done(req_data);
kref_put(&req_data->kref, virtio_pmem_req_release);
}
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
@@ -69,6 +114,9 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
return -EIO;
}
+ if (READ_ONCE(vpmem->broken))
+ return -EIO;
+
might_sleep();
req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
if (!req_data)
@@ -114,22 +162,35 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
/* A host response results in "host_ack" getting called */
- wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail));
+ wait_event(req_data->wq_buf, READ_ONCE(req_data->wq_buf_avail) ||
+ READ_ONCE(vpmem->broken));
spin_lock_irqsave(&vpmem->pmem_lock, flags);
+
+ if (READ_ONCE(vpmem->broken))
+ break;
}
- err1 = virtqueue_kick(vpmem->req_vq);
+ if (err == -EIO || virtqueue_is_broken(vpmem->req_vq))
+ virtio_pmem_mark_broken_and_drain(vpmem);
+
+ err1 = true;
+ if (!err && !READ_ONCE(vpmem->broken)) {
+ err1 = virtqueue_kick(vpmem->req_vq);
+ if (!err1)
+ virtio_pmem_mark_broken_and_drain(vpmem);
+ }
spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
/*
* virtqueue_add_sgs failed with error different than -ENOSPC, we can't
* do anything about that.
*/
- if (err || !err1) {
+ if (READ_ONCE(vpmem->broken) || err || !err1) {
dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
err = -EIO;
} else {
/* A host response results in "host_ack" getting called */
- wait_event(req_data->host_acked, READ_ONCE(req_data->done));
+ wait_event(req_data->host_acked, READ_ONCE(req_data->done) ||
+ READ_ONCE(vpmem->broken));
err = le32_to_cpu(req_data->resp.ret);
}
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 2396d19ce549..aa07328e3ff9 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -25,6 +25,7 @@ static int init_vq(struct virtio_pmem *vpmem)
spin_lock_init(&vpmem->pmem_lock);
INIT_LIST_HEAD(&vpmem->req_list);
+ WRITE_ONCE(vpmem->broken, false);
return 0;
};
@@ -137,6 +138,12 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
static void virtio_pmem_remove(struct virtio_device *vdev)
{
struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev);
+ struct virtio_pmem *vpmem = vdev->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ virtio_pmem_mark_broken_and_drain(vpmem);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
nvdimm_bus_unregister(nvdimm_bus);
vdev->config->del_vqs(vdev);
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index fc8f613f8f28..49dd2e62d198 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -44,6 +44,9 @@ struct virtio_pmem {
/* List to store deferred work if virtqueue is full */
struct list_head req_list;
+ /* Fail fast and wake waiters if the request virtqueue is broken. */
+ bool broken;
+
/* Synchronize virtqueue data */
spinlock_t pmem_lock;
@@ -53,5 +56,6 @@ struct virtio_pmem {
};
void virtio_pmem_host_ack(struct virtqueue *vq);
+void virtio_pmem_mark_broken_and_drain(struct virtio_pmem *vpmem);
int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
#endif
--
2.52.0
^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH V2 4/5] nvdimm: virtio_pmem: drain requests in freeze
2025-12-25 4:29 [PATCH V2 0/4] nvdimm: virtio_pmem: fix request lifetime and converge broken queue failures Li Chen
` (2 preceding siblings ...)
2025-12-25 4:29 ` [PATCH V2 3/5] nvdimm: virtio_pmem: converge broken virtqueue to -EIO Li Chen
@ 2025-12-25 4:29 ` Li Chen
2025-12-25 4:29 ` [PATCH 5/5] nvdimm: nd_virtio: export virtio_pmem_mark_broken_and_drain Li Chen
4 siblings, 0 replies; 8+ messages in thread
From: Li Chen @ 2025-12-25 4:29 UTC (permalink / raw)
To: Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny, Pankaj Gupta,
nvdimm, virtualization, linux-kernel
Cc: Li Chen
virtio_pmem_freeze() deletes virtqueues and resets the device without
waking threads waiting for a virtqueue descriptor or a host completion.
Mark the request virtqueue broken and drain outstanding requests under
pmem_lock before teardown so waiters can make progress and return -EIO.
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/nvdimm/virtio_pmem.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index aa07328e3ff9..5c60a7b459d4 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -152,6 +152,13 @@ static void virtio_pmem_remove(struct virtio_device *vdev)
static int virtio_pmem_freeze(struct virtio_device *vdev)
{
+ struct virtio_pmem *vpmem = vdev->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ virtio_pmem_mark_broken_and_drain(vpmem);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+
vdev->config->del_vqs(vdev);
virtio_reset_device(vdev);
--
2.52.0
^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/5] nvdimm: nd_virtio: export virtio_pmem_mark_broken_and_drain
2025-12-25 4:29 [PATCH V2 0/4] nvdimm: virtio_pmem: fix request lifetime and converge broken queue failures Li Chen
` (3 preceding siblings ...)
2025-12-25 4:29 ` [PATCH V2 4/5] nvdimm: virtio_pmem: drain requests in freeze Li Chen
@ 2025-12-25 4:29 ` Li Chen
4 siblings, 0 replies; 8+ messages in thread
From: Li Chen @ 2025-12-25 4:29 UTC (permalink / raw)
To: Pankaj Gupta, Dan Williams, Vishal Verma, Dave Jiang, Ira Weiny,
virtualization, nvdimm, linux-kernel
Cc: kernel test robot, Li Chen
When CONFIG_VIRTIO_PMEM=m, virtio_pmem.ko calls
virtio_pmem_mark_broken_and_drain() from nd_virtio.ko.
Export the symbol to fix the modpost undefined error.
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202512250116.ewtzlD0g-lkp@intel.com/
Signed-off-by: Li Chen <me@linux.beauty>
---
drivers/nvdimm/nd_virtio.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index de1e3dde85eb..0d13f73ab7f4 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -76,6 +76,7 @@ void virtio_pmem_mark_broken_and_drain(struct virtio_pmem *vpmem)
kref_put(&req->kref, virtio_pmem_req_release);
}
}
+EXPORT_SYMBOL_GPL(virtio_pmem_mark_broken_and_drain);
/* The interrupt handler */
void virtio_pmem_host_ack(struct virtqueue *vq)
--
2.52.0
^ permalink raw reply related [flat|nested] 8+ messages in thread