Linux virtualization list
 help / color / mirror / Atom feed
* [PATCH v1 1/2] virtio-pci: add error_detected for PCI AER recovery
  2026-06-15  2:00 [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Xixin Liu
@ 2026-06-10  6:20 ` Xixin Liu
  2026-06-12 10:00 ` [PATCH v1 2/2] virtio-blk: mark disk dead on ERS permanent failure Xixin Liu
  2026-06-15 14:52 ` [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Stefan Hajnoczi
  2 siblings, 0 replies; 4+ messages in thread
From: Xixin Liu @ 2026-06-10  6:20 UTC (permalink / raw)
  To: linux-block, virtualization
  Cc: mst, jasowang, xuanzhuo, eperezma, pbonzini, stefanha, axboe,
	linux-kernel, liuxixin

virtio-pci only registered reset_prepare/reset_done.  The PCI error
recovery core treats devices without error_detected as NO_AER_DRIVER and
does not deliver pci_channel_io_perm_failure to the driver after a failed
recovery.  Virtio devices therefore miss the normal ERS quiesce/teardown
sequence.

Register error_detected: quiesce on frozen (reset_prepare) before bus
reset; on perm_failure break virtqueues and return DISCONNECT.  Block-layer
cleanup for virtio-blk is handled in the follow-up patch.

Signed-off-by: Xixin Liu <liuxixin@kylinos.cn>
---
 drivers/virtio/virtio_pci_common.c | 30 +++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 164f480b18a6..e2dda946e70e 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -828,7 +828,37 @@ static void virtio_pci_reset_done(struct pci_dev *pci_dev)
 		dev_warn(&pci_dev->dev, "Reset done failure: %d", ret);
 }
 
+static pci_ers_result_t virtio_pci_error_detected(struct pci_dev *pci_dev,
+						  pci_channel_state_t state)
+{
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+
+	/*
+	 * PCI ERS error_detected: quiesce on frozen before bus reset; on
+	 * permanent failure mark all virtqueues broken so drivers stop
+	 * using the device, then report DISCONNECT to the recovery core.
+	 */
+	switch (state) {
+	case pci_channel_io_normal:
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_frozen:
+		pci_info(pci_dev, "frozen error detected, quiesce device\n");
+		if (virtio_device_reset_prepare(&vp_dev->vdev))
+			dev_warn(&pci_dev->dev, "frozen: reset prepare failed\n");
+		return PCI_ERS_RESULT_NEED_RESET;
+	case pci_channel_io_perm_failure:
+		dev_warn(&pci_dev->dev,
+			 "permanent failure, disconnecting device\n");
+		virtio_break_device(&vp_dev->vdev);
+		return PCI_ERS_RESULT_DISCONNECT;
+	default:
+		break;
+	}
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
 static const struct pci_error_handlers virtio_pci_err_handler = {
+	.error_detected = virtio_pci_error_detected,
 	.reset_prepare  = virtio_pci_reset_prepare,
 	.reset_done     = virtio_pci_reset_done,
 };


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v1 2/2] virtio-blk: mark disk dead on ERS permanent failure
  2026-06-15  2:00 [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Xixin Liu
  2026-06-10  6:20 ` [PATCH v1 1/2] virtio-pci: add error_detected for PCI AER recovery Xixin Liu
@ 2026-06-12 10:00 ` Xixin Liu
  2026-06-15 14:52 ` [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Stefan Hajnoczi
  2 siblings, 0 replies; 4+ messages in thread
From: Xixin Liu @ 2026-06-12 10:00 UTC (permalink / raw)
  To: linux-block, virtualization
  Cc: mst, jasowang, xuanzhuo, eperezma, pbonzini, stefanha, axboe,
	linux-kernel, liuxixin

After ERS reports pci_channel_io_perm_failure, virtio-pci must ask the
virtio driver to tear down the block device — not only mark virtqueues
broken.  Call the virtio driver shutdown hook from virtio-pci on
perm_failure; virtio-blk implements shutdown with blk_mark_disk_dead().
Fail new requests early in virtio_queue_rq when the disk is dead or
virtqueues were removed during frozen reset_prepare.

Signed-off-by: Xixin Liu <liuxixin@kylinos.cn>
---
 drivers/block/virtio_blk.c         | 30 ++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci_common.c | 10 +++++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 32bf3ba07a9d..4740ae91d5be 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -435,6 +435,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_status_t status;
 	int err;
 
+	/* Fail fast if ERS frozen tore down VQs or the disk was marked dead. */
+	if (unlikely(!disk_live(vblk->disk) || !vblk->vqs || !vblk->vdev)) {
+		blk_mq_start_request(req);
+		return BLK_STS_IOERR;
+	}
+
 	status = virtblk_prep_rq(hctx, vblk, req, vbr);
 	if (unlikely(status))
 		return status;
@@ -1561,6 +1567,29 @@ static int virtblk_probe(struct virtio_device *vdev)
 	return err;
 }
 
+/* Stop I/O and mark the gendisk dead (ERS perm_failure or system shutdown). */
+static void virtblk_shutdown(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+	struct request_queue *q;
+	unsigned int memflags;
+
+	if (!vblk || !vblk->disk)
+		return;
+
+	flush_work(&vblk->config_work);
+	virtio_break_device(vdev);
+
+	q = vblk->disk->queue;
+	memflags = blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue_nowait(q);
+
+	blk_mark_disk_dead(vblk->disk);
+
+	blk_mq_unquiesce_queue(q);
+	blk_mq_unfreeze_queue(q, memflags);
+}
+
 static void virtblk_remove(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk = vdev->priv;
@@ -1684,6 +1713,7 @@ static struct virtio_driver virtio_blk = {
 	.probe				= virtblk_probe,
 	.remove				= virtblk_remove,
 	.config_changed			= virtblk_config_changed,
+	.shutdown			= virtblk_shutdown,
 #ifdef CONFIG_PM_SLEEP
 	.freeze				= virtblk_freeze,
 	.restore			= virtblk_restore,
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index e2dda946e70e..924ceead436b 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -845,7 +845,15 @@ static pci_ers_result_t virtio_pci_error_detected(struct pci_dev *pci_dev,
 	case pci_channel_io_perm_failure:
 		dev_warn(&pci_dev->dev,
 			 "permanent failure, disconnecting device\n");
-		virtio_break_device(&vp_dev->vdev);
+		{
+			struct virtio_driver *drv =
+				drv_to_virtio(vp_dev->vdev.dev.driver);
+
+			if (drv && drv->shutdown)
+				drv->shutdown(&vp_dev->vdev);
+			else
+				virtio_break_device(&vp_dev->vdev);
+		}
 		return PCI_ERS_RESULT_DISCONNECT;
 	default:
 		break;


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk
@ 2026-06-15  2:00 Xixin Liu
  2026-06-10  6:20 ` [PATCH v1 1/2] virtio-pci: add error_detected for PCI AER recovery Xixin Liu
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Xixin Liu @ 2026-06-15  2:00 UTC (permalink / raw)
  To: linux-block, virtualization
  Cc: mst, jasowang, xuanzhuo, eperezma, pbonzini, stefanha, axboe,
	linux-kernel, liuxixin

Hi,

This series adds proper PCI AER error recovery handling for virtio-pci and
completes virtio-blk teardown when ERS reports pci_channel_io_perm_failure.

virtio-pci only registered reset_prepare/reset_done.  The recovery core
treats devices without error_detected as NO_AER_DRIVER and does not
deliver perm_failure to the driver after a failed recovery.  When bus
reset fails (reproduced on QEMU with DLLLA not set within 100 ms after
secondary bus reset), virtio-blk disks stay live even though virtqueues
may already have been torn down during the frozen phase.

Patch 1 registers error_detected (frozen quiesce + perm_failure notify).
Patch 2 calls the virtio driver shutdown hook from virtio-pci on
perm_failure, implements virtio-blk shutdown with blk_mark_disk_dead(),
and fail-fast guards in virtio_queue_rq.

Thanks,
Xixin Liu

---

Xixin Liu (2):
  virtio-pci: add error_detected for PCI AER recovery
  virtio-blk: mark disk dead on ERS permanent failure

 drivers/block/virtio_blk.c         | 30 ++++++++++++++++++++++++++++++
 drivers/virtio/virtio_pci_common.c | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

-- 
2.43.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk
  2026-06-15  2:00 [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Xixin Liu
  2026-06-10  6:20 ` [PATCH v1 1/2] virtio-pci: add error_detected for PCI AER recovery Xixin Liu
  2026-06-12 10:00 ` [PATCH v1 2/2] virtio-blk: mark disk dead on ERS permanent failure Xixin Liu
@ 2026-06-15 14:52 ` Stefan Hajnoczi
  2 siblings, 0 replies; 4+ messages in thread
From: Stefan Hajnoczi @ 2026-06-15 14:52 UTC (permalink / raw)
  To: Xixin Liu
  Cc: linux-block, virtualization, mst, jasowang, xuanzhuo, eperezma,
	pbonzini, axboe, linux-kernel, Parav Pandit

[-- Attachment #1: Type: text/plain, Size: 1472 bytes --]

On Mon, Jun 15, 2026 at 10:00:00AM +0800, Xixin Liu wrote:
> Hi,
> 
> This series adds proper PCI AER error recovery handling for virtio-pci and
> completes virtio-blk teardown when ERS reports pci_channel_io_perm_failure.

CCing Parav because he previously looked at surprise removal:
https://lore.kernel.org/virtualization/20250822091706.21170-1-parav@nvidia.com/

> 
> virtio-pci only registered reset_prepare/reset_done.  The recovery core
> treats devices without error_detected as NO_AER_DRIVER and does not
> deliver perm_failure to the driver after a failed recovery.  When bus
> reset fails (reproduced on QEMU with DLLLA not set within 100 ms after
> secondary bus reset), virtio-blk disks stay live even though virtqueues
> may already have been torn down during the frozen phase.
> 
> Patch 1 registers error_detected (frozen quiesce + perm_failure notify).
> Patch 2 calls the virtio driver shutdown hook from virtio-pci on
> perm_failure, implements virtio-blk shutdown with blk_mark_disk_dead(),
> and fail-fast guards in virtio_queue_rq.
> 
> Thanks,
> Xixin Liu
> 
> ---
> 
> Xixin Liu (2):
>   virtio-pci: add error_detected for PCI AER recovery
>   virtio-blk: mark disk dead on ERS permanent failure
> 
>  drivers/block/virtio_blk.c         | 39 +++++++++++++++++++++++++++++++
>  drivers/virtio/virtio_pci_common.c | 47 ++++++++++++++++++++++++++++++++++
>  2 files changed, 85 insertions(+)
> 
> -- 
> 2.43.0
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-15 14:53 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-15  2:00 [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Xixin Liu
2026-06-10  6:20 ` [PATCH v1 1/2] virtio-pci: add error_detected for PCI AER recovery Xixin Liu
2026-06-12 10:00 ` [PATCH v1 2/2] virtio-blk: mark disk dead on ERS permanent failure Xixin Liu
2026-06-15 14:52 ` [PATCH v1 0/2] virtio: PCI ERS permanent failure teardown for virtio-blk Stefan Hajnoczi

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.