qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PULL 0/5] Block layer patches
@ 2025-05-22 18:31 Kevin Wolf
  2025-05-22 18:31 ` [PULL 1/5] scsi-disk: Add native FUA write support Kevin Wolf
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Kevin Wolf @ 2025-05-22 18:31 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, qemu-devel

The following changes since commit f0737158b483e7ec2b2512145aeab888b85cc1f7:

  Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging (2025-05-20 10:26:30 -0400)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to bf627788ef17721955bfcfba84209a07ae5f54ea:

  file-posix: Probe paths and retry SG_IO on potential path errors (2025-05-22 17:56:50 +0200)

----------------------------------------------------------------
Block layer patches

- scsi-disk: Add native FUA write support, enable FUA by default
- qemu-img: fix offset calculation in bench
- file-posix: allow BLKZEROOUT with -t writeback
- file-posix: Probe paths and retry SG_IO on potential path errors

----------------------------------------------------------------
Alberto Faria (2):
      scsi-disk: Add native FUA write support
      scsi-disk: Advertise FUA support by default

Denis Rastyogin (1):
      qemu-img: fix offset calculation in bench

Kevin Wolf (1):
      file-posix: Probe paths and retry SG_IO on potential path errors

Stefan Hajnoczi (1):
      file-posix: allow BLKZEROOUT with -t writeback

 block/file-posix.c  | 126 +++++++++++++++++++++++++++++++++++++++++++++++-----
 hw/core/machine.c   |   4 +-
 hw/scsi/scsi-disk.c |  55 +++++++----------------
 qemu-img.c          |   4 +-
 4 files changed, 135 insertions(+), 54 deletions(-)



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PULL 1/5] scsi-disk: Add native FUA write support
  2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
@ 2025-05-22 18:31 ` Kevin Wolf
  2025-05-22 18:31 ` [PULL 2/5] scsi-disk: Advertise FUA support by default Kevin Wolf
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Kevin Wolf @ 2025-05-22 18:31 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, qemu-devel

From: Alberto Faria <afaria@redhat.com>

Simply propagate the FUA flag on write requests to the driver. The block
layer will emulate it if necessary.

Signed-off-by: Alberto Faria <afaria@redhat.com>
Message-ID: <20250502121115.3613717-2-afaria@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 53 +++++++++++++--------------------------------
 1 file changed, 15 insertions(+), 38 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index cb4af1b715..738d8df8ec 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -74,7 +74,7 @@ struct SCSIDiskClass {
      */
     DMAIOFunc       *dma_readv;
     DMAIOFunc       *dma_writev;
-    bool            (*need_fua_emulation)(SCSICommand *cmd);
+    bool            (*need_fua)(SCSICommand *cmd);
     void            (*update_sense)(SCSIRequest *r);
 };
 
@@ -85,7 +85,7 @@ typedef struct SCSIDiskReq {
     uint32_t sector_count;
     uint32_t buflen;
     bool started;
-    bool need_fua_emulation;
+    bool need_fua;
     struct iovec iov;
     QEMUIOVector qiov;
     BlockAcctCookie acct;
@@ -389,24 +389,6 @@ static bool scsi_is_cmd_fua(SCSICommand *cmd)
     }
 }
 
-static void scsi_write_do_fua(SCSIDiskReq *r)
-{
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-
-    assert(r->req.aiocb == NULL);
-    assert(!r->req.io_canceled);
-
-    if (r->need_fua_emulation) {
-        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
-                         BLOCK_ACCT_FLUSH);
-        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
-        return;
-    }
-
-    scsi_req_complete(&r->req, GOOD);
-    scsi_req_unref(&r->req);
-}
-
 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
 {
     assert(r->req.aiocb == NULL);
@@ -416,12 +398,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
 
     r->sector += r->sector_count;
     r->sector_count = 0;
-    if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
-        scsi_write_do_fua(r);
-        return;
-    } else {
-        scsi_req_complete(&r->req, GOOD);
-    }
+    scsi_req_complete(&r->req, GOOD);
 
 done:
     scsi_req_unref(&r->req);
@@ -564,7 +541,7 @@ static void scsi_read_data(SCSIRequest *req)
 
     first = !r->started;
     r->started = true;
-    if (first && r->need_fua_emulation) {
+    if (first && r->need_fua) {
         block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
                          BLOCK_ACCT_FLUSH);
         r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
@@ -589,8 +566,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
     r->sector += n;
     r->sector_count -= n;
     if (r->sector_count == 0) {
-        scsi_write_do_fua(r);
-        return;
+        scsi_req_complete(&r->req, GOOD);
     } else {
         scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
         trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
@@ -623,6 +599,7 @@ static void scsi_write_data(SCSIRequest *req)
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
+    BlockCompletionFunc *cb;
 
     /* No data transfer may already be in progress */
     assert(r->req.aiocb == NULL);
@@ -648,11 +625,10 @@ static void scsi_write_data(SCSIRequest *req)
 
     if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
         r->req.cmd.buf[0] == VERIFY_16) {
-        if (r->req.sg) {
-            scsi_dma_complete_noio(r, 0);
-        } else {
-            scsi_write_complete_noio(r, 0);
-        }
+        block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
+                         BLOCK_ACCT_FLUSH);
+        cb = r->req.sg ? scsi_dma_complete : scsi_write_complete;
+        r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, cb, r);
         return;
     }
 
@@ -2391,7 +2367,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
         scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
         return 0;
     }
-    r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
+    r->need_fua = sdc->need_fua(&r->req.cmd);
     if (r->sector_count == 0) {
         scsi_req_complete(&r->req, GOOD);
     }
@@ -3137,7 +3113,8 @@ BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
 {
     SCSIDiskReq *r = opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-    return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+    int flags = r->need_fua ? BDRV_REQ_FUA : 0;
+    return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, flags, cb, cb_opaque);
 }
 
 static char *scsi_property_get_loadparm(Object *obj, Error **errp)
@@ -3186,7 +3163,7 @@ static void scsi_disk_base_class_initfn(ObjectClass *klass, const void *data)
     device_class_set_legacy_reset(dc, scsi_disk_reset);
     sdc->dma_readv = scsi_dma_readv;
     sdc->dma_writev = scsi_dma_writev;
-    sdc->need_fua_emulation = scsi_is_cmd_fua;
+    sdc->need_fua  = scsi_is_cmd_fua;
 }
 
 static const TypeInfo scsi_disk_base_info = {
@@ -3338,7 +3315,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, const void *data)
     sdc->dma_readv   = scsi_block_dma_readv;
     sdc->dma_writev  = scsi_block_dma_writev;
     sdc->update_sense = scsi_block_update_sense;
-    sdc->need_fua_emulation = scsi_block_no_fua;
+    sdc->need_fua    = scsi_block_no_fua;
     dc->desc = "SCSI block device passthrough";
     device_class_set_props(dc, scsi_block_properties);
     dc->vmsd  = &vmstate_scsi_disk_state;
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PULL 2/5] scsi-disk: Advertise FUA support by default
  2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
  2025-05-22 18:31 ` [PULL 1/5] scsi-disk: Add native FUA write support Kevin Wolf
@ 2025-05-22 18:31 ` Kevin Wolf
  2025-05-22 18:31 ` [PULL 3/5] qemu-img: fix offset calculation in bench Kevin Wolf
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Kevin Wolf @ 2025-05-22 18:31 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, qemu-devel

From: Alberto Faria <afaria@redhat.com>

Allow the guest to submit FUA requests directly, instead of forcing it
to emulate them using a regular flush.

Signed-off-by: Alberto Faria <afaria@redhat.com>
Message-ID: <20250502121115.3613717-3-afaria@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/core/machine.c   | 4 +++-
 hw/scsi/scsi-disk.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index b8ae155dfa..c3f3a5020d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,7 +37,9 @@
 #include "hw/virtio/virtio-iommu.h"
 #include "audio/audio.h"
 
-GlobalProperty hw_compat_10_0[] = {};
+GlobalProperty hw_compat_10_0[] = {
+    { "scsi-hd", "dpofua", "off" },
+};
 const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0);
 
 GlobalProperty hw_compat_9_2[] = {
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 738d8df8ec..b4782c6248 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -3192,7 +3192,7 @@ static const Property scsi_hd_properties[] = {
     DEFINE_PROP_BIT("removable", SCSIDiskState, features,
                     SCSI_DISK_F_REMOVABLE, false),
     DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
-                    SCSI_DISK_F_DPOFUA, false),
+                    SCSI_DISK_F_DPOFUA, true),
     DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
     DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
     DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PULL 3/5] qemu-img: fix offset calculation in bench
  2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
  2025-05-22 18:31 ` [PULL 1/5] scsi-disk: Add native FUA write support Kevin Wolf
  2025-05-22 18:31 ` [PULL 2/5] scsi-disk: Advertise FUA support by default Kevin Wolf
@ 2025-05-22 18:31 ` Kevin Wolf
  2025-05-22 18:31 ` [PULL 4/5] file-posix: allow BLKZEROOUT with -t writeback Kevin Wolf
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Kevin Wolf @ 2025-05-22 18:31 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, qemu-devel

From: Denis Rastyogin <gerben@altlinux.org>

This error was discovered by fuzzing qemu-img.

The current offset calculation leads to an EIO error
in block/block-backend.c: blk_check_byte_request():

 if (offset > len || len - offset < bytes) {
     return -EIO;
 }

This triggers the error message:
"qemu-img: Failed request: Input/output error".

Example of the issue:
 offset: 260076
 len: 260096
 bytes: 4096

This fix ensures that offset remains within a valid range.

Signed-off-by: Denis Rastyogin <gerben@altlinux.org>
Message-ID: <20250506141410.100119-1-gerben@altlinux.org>
[kwolf: Fixed up integer overflow]
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 qemu-img.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 76ac5d3028..139eeb5039 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -4488,10 +4488,10 @@ static void bench_cb(void *opaque, int ret)
          */
         b->in_flight++;
         b->offset += b->step;
-        if (b->image_size == 0) {
+        if (b->image_size <= b->bufsize) {
             b->offset = 0;
         } else {
-            b->offset %= b->image_size;
+            b->offset %= b->image_size - b->bufsize;
         }
         if (b->write) {
             acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b);
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PULL 4/5] file-posix: allow BLKZEROOUT with -t writeback
  2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
                   ` (2 preceding siblings ...)
  2025-05-22 18:31 ` [PULL 3/5] qemu-img: fix offset calculation in bench Kevin Wolf
@ 2025-05-22 18:31 ` Kevin Wolf
  2025-05-22 18:31 ` [PULL 5/5] file-posix: Probe paths and retry SG_IO on potential path errors Kevin Wolf
  2025-05-23 15:45 ` [PULL 0/5] Block layer patches Stefan Hajnoczi
  5 siblings, 0 replies; 7+ messages in thread
From: Kevin Wolf @ 2025-05-22 18:31 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, qemu-devel

From: Stefan Hajnoczi <stefanha@redhat.com>

The Linux BLKZEROOUT ioctl is only invoked when BDRV_O_NOCACHE is set
because old kernels did not invalidate the page cache. In that case
mixing BLKZEROOUT with buffered I/O could lead to corruption.

However, Linux 4.9 commit 22dd6d356628 ("block: invalidate the page
cache when issuing BLKZEROOUT") made BLKZEROOUT coherent with the page
cache.

I have checked that Linux 4.9+ kernels are shipped at least as far back
as Debian 10 (buster), openSUSE Leap 15.2, and RHEL/CentOS 8.

Use BLKZEROOUT with buffered I/O, mostly so `qemu-img ... -t
writeback` can offload write zeroes.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250417211053.98700-1-stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index ec95b74869..5a3532e40b 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -785,17 +785,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 #endif
 
-    if (S_ISBLK(st.st_mode)) {
-#ifdef __linux__
-        /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache.  Do
-         * not rely on the contents of discarded blocks unless using O_DIRECT.
-         * Same for BLKZEROOUT.
-         */
-        if (!(bs->open_flags & BDRV_O_NOCACHE)) {
-            s->has_write_zeroes = false;
-        }
-#endif
-    }
 #ifdef __FreeBSD__
     if (S_ISCHR(st.st_mode)) {
         /*
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PULL 5/5] file-posix: Probe paths and retry SG_IO on potential path errors
  2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
                   ` (3 preceding siblings ...)
  2025-05-22 18:31 ` [PULL 4/5] file-posix: allow BLKZEROOUT with -t writeback Kevin Wolf
@ 2025-05-22 18:31 ` Kevin Wolf
  2025-05-23 15:45 ` [PULL 0/5] Block layer patches Stefan Hajnoczi
  5 siblings, 0 replies; 7+ messages in thread
From: Kevin Wolf @ 2025-05-22 18:31 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, qemu-devel

When scsi-block is used on a host multipath device, it runs into the
problem that the kernel dm-mpath doesn't know anything about SCSI or
SG_IO and therefore can't decide if a SG_IO request returned an error
and needs to be retried on a different path. Instead of getting working
failover, an error is returned to scsi-block and handled according to
the configured error policy. Obviously, this is not what users want;
they want working failover.

QEMU can parse the SG_IO result and determine whether this could have
been a path error, but just retrying the same request could just send it
to the same failing path again and result in the same error.

With a kernel that supports the DM_MPATH_PROBE_PATHS ioctl on dm-mpath
block devices (queued in the device mapper tree for Linux 6.16), we can
tell the kernel to probe all paths and tell us if any usable paths
remained. If so, we can now retry the SG_IO ioctl and expect it to be
sent to a working path.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250522130803.34738-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 115 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 5a3532e40b..9b5f08ccb2 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -41,6 +41,7 @@
 
 #include "scsi/pr-manager.h"
 #include "scsi/constants.h"
+#include "scsi/utils.h"
 
 #if defined(__APPLE__) && (__MACH__)
 #include <sys/ioctl.h>
@@ -72,6 +73,7 @@
 #include <linux/blkzoned.h>
 #endif
 #include <linux/cdrom.h>
+#include <linux/dm-ioctl.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
 #include <linux/hdreg.h>
@@ -138,6 +140,22 @@
 #define RAW_LOCK_PERM_BASE             100
 #define RAW_LOCK_SHARED_BASE           200
 
+/*
+ * Multiple retries are mostly meant for two separate scenarios:
+ *
+ * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another
+ *   path goes down.
+ *
+ * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have
+ *   to send another SG_IO to switch to another path group to probe the paths in
+ *   it.
+ *
+ * Even if each path is in a separate path group (path_grouping_policy set to
+ * failover), it's rare to have more than eight path groups - and even then
+ * pretty unlikely that only bad path groups would be chosen in eight retries.
+ */
+#define SG_IO_MAX_RETRIES 8
+
 typedef struct BDRVRawState {
     int fd;
     bool use_lock;
@@ -165,6 +183,7 @@ typedef struct BDRVRawState {
     bool use_linux_aio:1;
     bool has_laio_fdsync:1;
     bool use_linux_io_uring:1;
+    bool use_mpath:1;
     int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
     bool needs_alignment;
@@ -4253,15 +4272,105 @@ hdev_open_Mac_error:
     /* Since this does ioctl the device must be already opened */
     bs->sg = hdev_is_sg(bs);
 
+    /* sg devices aren't even block devices and can't use dm-mpath */
+    s->use_mpath = !bs->sg;
+
     return ret;
 }
 
 #if defined(__linux__)
+#if defined(DM_MPATH_PROBE_PATHS)
+static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr)
+{
+    if (ret < 0) {
+        switch (ret) {
+        case -ENODEV:
+            return true;
+        case -EAGAIN:
+            /*
+             * The device is probably suspended. This happens while the dm table
+             * is reloaded, e.g. because a path is added or removed. This is an
+             * operation that should complete within 1ms, so just wait a bit and
+             * retry.
+             *
+             * If the device was suspended for another reason, we'll wait and
+             * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before
+             * we return an error and potentially stop the VM.
+             */
+            qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    if (io_hdr->host_status != SCSI_HOST_OK) {
+        return true;
+    }
+
+    switch (io_hdr->status) {
+    case GOOD:
+    case CONDITION_GOOD:
+    case INTERMEDIATE_GOOD:
+    case INTERMEDIATE_C_GOOD:
+    case RESERVATION_CONFLICT:
+    case COMMAND_TERMINATED:
+        return false;
+    case CHECK_CONDITION:
+        return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp,
+                                                    io_hdr->mx_sb_len);
+    default:
+        return true;
+    }
+}
+
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
+{
+    BDRVRawState *s = acb->bs->opaque;
+    RawPosixAIOData probe_acb;
+
+    if (!s->use_mpath) {
+        return false;
+    }
+
+    if (!sgio_path_error(ret, acb->ioctl.buf)) {
+        return false;
+    }
+
+    probe_acb = (RawPosixAIOData) {
+        .bs         = acb->bs,
+        .aio_type   = QEMU_AIO_IOCTL,
+        .aio_fildes = s->fd,
+        .aio_offset = 0,
+        .ioctl      = {
+            .buf        = NULL,
+            .cmd        = DM_MPATH_PROBE_PATHS,
+        },
+    };
+
+    ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb);
+    if (ret == -ENOTTY) {
+        s->use_mpath = false;
+    } else if (ret == -EAGAIN) {
+        /* The device might be suspended for a table reload, worth retrying */
+        return true;
+    }
+
+    return ret == 0;
+}
+#else
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
+{
+    return false;
+}
+#endif /* DM_MPATH_PROBE_PATHS */
+
 static int coroutine_fn
 hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
+    int retries = SG_IO_MAX_RETRIES;
     int ret;
 
     ret = fd_open(bs);
@@ -4289,7 +4398,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         },
     };
 
-    return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+    do {
+        ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+    } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret));
+
+    return ret;
 }
 #endif /* linux */
 
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PULL 0/5] Block layer patches
  2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
                   ` (4 preceding siblings ...)
  2025-05-22 18:31 ` [PULL 5/5] file-posix: Probe paths and retry SG_IO on potential path errors Kevin Wolf
@ 2025-05-23 15:45 ` Stefan Hajnoczi
  5 siblings, 0 replies; 7+ messages in thread
From: Stefan Hajnoczi @ 2025-05-23 15:45 UTC (permalink / raw)
  To: Kevin Wolf; +Cc: qemu-block, kwolf, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 116 bytes --]

Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/10.1 for any user-visible changes.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-05-23 15:46 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-05-22 18:31 [PULL 0/5] Block layer patches Kevin Wolf
2025-05-22 18:31 ` [PULL 1/5] scsi-disk: Add native FUA write support Kevin Wolf
2025-05-22 18:31 ` [PULL 2/5] scsi-disk: Advertise FUA support by default Kevin Wolf
2025-05-22 18:31 ` [PULL 3/5] qemu-img: fix offset calculation in bench Kevin Wolf
2025-05-22 18:31 ` [PULL 4/5] file-posix: allow BLKZEROOUT with -t writeback Kevin Wolf
2025-05-22 18:31 ` [PULL 5/5] file-posix: Probe paths and retry SG_IO on potential path errors Kevin Wolf
2025-05-23 15:45 ` [PULL 0/5] Block layer patches Stefan Hajnoczi

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).