qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] file-posix: Probe paths and retry SG_IO on potential path errors
@ 2025-05-13 11:37 Kevin Wolf
  2025-05-13 13:51 ` Stefan Hajnoczi
  2025-05-14 13:43 ` Hanna Czenczek
  0 siblings, 2 replies; 9+ messages in thread
From: Kevin Wolf @ 2025-05-13 11:37 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, hreitz, stefanha, pbonzini, bmarzins, qemu-devel

When scsi-block is used on a host multipath device, it runs into the
problem that the kernel dm-mpath doesn't know anything about SCSI or
SG_IO and therefore can't decide if a SG_IO request returned an error
and needs to be retried on a different path. Instead of getting working
failover, an error is returned to scsi-block and handled according to
the configured error policy. Obviously, this is not what users want,
they want working failover.

QEMU can parse the SG_IO result and determine whether this could have
been a path error, but just retrying the same request could just send it
to the same failing path again and result in the same error.

With a kernel that supports the DM_MPATH_PROBE_PATHS ioctl on dm-mpath
block devices (queued in the device mapper tree for Linux 6.16), we can
tell the kernel to probe all paths and tell us if any usable paths
remained. If so, we can now retry the SG_IO ioctl and expect it to be
sent to a working path.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 82 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index ef52ed9169..2ea41dbc2d 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -41,6 +41,7 @@
 
 #include "scsi/pr-manager.h"
 #include "scsi/constants.h"
+#include "scsi/utils.h"
 
 #if defined(__APPLE__) && (__MACH__)
 #include <sys/ioctl.h>
@@ -72,6 +73,7 @@
 #include <linux/blkzoned.h>
 #endif
 #include <linux/cdrom.h>
+#include <linux/dm-ioctl.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
 #include <linux/hdreg.h>
@@ -138,6 +140,8 @@
 #define RAW_LOCK_PERM_BASE             100
 #define RAW_LOCK_SHARED_BASE           200
 
+#define SG_IO_MAX_RETRIES 5
+
 typedef struct BDRVRawState {
     int fd;
     bool use_lock;
@@ -165,6 +169,7 @@ typedef struct BDRVRawState {
     bool use_linux_aio:1;
     bool has_laio_fdsync:1;
     bool use_linux_io_uring:1;
+    bool use_mpath:1;
     int page_cache_inconsistent; /* errno from fdatasync failure */
     bool has_fallocate;
     bool needs_alignment;
@@ -4263,15 +4268,86 @@ hdev_open_Mac_error:
     /* Since this does ioctl the device must be already opened */
     bs->sg = hdev_is_sg(bs);
 
+    /* sg devices aren't even block devices and can't use dm-mpath */
+    s->use_mpath = !bs->sg;
+
     return ret;
 }
 
 #if defined(__linux__)
+#if defined(DM_MPATH_PROBE_PATHS)
+static bool sgio_path_error(int ret, sg_io_hdr_t *io_hdr)
+{
+    if (ret == -ENODEV) {
+        return true;
+    } else if (ret < 0) {
+        return false;
+    }
+
+    if (io_hdr->host_status != SCSI_HOST_OK) {
+        return true;
+    }
+
+    switch (io_hdr->status) {
+    case GOOD:
+    case CONDITION_GOOD:
+    case INTERMEDIATE_GOOD:
+    case INTERMEDIATE_C_GOOD:
+    case RESERVATION_CONFLICT:
+    case COMMAND_TERMINATED:
+        return false;
+    case CHECK_CONDITION:
+        return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp,
+                                                    io_hdr->mx_sb_len);
+    default:
+        return true;
+    }
+}
+
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
+{
+    BDRVRawState *s = acb->bs->opaque;
+    RawPosixAIOData probe_acb;
+
+    if (!s->use_mpath) {
+        return false;
+    }
+
+    if (!sgio_path_error(ret, acb->ioctl.buf)) {
+        return false;
+    }
+
+    probe_acb = (RawPosixAIOData) {
+        .bs         = acb->bs,
+        .aio_type   = QEMU_AIO_IOCTL,
+        .aio_fildes = s->fd,
+        .aio_offset = 0,
+        .ioctl      = {
+            .buf        = NULL,
+            .cmd        = DM_MPATH_PROBE_PATHS,
+        },
+    };
+
+    ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb);
+    if (ret == -ENOTTY) {
+        s->use_mpath = false;
+    }
+
+    return ret == 0;
+}
+#else
+static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret)
+{
+    return false;
+}
+#endif /* DM_MPATH_PROBE_PATHS */
+
 static int coroutine_fn
 hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
+    int retries = SG_IO_MAX_RETRIES;
     int ret;
 
     ret = fd_open(bs);
@@ -4299,7 +4375,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
         },
     };
 
-    return raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+    do {
+        ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb);
+    } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret));
+
+    return ret;
 }
 #endif /* linux */
 
-- 
2.49.0



^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-05-21 19:59 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-05-13 11:37 [PATCH] file-posix: Probe paths and retry SG_IO on potential path errors Kevin Wolf
2025-05-13 13:51 ` Stefan Hajnoczi
2025-05-15  8:15   ` Kevin Wolf
2025-05-15 14:01     ` Stefan Hajnoczi
2025-05-15 15:02       ` Kevin Wolf
2025-05-20 14:03         ` Stefan Hajnoczi
2025-05-21 17:46           ` Kevin Wolf
2025-05-21 18:23             ` Benjamin Marzinski
2025-05-14 13:43 ` Hanna Czenczek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).