qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/4] scsi-block: Fix error handling with r/werror=stop
@ 2024-07-31 12:32 Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 1/4] scsi-disk: Use positive return value for status in dma_readv/writev Kevin Wolf
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Kevin Wolf @ 2024-07-31 12:32 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, pbonzini, fam, stefanha, qemu-devel

Running validation tests in Windows 2019's Failover Cluster Manager
fails in two different ways when run with rerror/werror=stop:

1. It runs into an assertion failure because the sgio-based I/O path
   takes shortcuts in its error handling that skip necessary cleanup

2. RESERVATION_CONFLICT is treated as a host error and stops the VM,
   which in some cases can't be resumed at all because nothing will make
   the error go away on retry. The error should always go to the guest
   instead, it's an invalid request from the guest.

This series fixes these problems.

v2:
- Patch 4: [Paolo]
  * Set error = 0 explicitly, remove useless variable initialisation
  * Add comment to explain why we consider it a guest error
  * Mention scsi-block specifically in the commit message

Kevin Wolf (4):
  scsi-disk: Use positive return value for status in dma_readv/writev
  scsi-block: Don't skip callback for sgio error status/driver_status
  scsi-disk: Add warning comments that host_status errors take a
    shortcut
  scsi-disk: Always report RESERVATION_CONFLICT to guest

 hw/scsi/scsi-disk.c | 73 +++++++++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 22 deletions(-)

-- 
2.45.2



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/4] scsi-disk: Use positive return value for status in dma_readv/writev
  2024-07-31 12:32 [PATCH v2 0/4] scsi-block: Fix error handling with r/werror=stop Kevin Wolf
@ 2024-07-31 12:32 ` Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 2/4] scsi-block: Don't skip callback for sgio error status/driver_status Kevin Wolf
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2024-07-31 12:32 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, pbonzini, fam, stefanha, qemu-devel

In some error cases, scsi_block_sgio_complete() never calls the passed
callback, but directly completes the request. This leads to bugs because
its error paths are not exact copies of what the callback would normally
do.

In preparation to fix this, allow passing positive return values to the
callbacks that represent the status code that should be used to complete
the request.

scsi_handle_rw_error() already handles positive values for its ret
parameter because scsi_block_sgio_complete() calls directly into it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index a67092db6a..3ff6798bde 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -65,6 +65,10 @@ OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
 
 struct SCSIDiskClass {
     SCSIDeviceClass parent_class;
+    /*
+     * Callbacks receive ret == 0 for success. Errors are represented either as
+     * negative errno values, or as positive SAM status codes.
+     */
     DMAIOFunc       *dma_readv;
     DMAIOFunc       *dma_writev;
     bool            (*need_fua_emulation)(SCSICommand *cmd);
@@ -283,7 +287,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
         return true;
     }
 
-    if (ret < 0) {
+    if (ret != 0) {
         return scsi_handle_rw_error(r, ret, acct_failed);
     }
 
@@ -360,7 +364,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
 static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
 {
     assert(r->req.aiocb == NULL);
-    if (scsi_disk_req_check_error(r, ret, false)) {
+    if (scsi_disk_req_check_error(r, ret, ret > 0)) {
         goto done;
     }
 
@@ -385,9 +389,10 @@ static void scsi_dma_complete(void *opaque, int ret)
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
+    /* ret > 0 is accounted for in scsi_disk_req_check_error() */
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    } else {
+    } else if (ret == 0) {
         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
     }
     scsi_dma_complete_noio(r, ret);
@@ -403,7 +408,7 @@ static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
            qemu_get_current_aio_context());
 
     assert(r->req.aiocb == NULL);
-    if (scsi_disk_req_check_error(r, ret, false)) {
+    if (scsi_disk_req_check_error(r, ret, ret > 0)) {
         goto done;
     }
 
@@ -424,9 +429,10 @@ static void scsi_read_complete(void *opaque, int ret)
     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
+    /* ret > 0 is accounted for in scsi_disk_req_check_error() */
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    } else {
+    } else if (ret == 0) {
         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
         trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
     }
@@ -534,7 +540,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
            qemu_get_current_aio_context());
 
     assert (r->req.aiocb == NULL);
-    if (scsi_disk_req_check_error(r, ret, false)) {
+    if (scsi_disk_req_check_error(r, ret, ret > 0)) {
         goto done;
     }
 
@@ -562,9 +568,10 @@ static void scsi_write_complete(void * opaque, int ret)
     assert (r->req.aiocb != NULL);
     r->req.aiocb = NULL;
 
+    /* ret > 0 is accounted for in scsi_disk_req_check_error() */
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
-    } else {
+    } else if (ret == 0) {
         block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
     }
     scsi_write_complete_noio(r, ret);
-- 
2.45.2



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/4] scsi-block: Don't skip callback for sgio error status/driver_status
  2024-07-31 12:32 [PATCH v2 0/4] scsi-block: Fix error handling with r/werror=stop Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 1/4] scsi-disk: Use positive return value for status in dma_readv/writev Kevin Wolf
@ 2024-07-31 12:32 ` Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 3/4] scsi-disk: Add warning comments that host_status errors take a shortcut Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 4/4] scsi-disk: Always report RESERVATION_CONFLICT to guest Kevin Wolf
  3 siblings, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2024-07-31 12:32 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, pbonzini, fam, stefanha, qemu-devel

Instead of calling into scsi_handle_rw_error() directly from
scsi_block_sgio_complete() and skipping the normal callback, go through
the normal cleanup path by calling the callback with a positive error
value.

The important difference here is not only that the code path is cleaner,
but that the callbacks set r->req.aiocb = NULL. If we skip setting this
and the error action is BLOCK_ERROR_ACTION_STOP, resuming the VM runs
into an assertion failure in scsi_read_data() or scsi_write_data()
because the dangling aiocb pointer is unexpected.

Fixes: a108557bbf ("scsi: inline sg_io_sense_from_errno() into the callers.")
Buglink: https://issues.redhat.com/browse/RHEL-50000
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 3ff6798bde..6e1a5c98df 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2832,16 +2832,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
         } else {
             ret = io_hdr->status;
         }
-
-        if (ret > 0) {
-            if (scsi_handle_rw_error(r, ret, true)) {
-                scsi_req_unref(&r->req);
-                return;
-            }
-
-            /* Ignore error.  */
-            ret = 0;
-        }
     }
 
     req->cb(req->cb_opaque, ret);
-- 
2.45.2



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 3/4] scsi-disk: Add warning comments that host_status errors take a shortcut
  2024-07-31 12:32 [PATCH v2 0/4] scsi-block: Fix error handling with r/werror=stop Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 1/4] scsi-disk: Use positive return value for status in dma_readv/writev Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 2/4] scsi-block: Don't skip callback for sgio error status/driver_status Kevin Wolf
@ 2024-07-31 12:32 ` Kevin Wolf
  2024-07-31 12:32 ` [PATCH v2 4/4] scsi-disk: Always report RESERVATION_CONFLICT to guest Kevin Wolf
  3 siblings, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2024-07-31 12:32 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, pbonzini, fam, stefanha, qemu-devel

scsi_block_sgio_complete() has surprising behaviour in that there are
error cases in which it directly completes the request and never calls
the passed callback. In the current state of the code, this doesn't seem
to result in bugs, but with future code changes, we must be careful to
never rely on the callback doing some cleanup until this code smell is
fixed. For now, just add warnings to make people aware of the trap.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 6e1a5c98df..69a195177e 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -68,6 +68,9 @@ struct SCSIDiskClass {
     /*
      * Callbacks receive ret == 0 for success. Errors are represented either as
      * negative errno values, or as positive SAM status codes.
+     *
+     * Beware: For errors returned in host_status, the function may directly
+     * complete the request and never call the callback.
      */
     DMAIOFunc       *dma_readv;
     DMAIOFunc       *dma_writev;
@@ -381,6 +384,7 @@ done:
     scsi_req_unref(&r->req);
 }
 
+/* May not be called in all error cases, don't rely on cleanup here */
 static void scsi_dma_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -421,6 +425,7 @@ done:
     scsi_req_unref(&r->req);
 }
 
+/* May not be called in all error cases, don't rely on cleanup here */
 static void scsi_read_complete(void *opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -560,6 +565,7 @@ done:
     scsi_req_unref(&r->req);
 }
 
+/* May not be called in all error cases, don't rely on cleanup here */
 static void scsi_write_complete(void * opaque, int ret)
 {
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -2821,6 +2827,7 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
     sg_io_hdr_t *io_hdr = &req->io_header;
 
     if (ret == 0) {
+        /* FIXME This skips calling req->cb() and any cleanup in it */
         if (io_hdr->host_status != SCSI_HOST_OK) {
             scsi_req_complete_failed(&r->req, io_hdr->host_status);
             scsi_req_unref(&r->req);
-- 
2.45.2



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 4/4] scsi-disk: Always report RESERVATION_CONFLICT to guest
  2024-07-31 12:32 [PATCH v2 0/4] scsi-block: Fix error handling with r/werror=stop Kevin Wolf
                   ` (2 preceding siblings ...)
  2024-07-31 12:32 ` [PATCH v2 3/4] scsi-disk: Add warning comments that host_status errors take a shortcut Kevin Wolf
@ 2024-07-31 12:32 ` Kevin Wolf
  3 siblings, 0 replies; 5+ messages in thread
From: Kevin Wolf @ 2024-07-31 12:32 UTC (permalink / raw)
  To: qemu-block; +Cc: kwolf, pbonzini, fam, stefanha, qemu-devel

In the case of scsi-block, RESERVATION_CONFLICT is not a backend error,
but indicates that the guest tried to make a request that it isn't
allowed to execute. Pass the error to the guest so that it can decide
what to do with it.

Without this, if we stop the VM in response to a RESERVATION_CONFLICT
(as is the default policy in management software such as oVirt or
KubeVirt), it can happen that the VM cannot be resumed any more because
every attempt to resume it immediately runs into the same error and
stops the VM again.

One case that expects RESERVATION_CONFLICT errors to be visible in the
guest is running the validation tests in Windows 2019's Failover Cluster
Manager, which intentionally tries to execute invalid requests to see if
they are properly rejected.

Buglink: https://issues.redhat.com/browse/RHEL-50000
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 69a195177e..4d94b2b816 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -224,7 +224,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
     SCSISense sense = SENSE_CODE(NO_SENSE);
-    int error = 0;
+    int error;
     bool req_has_sense = false;
     BlockErrorAction action;
     int status;
@@ -235,11 +235,35 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
     } else {
         /* A passthrough command has completed with nonzero status.  */
         status = ret;
-        if (status == CHECK_CONDITION) {
+        switch (status) {
+        case CHECK_CONDITION:
             req_has_sense = true;
             error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
-        } else {
+            break;
+        case RESERVATION_CONFLICT:
+            /*
+             * Don't apply the error policy, always report to the guest.
+             *
+             * This is a passthrough code path, so it's not a backend error, but
+             * a response to an invalid guest request.
+             *
+             * Windows Failover Cluster validation intentionally sends invalid
+             * requests to verify that reservations work as intended. It is
+             * crucial that it sees the resulting errors.
+             *
+             * Treating a reservation conflict as a guest-side error is obvious
+             * when a pr-manager is in use. Without one, the situation is less
+             * clear, but there might be nothing that can be fixed on the host
+             * (like in the above example), and we don't want to be stuck in a
+             * loop where resuming the VM and retrying the request immediately
+             * stops it again. So always reporting is still the safer option in
+             * this case, too.
+             */
+            error = 0;
+            break;
+        default:
             error = EINVAL;
+            break;
         }
     }
 
@@ -249,8 +273,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
      * are usually retried immediately, so do not post them to QMP and
      * do not account them as failed I/O.
      */
-    if (req_has_sense &&
-        scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) {
+    if (!error || (req_has_sense &&
+                   scsi_sense_buf_is_guest_recoverable(r->req.sense,
+                                                       sizeof(r->req.sense)))) {
         action = BLOCK_ERROR_ACTION_REPORT;
         acct_failed = false;
     } else {
-- 
2.45.2



^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-07-31 12:34 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-31 12:32 [PATCH v2 0/4] scsi-block: Fix error handling with r/werror=stop Kevin Wolf
2024-07-31 12:32 ` [PATCH v2 1/4] scsi-disk: Use positive return value for status in dma_readv/writev Kevin Wolf
2024-07-31 12:32 ` [PATCH v2 2/4] scsi-block: Don't skip callback for sgio error status/driver_status Kevin Wolf
2024-07-31 12:32 ` [PATCH v2 3/4] scsi-disk: Add warning comments that host_status errors take a shortcut Kevin Wolf
2024-07-31 12:32 ` [PATCH v2 4/4] scsi-disk: Always report RESERVATION_CONFLICT to guest Kevin Wolf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).