All of lore.kernel.org
 help / color / mirror / Atom feed
From: Markus Armbruster <armbru@redhat.com>
To: Li Feng <fengli@smartx.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
	"Raphael Norwitz" <raphael.norwitz@nutanix.com>,
	"Kevin Wolf" <kwolf@redhat.com>,
	"Hanna Reitz" <hreitz@redhat.com>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Fam Zheng" <fam@euphon.net>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Viresh Kumar" <viresh.kumar@linaro.org>,
	qemu-block@nongnu.org (open list:Block layer core),
	qemu-devel@nongnu.org (open list:All patches CC here)
Subject: Re: [PATCH v3 4/5] vhost-user-scsi: support reconnect to backend
Date: Fri, 01 Sep 2023 14:00:38 +0200	[thread overview]
Message-ID: <878r9q85nd.fsf@pond.sub.org> (raw)
In-Reply-To: <20230731121018.2856310-5-fengli@smartx.com> (Li Feng's message of "Mon, 31 Jul 2023 20:10:09 +0800")

Li Feng <fengli@smartx.com> writes:

> If the backend crashes and restarts, the device is broken.
> This patch adds reconnect for vhost-user-scsi.
>
> Tested with spdk backend.
>
> Signed-off-by: Li Feng <fengli@smartx.com>
> ---
>  hw/scsi/vhost-user-scsi.c           | 199 +++++++++++++++++++++++++---
>  include/hw/virtio/vhost-user-scsi.h |   4 +
>  2 files changed, 184 insertions(+), 19 deletions(-)
>
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index ee99b19e7a..5bf012461b 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -43,26 +43,54 @@ enum VhostUserProtocolFeature {
>      VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
>  };
>  
> +static int vhost_user_scsi_start(VHostUserSCSI *s)
> +{
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    int ret;
> +
> +    ret = vhost_scsi_common_start(vsc);
> +    s->started_vu = (ret < 0 ? false : true);
> +
> +    return ret;
> +}
> +
> +static void vhost_user_scsi_stop(VHostUserSCSI *s)
> +{
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +
> +    if (!s->started_vu) {
> +        return;
> +    }
> +    s->started_vu = false;
> +
> +    vhost_scsi_common_stop(vsc);
> +}
> +
>  static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status)
>  {
>      VHostUserSCSI *s = (VHostUserSCSI *)vdev;
> +    DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
>      VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> -    bool start = (status & VIRTIO_CONFIG_S_DRIVER_OK) && vdev->vm_running;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    bool should_start = virtio_device_should_start(vdev, status);
> +    int ret;
>  
> -    if (vhost_dev_is_started(&vsc->dev) == start) {
> +    if (!s->connected) {
>          return;
>      }
>  
> -    if (start) {
> -        int ret;
> +    if (vhost_dev_is_started(&vsc->dev) == should_start) {
> +        return;
> +    }
>  
> -        ret = vhost_scsi_common_start(vsc);
> +    if (should_start) {
> +        ret = vhost_user_scsi_start(s);
>          if (ret < 0) {
>              error_report("unable to start vhost-user-scsi: %s", strerror(-ret));
> -            exit(1);
> +            qemu_chr_fe_disconnect(&vs->conf.chardev);
>          }
>      } else {
> -        vhost_scsi_common_stop(vsc);
> +        vhost_user_scsi_stop(s);
>      }
>  }
>  
> @@ -89,14 +117,126 @@ static void vhost_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>  }
>  
> +static int vhost_user_scsi_connect(DeviceState *dev, Error **errp)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    int ret = 0;
> +
> +    if (s->connected) {
> +        return 0;
> +    }
> +    s->connected = true;
> +
> +    vsc->dev.num_queues = vs->conf.num_queues;
> +    vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
> +    vsc->dev.vqs = s->vhost_vqs;
> +    vsc->dev.vq_index = 0;
> +    vsc->dev.backend_features = 0;
> +
> +    ret = vhost_dev_init(&vsc->dev, &s->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
> +                         errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    /* restore vhost state */
> +    if (virtio_device_started(vdev, vdev->status)) {
> +        ret = vhost_user_scsi_start(s);
> +        if (ret < 0) {
> +            return ret;
> +        }

Fails without setting an error, violating the function's (tacit)
postcondition.  Callers:

* vhost_user_scsi_event()

  Dereferences the null error and crashes.

* vhost_user_scsi_realize_connect()

  Also fails without setting an error.  Caller:

  - vhost_user_scsi_realize()

    1. Doesn't emit the "Reconnecting after error: " message.  See
       also below.

    2. Succeeds instead of failing!

> +    }
> +
> +    return 0;
> +}
> +
> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event);
> +
> +static void vhost_user_scsi_disconnect(DeviceState *dev)
> +{
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +
> +    if (!s->connected) {
> +        return;
> +    }
> +    s->connected = false;
> +
> +    vhost_user_scsi_stop(s);
> +
> +    vhost_dev_cleanup(&vsc->dev);
> +
> +    /* Re-instate the event handler for new connections */
> +    qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL,
> +                             vhost_user_scsi_event, NULL, dev, NULL, true);
> +}
> +
> +static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
> +{
> +    DeviceState *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
> +    VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    Error *local_err = NULL;
> +
> +    switch (event) {
> +    case CHR_EVENT_OPENED:
> +        if (vhost_user_scsi_connect(dev, &local_err) < 0) {
> +            error_report_err(local_err);
> +            qemu_chr_fe_disconnect(&vs->conf.chardev);
> +            return;
> +        }
> +        break;
> +    case CHR_EVENT_CLOSED:
> +        /* defer close until later to avoid circular close */
> +        vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
> +                               vhost_user_scsi_disconnect);
> +        break;
> +    case CHR_EVENT_BREAK:
> +    case CHR_EVENT_MUX_IN:
> +    case CHR_EVENT_MUX_OUT:
> +        /* Ignore */
> +        break;
> +    }
> +}
> +
> +static int vhost_user_scsi_realize_connect(VHostUserSCSI *s, Error **errp)
> +{
> +    DeviceState *dev = &s->parent_obj.parent_obj.parent_obj.parent_obj;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
> +    int ret;
> +
> +    s->connected = false;
> +
> +    ret = qemu_chr_fe_wait_connected(&vs->conf.chardev, errp);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = vhost_user_scsi_connect(dev, errp);
> +    if (ret < 0) {
> +        qemu_chr_fe_disconnect(&vs->conf.chardev);
> +        return ret;
> +    }
> +    assert(s->connected);
> +
> +    return 0;
> +}
> +
>  static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>      VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>      VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> -    struct vhost_virtqueue *vqs = NULL;
>      Error *err = NULL;
>      int ret;
> +    int retries = VU_REALIZE_CONN_RETRIES;
>  
>      if (!vs->conf.chardev.chr) {
>          error_setg(errp, "vhost-user-scsi: missing chardev");
> @@ -115,18 +255,28 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
>          goto free_virtio;
>      }
>  
> -    vsc->dev.nvqs = VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
> -    vsc->dev.vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
> -    vsc->dev.vq_index = 0;
> -    vsc->dev.backend_features = 0;
> -    vqs = vsc->dev.vqs;
> +    vsc->inflight = g_new0(struct vhost_inflight, 1);
> +    s->vhost_vqs = g_new0(struct vhost_virtqueue,
> +                          VIRTIO_SCSI_VQ_NUM_FIXED + vs->conf.num_queues);
> +
> +    assert(!*errp);

Dereferencing *errp is wrong.  Quoting qapi/error.h's big comment:

 * = Why, when and how to use ERRP_GUARD() =
 *
 * Without ERRP_GUARD(), use of the @errp parameter is restricted:
 * - It must not be dereferenced, because it may be null.
 * - It should not be passed to error_prepend() or
 *   error_append_hint(), because that doesn't work with &error_fatal.
 * ERRP_GUARD() lifts these restrictions.

See there for how to use ERRP_GUARD().

> +    do {
> +        if (*errp) {
> +            error_prepend(errp, "Reconnecting after error: ");
> +            error_report_err(*errp);
> +            *errp = NULL;
> +        }
> +        ret = vhost_user_scsi_realize_connect(s, errp);
> +    } while (ret < 0 && retries--);

Reports "Reconnecting ..." to the HMP monitor when in HMP context, else
to stderr.  I.e. reports to stderr when we're in QMP context.  Is this
really what we want?

>  
> -    ret = vhost_dev_init(&vsc->dev, &s->vhost_user,
> -                         VHOST_BACKEND_TYPE_USER, 0, errp);
>      if (ret < 0) {
>          goto free_vhost;
>      }
>  
> +    /* we're fully initialized, now we can operate, so add the handler */
> +    qemu_chr_fe_set_handlers(&vs->conf.chardev,  NULL, NULL,
> +                             vhost_user_scsi_event, NULL, (void *)dev,
> +                             NULL, true);
>      /* Channel and lun both are 0 for bootable vhost-user-scsi disk */
>      vsc->channel = 0;
>      vsc->lun = 0;
> @@ -135,8 +285,12 @@ static void vhost_user_scsi_realize(DeviceState *dev, Error **errp)
>      return;
>  
>  free_vhost:
> +    g_free(s->vhost_vqs);
> +    s->vhost_vqs = NULL;
> +    g_free(vsc->inflight);
> +    vsc->inflight = NULL;
>      vhost_user_cleanup(&s->vhost_user);
> -    g_free(vqs);
> +
>  free_virtio:
>      virtio_scsi_common_unrealize(dev);
>  }
> @@ -146,16 +300,23 @@ static void vhost_user_scsi_unrealize(DeviceState *dev)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VHostUserSCSI *s = VHOST_USER_SCSI(dev);
>      VHostSCSICommon *vsc = VHOST_SCSI_COMMON(s);
> -    struct vhost_virtqueue *vqs = vsc->dev.vqs;
> +    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev);
>  
>      /* This will stop the vhost backend. */
>      vhost_user_scsi_set_status(vdev, 0);
> +    qemu_chr_fe_set_handlers(&vs->conf.chardev, NULL, NULL, NULL, NULL, NULL,
> +                             NULL, false);
>  
>      vhost_dev_cleanup(&vsc->dev);
> -    g_free(vqs);
> +    g_free(s->vhost_vqs);
> +    s->vhost_vqs = NULL;
> +
> +    vhost_dev_free_inflight(vsc->inflight);
> +    g_free(vsc->inflight);
> +    vsc->inflight = NULL;
>  
> -    virtio_scsi_common_unrealize(dev);
>      vhost_user_cleanup(&s->vhost_user);
> +    virtio_scsi_common_unrealize(dev);
>  }
>  
>  static Property vhost_user_scsi_properties[] = {
> diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h
> index 521b08e559..b405ec952a 100644
> --- a/include/hw/virtio/vhost-user-scsi.h
> +++ b/include/hw/virtio/vhost-user-scsi.h
> @@ -29,6 +29,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCSI, VHOST_USER_SCSI)
>  struct VHostUserSCSI {
>      VHostSCSICommon parent_obj;
>      VhostUserState vhost_user;
> +    bool connected;
> +    bool started_vu;
> +
> +    struct vhost_virtqueue *vhost_vqs;
>  };
>  
>  #endif /* VHOST_USER_SCSI_H */



  parent reply	other threads:[~2023-09-01 12:01 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-21 10:51 [PATCH] vhost-user-scsi: support reconnect to backend Li Feng
2023-07-24 17:21 ` Raphael Norwitz
2023-07-24 20:30   ` Michael S. Tsirkin
2023-07-25 10:19   ` Li Feng
2023-07-27 21:21     ` Raphael Norwitz
2023-07-28  7:48       ` Li Feng
2023-07-30 22:09         ` Raphael Norwitz
2023-07-31 11:32           ` Li Feng
2023-07-25 10:42 ` [PATCH v2 0/4] Implement reconnect for vhost-user-scsi Li Feng
2023-07-25 10:42   ` [PATCH v2 1/4] vhost: fix the fd leak Li Feng
2023-07-30 22:09     ` Raphael Norwitz
2023-07-25 10:42   ` [PATCH v2 2/4] vhost-user-common: send get_inflight_fd once Li Feng
2023-07-28  6:04     ` Michael S. Tsirkin
2023-07-28  7:49       ` Li Feng
2023-07-30 22:13         ` Raphael Norwitz
2023-07-31 11:38           ` Li Feng
2023-07-31 23:11             ` Raphael Norwitz
2023-07-25 10:42   ` [PATCH v2 3/4] vhost: move and rename the conn retry times Li Feng
2023-07-30 22:13     ` Raphael Norwitz
2023-07-25 10:42   ` [PATCH v2 4/4] vhost-user-scsi: support reconnect to backend Li Feng
2023-07-25 10:58     ` Li Feng
2023-07-30 22:14     ` Raphael Norwitz
2023-07-31 10:24       ` Li Feng
2023-07-31 12:10 ` [PATCH v3 0/5] Implement reconnect for vhost-user-scsi Li Feng
2023-07-31 12:10   ` [PATCH v3 1/5] vhost: fix the fd leak Li Feng
2023-08-03 13:41     ` Fiona Ebner
2023-07-31 12:10   ` [PATCH v3 2/5] vhost-user-common: send get_inflight_fd once Li Feng
2023-07-31 23:34     ` Raphael Norwitz
2023-07-31 12:10   ` [PATCH v3 3/5] vhost: move and rename the conn retry times Li Feng
2023-07-31 12:10   ` [PATCH v3 4/5] vhost-user-scsi: support reconnect to backend Li Feng
2023-07-31 23:35     ` Raphael Norwitz
2023-09-01 12:00     ` Markus Armbruster [this message]
2023-09-12  8:29       ` Li Feng
2023-07-31 12:10   ` [PATCH v3 5/5] vhost-user-scsi: start vhost when guest kicks Li Feng
2023-07-31 23:35     ` Raphael Norwitz
2023-09-01 11:44     ` Markus Armbruster
2023-09-12  7:53       ` Li Feng
2023-09-12  8:52 ` [PATCH v4 0/5] Implement reconnect for vhost-user-scsi Li Feng
2023-09-12  8:52   ` [PATCH v4 1/5] vhost-user-common: send get_inflight_fd once Li Feng
2023-09-12  8:52   ` [PATCH v4 2/5] vhost: move and rename the conn retry times Li Feng
2023-09-12  8:52   ` [PATCH v4 3/5] vhost-user-scsi: support reconnect to backend Li Feng
2023-09-12  8:52   ` [PATCH v4 4/5] vhost-user-scsi: start vhost when guest kicks Li Feng
2023-09-12  8:52   ` [PATCH v4 5/5] vhost-user: fix lost reconnect Li Feng
2023-09-19 11:40 ` [PATCH v5 0/5] Implement reconnect for vhost-user-scsi Li Feng
2023-09-19 11:40   ` [PATCH v5 1/5] vhost-user-common: send get_inflight_fd once Li Feng
2023-09-19 11:40   ` [PATCH v5 2/5] vhost: move and rename the conn retry times Li Feng
2023-09-19 11:40   ` [PATCH v5 3/5] vhost-user-scsi: support reconnect to backend Li Feng
2023-09-19 11:40   ` [PATCH v5 4/5] vhost-user-scsi: start vhost when guest kicks Li Feng
2023-09-19 11:40   ` [PATCH v5 5/5] vhost-user: fix lost reconnect Li Feng
2023-09-22 11:46 ` [PATCH v6 0/5] Implement reconnect for vhost-user-scsi Li Feng
2023-09-22 11:46   ` [PATCH v6 1/5] vhost-user-common: send get_inflight_fd once Li Feng
2023-09-29  0:54     ` Raphael Norwitz
2023-10-08  8:49       ` Li Feng
2023-10-08  8:50         ` Michael S. Tsirkin
2023-10-08  8:53           ` Li Feng
2023-09-22 11:46   ` [PATCH v6 2/5] vhost: move and rename the conn retry times Li Feng
2023-09-22 11:46   ` [PATCH v6 3/5] vhost-user-scsi: support reconnect to backend Li Feng
2023-09-29  0:55     ` Raphael Norwitz
2023-10-08  8:45       ` Li Feng
2023-09-22 11:46   ` [PATCH v6 4/5] vhost-user-scsi: start vhost when guest kicks Li Feng
2023-09-29  0:55     ` Raphael Norwitz
2023-09-22 11:46   ` [PATCH v6 5/5] vhost-user: fix lost reconnect Li Feng
2023-09-29  0:55     ` Raphael Norwitz
2023-09-29  0:55     ` Raphael Norwitz
2023-10-08  8:49   ` [PATCH v6 0/5] Implement reconnect for vhost-user-scsi Michael S. Tsirkin
2023-10-08  8:53     ` Li Feng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=878r9q85nd.fsf@pond.sub.org \
    --to=armbru@redhat.com \
    --cc=alex.bennee@linaro.org \
    --cc=fam@euphon.net \
    --cc=fengli@smartx.com \
    --cc=hreitz@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mst@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=raphael.norwitz@nutanix.com \
    --cc=viresh.kumar@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.