From: "Michael S. Tsirkin" <mst@redhat.com>
To: Asias He <asias@redhat.com>
Cc: kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
target-devel@vger.kernel.org,
Stefan Hajnoczi <stefanha@redhat.com>,
Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: [PATCH v5 2/2] tcm_vhost: Wait for pending requests in vhost_scsi_flush()
Date: Tue, 16 Apr 2013 20:58:27 +0300 [thread overview]
Message-ID: <20130416175827.GB2844@redhat.com> (raw)
In-Reply-To: <1366103811-21887-3-git-send-email-asias@redhat.com>
On Tue, Apr 16, 2013 at 05:16:51PM +0800, Asias He wrote:
> This patch makes vhost_scsi_flush() wait for all the pending requests
> issued before the flush operation to be finished.
>
> Changes in v5:
> - Use kref and completion
> - Fail req if vs->vs_inflight is NULL
> - Rename tcm_vhost_alloc_inflight to tcm_vhost_set_inflight
>
> Changes in v4:
> - Introduce vhost_scsi_inflight
> - Drop array to track flush
> - Use RCU to protect vs_inflight explicitly
>
> Changes in v3:
> - Rebase
> - Drop 'tcm_vhost: Wait for pending requests in
> vhost_scsi_clear_endpoint()' in this series, we already did that in
> 'tcm_vhost: Use vq->private_data to indicate if the endpoint is setup'
>
> Changes in v2:
> - Increase/Decrease inflight requests in
> vhost_scsi_{allocate,free}_cmd and tcm_vhost_{allocate,free}_evt
>
> Signed-off-by: Asias He <asias@redhat.com>
OK looks good, except error handling needs to be fixed.
> ---
> drivers/vhost/tcm_vhost.c | 101 +++++++++++++++++++++++++++++++++++++++++++---
> drivers/vhost/tcm_vhost.h | 5 +++
> 2 files changed, 101 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
> index 4ae6725..ef40a8f 100644
> --- a/drivers/vhost/tcm_vhost.c
> +++ b/drivers/vhost/tcm_vhost.c
> @@ -74,6 +74,11 @@ enum {
> #define VHOST_SCSI_MAX_VQ 128
> #define VHOST_SCSI_MAX_EVENT 128
>
> +struct vhost_scsi_inflight {
> + struct completion comp; /* Wait for the flush operation to finish */
> + struct kref kref; /* Refcount for the inflight reqs */
> +};
> +
> struct vhost_scsi {
> /* Protected by vhost_scsi->dev.mutex */
> struct tcm_vhost_tpg **vs_tpg;
> @@ -91,6 +96,8 @@ struct vhost_scsi {
> struct mutex vs_events_lock; /* protect vs_events_dropped,events_nr */
> bool vs_events_dropped; /* any missed events */
> int vs_events_nr; /* num of pending events */
> +
> + struct vhost_scsi_inflight __rcu *vs_inflight; /* track inflight reqs */
> };
>
> /* Local pointer to allocated TCM configfs fabric module */
> @@ -108,6 +115,51 @@ static int iov_num_pages(struct iovec *iov)
> ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
> }
>
> +static int tcm_vhost_set_inflight(struct vhost_scsi *vs)
> +{
> + struct vhost_scsi_inflight *inflight;
> + int ret = -ENOMEM;
> +
> + inflight = kzalloc(sizeof(*inflight), GFP_KERNEL);
kzalloc is not needed, you initialize all fields.
> + if (inflight) {
> + kref_init(&inflight->kref);
> + init_completion(&inflight->comp);
> + ret = 0;
> + }
> + rcu_assign_pointer(vs->vs_inflight, inflight);
So if allocation fails, we stop tracking inflights?
This looks strange, and could break guests. Why not the usual
if (!inflight)
return -ENOMEM;
> + synchronize_rcu();
open call is different:
- sync is not needed
- should use RCU_INIT_POINTER and not rcu_assign_pointer
So please move these out and make this function return the struct:
struct vhost_scsi_inflight *inflight
tcm_vhost_alloc_inflight(void)
> +
> + return ret;
> +}
> +
> +static struct vhost_scsi_inflight *
> +tcm_vhost_inc_inflight(struct vhost_scsi *vs)
And then inc will not need to return inflight pointer,
which is really unusual.
> +{
> + struct vhost_scsi_inflight *inflight;
> +
> + rcu_read_lock();
> + inflight = rcu_dereference(vs->vs_inflight);
> + if (inflight)
> + kref_get(&inflight->kref);
> + rcu_read_unlock();
> +
> + return inflight;
> +}
> +
> +void tcm_vhost_done_inflight(struct kref *kref)
> +{
> + struct vhost_scsi_inflight *inflight;
> +
> + inflight = container_of(kref, struct vhost_scsi_inflight, kref);
> + complete(&inflight->comp);
> +}
> +
> +static void tcm_vhost_dec_inflight(struct vhost_scsi_inflight *inflight)
> +{
> + if (inflight)
Here as in other places, inflight must never be NULL.
Pls fix code so that invariant holds.
> + kref_put(&inflight->kref, tcm_vhost_done_inflight);
> +}
> +
> static bool tcm_vhost_check_feature(struct vhost_scsi *vs, int feature)
> {
> bool ret = false;
> @@ -402,6 +454,7 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
> static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
> {
> mutex_lock(&vs->vs_events_lock);
> + tcm_vhost_dec_inflight(evt->inflight);
> vs->vs_events_nr--;
> kfree(evt);
> mutex_unlock(&vs->vs_events_lock);
> @@ -413,21 +466,27 @@ static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
> struct tcm_vhost_evt *evt;
>
> mutex_lock(&vs->vs_events_lock);
> - if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
> - vs->vs_events_dropped = true;
> - mutex_unlock(&vs->vs_events_lock);
> - return NULL;
> - }
> + if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT)
> + goto out;
>
> evt = kzalloc(sizeof(*evt), GFP_KERNEL);
BTW it looks like we should replace this kzalloc with kmalloc.
Should be a separate patch ...
> if (evt) {
> evt->event.event = event;
> evt->event.reason = reason;
> + evt->inflight = tcm_vhost_inc_inflight(vs);
> + if (!evt->inflight) {
We drop an event because earlier
we run out of memory for allocating the inflight counter.
Does not make sense to me.
> + kfree(evt);
> + goto out;
> + }
> vs->vs_events_nr++;
> }
> mutex_unlock(&vs->vs_events_lock);
>
> return evt;
> +out:
> + vs->vs_events_dropped = true;
> + mutex_unlock(&vs->vs_events_lock);
> + return NULL;
> }
>
> static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
> @@ -445,6 +504,8 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
> kfree(tv_cmd->tvc_sgl);
> }
>
> + tcm_vhost_dec_inflight(tv_cmd->inflight);
> +
> kfree(tv_cmd);
> }
>
> @@ -595,6 +656,9 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
> tv_cmd->tvc_data_direction = data_direction;
> tv_cmd->tvc_nexus = tv_nexus;
> tv_cmd->tvc_vhost = vs;
> + tv_cmd->inflight = tcm_vhost_inc_inflight(vs);
> + if (!tv_cmd->inflight)
> + return ERR_PTR(-ENOMEM);
>
> return tv_cmd;
> }
> @@ -982,12 +1046,35 @@ static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
>
> static void vhost_scsi_flush(struct vhost_scsi *vs)
> {
> + struct vhost_scsi_inflight *inflight;
> int i;
>
> + /* inflight points to the old inflight */
> + inflight = rcu_dereference_protected(vs->vs_inflight,
> + lockdep_is_held(&vs->dev.mutex));
> +
> + /* Allocate a new inflight and make vs->vs_inflight points to it */
> + if (tcm_vhost_set_inflight(vs) < 0)
> + pr_warn("vhost_scsi_flush failed to allocate inflight\n");
That's unlikely to reach the application. How about we stop here,
and propagate the error to ioctl caller?
> +
> + /*
> + * The inflight->kref was initialized to 1. We decrement it here to
> + * indicate the start of the flush operation so that it will reach 0
> + * when all the reqs are finished.
> + */
> + kref_put(&inflight->kref, tcm_vhost_done_inflight);
> +
> + /* Flush both the vhost poll and vhost work */
> for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
> vhost_scsi_flush_vq(vs, i);
> vhost_work_flush(&vs->dev, &vs->vs_completion_work);
> vhost_work_flush(&vs->dev, &vs->vs_event_work);
> +
> + /* Wait for all reqs issued before the flush to be finished */
> + if (inflight) {
inflight should never be NULL, otherwise inflight
tracjing is not effective. Please fix error handling so we
never reach here with inflight == NULL.
> + wait_for_completion(&inflight->comp);
> + kfree(inflight);
> + }
> }
>
> /*
> @@ -1196,6 +1283,9 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
> s->vs_events_dropped = false;
> mutex_init(&s->vs_events_lock);
>
> + if (tcm_vhost_set_inflight(s) < 0)
> + return -ENOMEM;
> +
Better propagate the return code to user.
> s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick;
> s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick;
> for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++)
> @@ -1221,6 +1311,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
> vhost_scsi_clear_endpoint(s, &t);
> vhost_dev_stop(&s->dev);
> vhost_dev_cleanup(&s->dev, false);
> + kfree(s->vs_inflight);
> kfree(s);
> return 0;
> }
> diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
> index 94e9ee53..7567767 100644
> --- a/drivers/vhost/tcm_vhost.h
> +++ b/drivers/vhost/tcm_vhost.h
> @@ -2,6 +2,7 @@
> #define TCM_VHOST_NAMELEN 256
> #define TCM_VHOST_MAX_CDB_SIZE 32
>
> +struct vhost_scsi_inflight;
> struct tcm_vhost_cmd {
> /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
> int tvc_vq_desc;
> @@ -37,6 +38,8 @@ struct tcm_vhost_cmd {
> unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
> /* Completed commands list, serviced from vhost worker thread */
> struct llist_node tvc_completion_list;
> + /* Used to track inflight req */
> + struct vhost_scsi_inflight *inflight;
> };
>
> struct tcm_vhost_nexus {
> @@ -91,6 +94,8 @@ struct tcm_vhost_evt {
> struct virtio_scsi_event event;
> /* virtio_scsi event list, serviced from vhost worker thread */
> struct llist_node list;
> + /* Used to track inflight req */
> + struct vhost_scsi_inflight *inflight;
> };
>
> /*
> --
> 1.8.1.4
next prev parent reply other threads:[~2013-04-16 17:58 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-09 9:39 [PATCH] tcm_vhost: Wait for pending requests in vhost_scsi_flush() Asias He
2013-04-11 10:47 ` Michael S. Tsirkin
2013-04-12 6:25 ` Asias He
2013-04-12 11:33 ` Michael S. Tsirkin
2013-04-12 14:59 ` Asias He
2013-04-12 14:59 ` Asias He
2013-04-14 10:07 ` Michael S. Tsirkin
2013-04-14 12:38 ` Asias He
2013-04-13 3:29 ` [PATCH v4 0/2] tcm_vhost flush Asias He
2013-04-16 9:16 ` [PATCH v5 " Asias He
2013-04-16 9:16 ` Asias He
2013-04-16 9:16 ` [PATCH v5 1/2] tcm_vhost: Pass vhost_scsi to vhost_scsi_allocate_cmd Asias He
2013-04-16 9:16 ` Asias He
2013-04-16 9:16 ` [PATCH v5 2/2] tcm_vhost: Wait for pending requests in vhost_scsi_flush() Asias He
2013-04-16 17:58 ` Michael S. Tsirkin [this message]
2013-04-17 1:29 ` Asias He
2013-04-17 10:07 ` Michael S. Tsirkin
2013-04-17 12:07 ` Asias He
2013-04-13 3:29 ` [PATCH v4 1/2] tcm_vhost: Pass vhost_scsi to vhost_scsi_allocate_cmd Asias He
2013-04-13 3:29 ` [PATCH v4 2/2] tcm_vhost: Wait for pending requests in vhost_scsi_flush() Asias He
2013-04-13 3:29 ` Asias He
2013-04-14 9:58 ` Michael S. Tsirkin
2013-04-14 12:27 ` Asias He
2013-04-14 12:27 ` Asias He
2013-04-15 7:18 ` Asias He
2013-04-15 10:11 ` Michael S. Tsirkin
2013-04-16 0:35 ` Asias He
2013-04-14 9:58 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130416175827.GB2844@redhat.com \
--to=mst@redhat.com \
--cc=asias@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=stefanha@redhat.com \
--cc=target-devel@vger.kernel.org \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.