Linux virtualization list
 help / color / mirror / Atom feed
* Re: [PATCH v2] virtio-scsi: hotplug support for virtio-scsi
From: mc @ 2012-07-04  8:11 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: stefanha, linux-scsi, senwang, zwanp, linuxram, linux-kernel,
	virtualization
In-Reply-To: <4FF2DC6C.1010506@redhat.com>


Quoting Paolo Bonzini <pbonzini@redhat.com>:

> Il 03/07/2012 07:41, Cong Meng ha scritto:
>> This patch implements the hotplug support for virtio-scsi.
>> When there is a device attached/detached, the virtio-scsi driver will be
>> signaled via event virtual queue and it will add/remove the scsi device
>> in question automatically.
>>
>> v2: handle no_event event
>>
>> Signed-off-by: Cong Meng <mc@linux.vnet.ibm.com>
>> Signed-off-by: Sen Wang <senwang@linux.vnet.ibm.com>
>
> The SoB lines are swapped.  Otherwise looks good.  Since you have to
> respin, please add dropped event support too, it shouldn't be hard.

What does "The SoB lines are swapped" mean? should the changelog be  
placed after SoB lines?

>
> Paolo
>
>> ---
>>  drivers/scsi/virtio_scsi.c  |  113  
>> ++++++++++++++++++++++++++++++++++++++++++-
>>  include/linux/virtio_scsi.h |    9 ++++
>>  2 files changed, 121 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
>> index 9fc5e67..e44b2d6 100644
>> --- a/drivers/scsi/virtio_scsi.c
>> +++ b/drivers/scsi/virtio_scsi.c
>> @@ -25,6 +25,7 @@
>>  #include <scsi/scsi_cmnd.h>
>>
>>  #define VIRTIO_SCSI_MEMPOOL_SZ 64
>> +#define VIRTIO_SCSI_EVENT_LEN 8
>>
>>  /* Command queue element */
>>  struct virtio_scsi_cmd {
>> @@ -43,6 +44,12 @@ struct virtio_scsi_cmd {
>>  	} resp;
>>  } ____cacheline_aligned_in_smp;
>>
>> +struct virtio_scsi_event_node {
>> +	struct virtio_scsi *vscsi;
>> +	struct virtio_scsi_event event;
>> +	struct work_struct work;
>> +};
>> +
>>  struct virtio_scsi_vq {
>>  	/* Protects vq */
>>  	spinlock_t vq_lock;
>> @@ -67,6 +74,9 @@ struct virtio_scsi {
>>  	struct virtio_scsi_vq event_vq;
>>  	struct virtio_scsi_vq req_vq;
>>
>> +	/* Get some buffers reday for event vq */
>> +	struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
>> +
>>  	struct virtio_scsi_target_state *tgt[];
>>  };
>>
>> @@ -202,6 +212,97 @@ static void virtscsi_ctrl_done(struct virtqueue *vq)
>>  	spin_unlock_irqrestore(&vscsi->ctrl_vq.vq_lock, flags);
>>  };
>>
>> +static int virtscsi_kick_event(struct virtio_scsi *vscsi,
>> +			       struct virtio_scsi_event_node *event_node)
>> +{
>> +	int ret;
>> +	struct scatterlist sg;
>> +	unsigned long flags;
>> +
>> +	sg_set_buf(&sg, &event_node->event, sizeof(struct virtio_scsi_event));
>> +
>> +	spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
>> +
>> +	ret = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1,  
>> event_node, GFP_ATOMIC);
>> +	if (ret >= 0)
>> +		virtqueue_kick(vscsi->event_vq.vq);
>> +
>> +	spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
>> +
>> +	return ret;
>> +}
>> +
>> +static int virtscsi_kick_event_all(struct virtio_scsi *vscsi)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < VIRTIO_SCSI_EVENT_LEN; i++) {
>> +		vscsi->event_list[i].vscsi = vscsi;
>> +		virtscsi_kick_event(vscsi, &vscsi->event_list[i]);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
>> +                                            struct  
>> virtio_scsi_event *event)
>> +{
>> +	struct scsi_device *sdev;
>> +	struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
>> +	unsigned int target = event->lun[1];
>> +	unsigned int lun = (event->lun[2] << 8) | event->lun[3];
>> +
>> +	switch (event->reason) {
>> +	case VIRTIO_SCSI_EVT_RESET_RESCAN:
>> +		scsi_add_device(shost, 0, target, lun);
>> +		break;
>> +	case VIRTIO_SCSI_EVT_RESET_REMOVED:
>> +		sdev = scsi_device_lookup(shost, 0, target, lun);
>> +		if (sdev) {
>> +			scsi_remove_device(sdev);
>> +			scsi_device_put(sdev);
>> +		} else {
>> +			pr_err("SCSI device %d 0 %d %d not found\n",
>> +				shost->host_no, target, lun);
>> +		}
>> +		break;
>> +	default:
>> +		pr_info("Unsupport virtio scsi event reason %x\n", event->reason);
>> +	}
>> +}
>> +
>> +static void virtscsi_handle_event(struct work_struct *work)
>> +{
>> +	struct virtio_scsi_event_node *event_node =
>> +		container_of(work, struct virtio_scsi_event_node, work);
>> +	struct virtio_scsi *vscsi = event_node->vscsi;
>> +	struct virtio_scsi_event *event = &event_node->event;
>> +
>> +	if (event->event & VIRTIO_SCSI_T_EVENTS_MISSED) {
>> +		event->event &= (~VIRTIO_SCSI_T_EVENTS_MISSED);
>> +		/* FIXME: handle event missed here */
>> +	}
>> +
>> +	switch (event->event) {
>> +	case VIRTIO_SCSI_T_NO_EVENT:
>> +		break;
>> +	case VIRTIO_SCSI_T_TRANSPORT_RESET:
>> +		virtscsi_handle_transport_reset(vscsi, event);
>> +		break;
>> +	default:
>> +		pr_err("Unsupport virtio scsi event %x\n", event->event);
>> +	}
>> +	virtscsi_kick_event(vscsi, event_node);
>> +}
>> +
>> +static void virtscsi_complete_event(void *buf)
>> +{
>> +	struct virtio_scsi_event_node *event_node = buf;
>> +
>> +	INIT_WORK(&event_node->work, virtscsi_handle_event);
>> +	schedule_work(&event_node->work);
>> +}
>> +
>>  static void virtscsi_event_done(struct virtqueue *vq)
>>  {
>>  	struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
>> @@ -209,7 +310,7 @@ static void virtscsi_event_done(struct virtqueue *vq)
>>  	unsigned long flags;
>>
>>  	spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
>> -	virtscsi_vq_done(vq, virtscsi_complete_free);
>> +	virtscsi_vq_done(vq, virtscsi_complete_event);
>>  	spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
>>  };
>>
>> @@ -510,6 +611,10 @@ static int virtscsi_init(struct virtio_device *vdev,
>>  	virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
>>  	virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
>>
>> +	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
>> +		virtscsi_kick_event_all(vscsi);
>> +	}
>> +
>>  	/* We need to know how many segments before we allocate.  */
>>  	sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
>>
>> @@ -608,7 +713,13 @@ static struct virtio_device_id id_table[] = {
>>  	{ 0 },
>>  };
>>
>> +static unsigned int features[] = {
>> +	VIRTIO_SCSI_F_HOTPLUG
>> +};
>> +
>>  static struct virtio_driver virtio_scsi_driver = {
>> +	.feature_table = features,
>> +	.feature_table_size = ARRAY_SIZE(features),
>>  	.driver.name = KBUILD_MODNAME,
>>  	.driver.owner = THIS_MODULE,
>>  	.id_table = id_table,
>> diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
>> index 8ddeafd..dc8d305 100644
>> --- a/include/linux/virtio_scsi.h
>> +++ b/include/linux/virtio_scsi.h
>> @@ -69,6 +69,10 @@ struct virtio_scsi_config {
>>  	u32 max_lun;
>>  } __packed;
>>
>> +/* Feature Bits */
>> +#define VIRTIO_SCSI_F_INOUT                    0
>> +#define VIRTIO_SCSI_F_HOTPLUG                  1
>> +
>>  /* Response codes */
>>  #define VIRTIO_SCSI_S_OK                       0
>>  #define VIRTIO_SCSI_S_OVERRUN                  1
>> @@ -105,6 +109,11 @@ struct virtio_scsi_config {
>>  #define VIRTIO_SCSI_T_TRANSPORT_RESET          1
>>  #define VIRTIO_SCSI_T_ASYNC_NOTIFY             2
>>
>> +/* Reasons of transport reset event */
>> +#define VIRTIO_SCSI_EVT_RESET_HARD             0
>> +#define VIRTIO_SCSI_EVT_RESET_RESCAN           1
>> +#define VIRTIO_SCSI_EVT_RESET_REMOVED          2
>> +
>>  #define VIRTIO_SCSI_S_SIMPLE                   0
>>  #define VIRTIO_SCSI_S_ORDERED                  1
>>  #define VIRTIO_SCSI_S_HEAD                     2
>>

^ permalink raw reply

* Re: RFD: virtio balloon API use (was Re: [PATCH 5 of 5] virtio: expose added descriptors immediately)
From: Michael S. Tsirkin @ 2012-07-04 10:55 UTC (permalink / raw)
  To: Rusty Russell; +Cc: virtualization, Rafael Aquini, kvm, linux-kernel
In-Reply-To: <87vci5wtzh.fsf@rustcorp.com.au>

On Tue, Jul 03, 2012 at 10:17:46AM +0930, Rusty Russell wrote:
> On Mon, 2 Jul 2012 13:08:19 -0300, Rafael Aquini <aquini@redhat.com> wrote:
> > As 'locking in balloon', may I assume the approach I took for the compaction case
> > is OK and aligned to address these concerns of yours? If not, do not hesitate in
> > giving me your thoughts, please. I'm respinning a V3 series to address a couple
> > of extra nitpicks from the compaction standpoint, and I'd love to be able to
> > address any extra concern you might have on the balloon side of that work.
> 
> It's orthogonal, though looks like they clash textually :(
> 
> I'll re-spin MST's patch on top of yours, and include both in my tree,
> otherwise linux-next will have to do the merge.  But I'll await your
> push before pushing to Linus next merge window.
> 
> Thanks,
> Rusty.

While theoretical mine is a bugfix so could be 3.5 material, no?

-- 
MST

^ permalink raw reply

* Re: RFD: virtio balloon API use (was Re: [PATCH 5 of 5] virtio: expose added descriptors immediately)
From: Michael S. Tsirkin @ 2012-07-04 10:55 UTC (permalink / raw)
  To: Rafael Aquini; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <20120702160814.GB1750@t510.redhat.com>

On Mon, Jul 02, 2012 at 01:08:19PM -0300, Rafael Aquini wrote:
> On Mon, Jul 02, 2012 at 10:25:58AM +0300, Michael S. Tsirkin wrote:
> > On Mon, Jul 02, 2012 at 10:35:47AM +0930, Rusty Russell wrote:
> > > On Sun, 1 Jul 2012 12:20:51 +0300, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > On Thu, Nov 03, 2011 at 06:12:53PM +1030, Rusty Russell wrote:
> > > > > A virtio driver does virtqueue_add_buf() multiple times before finally
> > > > > calling virtqueue_kick(); previously we only exposed the added buffers
> > > > > in the virtqueue_kick() call.  This means we don't need a memory
> > > > > barrier in virtqueue_add_buf(), but it reduces concurrency as the
> > > > > device (ie. host) can't see the buffers until the kick.
> > > > > 
> > > > > Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> > > > 
> > > > Looking at recent mm compaction patches made me look at locking
> > > > in balloon closely. And I noticed the referenced patch (commit
> > > > ee7cd8981e15bcb365fc762afe3fc47b8242f630 upstream) interacts strangely
> > > > with virtio balloon; balloon currently does:
> > > > 
> > > > static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
> > > > {
> > > >         struct scatterlist sg;
> > > > 
> > > >         sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > > > 
> > > >         init_completion(&vb->acked);
> > > > 
> > > >         /* We should always be able to add one buffer to an empty queue. */
> > > >         if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
> > > >                 BUG();
> > > >         virtqueue_kick(vq);
> > > > 
> > > >         /* When host has read buffer, this completes via balloon_ack */
> > > >         wait_for_completion(&vb->acked);
> > > > }
> > > > 
> > > > 
> > > > While vq callback does:
> > > > 
> > > > static void balloon_ack(struct virtqueue *vq)
> > > > {
> > > >         struct virtio_balloon *vb;
> > > >         unsigned int len;
> > > > 
> > > >         vb = virtqueue_get_buf(vq, &len);
> > > >         if (vb)
> > > >                 complete(&vb->acked);
> > > > }
> > > > 
> > > > 
> > > > So virtqueue_get_buf might now run concurrently with virtqueue_kick.
> > > > I audited both and this seems safe in practice but I think
> > > 
> > > Good spotting!
> > > 
> > > Agreed.  Because there's only add_buf, we get away with it: the add_buf
> > > must be almost finished by the time get_buf runs because the device has
> > > seen the buffer.
> > > 
> > > > we need to either declare this legal at the API level
> > > > or add locking in driver.
> > > 
> > > I wonder if we should just lock in the balloon driver, rather than
> > > document this corner case and set a bad example.
> > 
> > We'll need to replace &vb->acked with a waitqueue
> > and do get_buf from the same thread.
> > But I note that stats_request hash the same issue.
> > Let's see if we can fix it.
> > 
> > > Are there other
> > > drivers which take the same shortcut?
> > 
> > Not that I know.
> > 
> > > > Further, is there a guarantee that we never get
> > > > spurious callbacks?  We currently check ring not empty
> > > > but esp for non shared MSI this might not be needed.
> > > 
> > > Yes, I think this saves us.  A spurious interrupt won't trigger
> > > a spurious callback.
> > > 
> > > > If a spurious callback triggers, virtqueue_get_buf can run
> > > > concurrently with virtqueue_add_buf which is known to be racy.
> > > > Again I think this is currently safe as no spurious callbacks in
> > > > practice but should we guarantee no spurious callbacks at the API level
> > > > or add locking in driver?
> > > 
> > > I think we should guarantee it, but is there a hole in the current
> > > implementation?
> > > 
> > > Thanks,
> > > Rusty.
> > 
> > Could be. The check for ring empty looks somewhat suspicious.
> > It might be expensive to make it 100% robust - that check was
> > intended as an optimization for shared interrupts.
> > Whith per vq interrupts we IMO do not need the check.
> > If we add locking in balloon I think there's no need
> > to guarantee no spurious interrupts.
> > 
> 
> As 'locking in balloon', may I assume the approach I took for the compaction case
> is OK and aligned to address these concerns of yours?

No, I mean the patch I posted. Not so much locking as moving
get_buf to thread itself.

> If not, do not hesitate in
> giving me your thoughts, please. I'm respinning a V3 series to address a couple
> of extra nitpicks from the compaction standpoint, and I'd love to be able to
> address any extra concern you might have on the balloon side of that work.
> 
> 
> Thanks!
> Rafael.

-- 
MST

^ permalink raw reply

* Re: [PATCH v2] virtio-scsi: hotplug support for virtio-scsi
From: Paolo Bonzini @ 2012-07-04 12:35 UTC (permalink / raw)
  To: mc
  Cc: stefanha, linux-scsi, senwang, zwanp, linuxram, linux-kernel,
	virtualization
In-Reply-To: <20120704041124.Horde.MN5WeZir309P8-qsGUCj3vA@imap.linux.ibm.com>

Il 04/07/2012 10:11, mc@linux.vnet.ibm.com ha scritto:
>>>
>>>
>>> Signed-off-by: Cong Meng <mc@linux.vnet.ibm.com>
>>> Signed-off-by: Sen Wang <senwang@linux.vnet.ibm.com>
>>
>> The SoB lines are swapped.  Otherwise looks good.  Since you have to
>> respin, please add dropped event support too, it shouldn't be hard.
> 
> What does "The SoB lines are swapped" mean? should the changelog be
> placed after SoB lines?

No, the patch sender should be the last SoB line.  Sen Wang should hence
come before you, not after.

Paolo

^ permalink raw reply

* [PATCH] virtio-blk spec: document topology fields
From: Paolo Bonzini @ 2012-07-04 14:02 UTC (permalink / raw)
  To: virtualization, kvm, rusty

This completes the changes from yesterday.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virtio-spec.lyx |   62 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/virtio-spec.lyx b/virtio-spec.lyx
index 440af3e..905e619 100644
--- a/virtio-spec.lyx
+++ b/virtio-spec.lyx
@@ -5211,34 +5211,88 @@ struct virtio_blk_config {
 
 \begin_layout Plain Layout
 
-\change_inserted 1531152142 1341301807
+\change_inserted 1531152142 1341409825
 
     struct virtio_blk_topology {
 \end_layout
 
 \begin_layout Plain Layout
 
-\change_inserted 1531152142 1341301810
+\change_inserted 1531152142 1341409827
+
+        // # of logical blocks per physical block (log2)
+\change_inserted 1531152142 1341410119
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 1531152142 1341409834
 
         u8 physical_block_exp;
 \end_layout
 
 \begin_layout Plain Layout
 
-\change_inserted 1531152142 1341301817
+\change_inserted 1531152142 1341409834
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 1531152142 1341409836
+
+        // offset of first aligned logical block
+\change_inserted 1531152142 1341410122
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 1531152142 1341409840
 
         u8 alignment_offset;
 \end_layout
 
 \begin_layout Plain Layout
 
-\change_inserted 1531152142 1341301822
+\change_inserted 1531152142 1341409840
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 1531152142 1341409842
+
+        // suggested minimum I/O size in blocks
+\change_inserted 1531152142 1341410124
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 1531152142 1341409846
 
         u16 min_io_size;
 \end_layout
 
 \begin_layout Plain Layout
 
+\change_inserted 1531152142 1341409846
+
+\end_layout
+
+\begin_layout Plain Layout
+
+\change_inserted 1531152142 1341409848
+
+        // optimal (suggested maximum) I/O size in blocks
+\change_inserted 1531152142 1341410126
+
+\end_layout
+
+\begin_layout Plain Layout
+
 \change_inserted 1531152142 1341301827
 
         u32 opt_io_size;
-- 
1.7.10.2


^ permalink raw reply related

* Re: [PATCH 0/6] tcm_vhost/virtio-scsi WIP code for-3.6
From: Michael S. Tsirkin @ 2012-07-04 14:02 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, Zhi Yong Wu,
	Anthony Liguori, target-devel, linux-scsi, Paolo Bonzini, lf-virt,
	Christoph Hellwig
In-Reply-To: <1341375846-27882-1-git-send-email-nab@linux-iscsi.org>

On Wed, Jul 04, 2012 at 04:24:00AM +0000, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
> 
> Hi folks,
> 
> This series contains patches required to update tcm_vhost <-> virtio-scsi
> connected hosts <-> guests to run on v3.5-rc2 mainline code.  This series is
> available on top of target-pending/auto-next here:
> 
>    git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git tcm_vhost
> 
> This includes the necessary vhost changes from Stefan to to get tcm_vhost
> functioning, along a virtio-scsi LUN scanning change to address a client bug
> with tcm_vhost I ran into..  Also, tcm_vhost driver has been merged into a single
> source + header file that is now living under /drivers/vhost/, along with latest
> tcm_vhost changes from Zhi's tcm_vhost tree.
> 
> Here are a couple of screenshots of the code in action using raw IBLOCK
> backends provided by FusionIO ioDrive Duo:
> 
>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-3.png
>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-4.png
> 
> So the next steps on my end will be converting tcm_vhost to submit backend I/O from
> cmwq context, along with fio benchmark numbers between tcm_vhost/virtio-scsi and
> virtio-scsi-raw using raw IBLOCK iomemory_vsl flash.

OK so this is an RFC, not for merge yet?

> 
> Please have a look vhost + virtio-scsi folks (mst + paolo CC'ed) and let us
> know if you have any concerns.
> 
> Thanks!
> 
> --nab
> Nicholas Bellinger (4):
>   vhost: Add vhost_scsi specific defines
>   tcm_vhost: Initial merge for vhost level target fabric driver
>   virtio-scsi: Add vdrv->scan for post VIRTIO_CONFIG_S_DRIVER_OK LUN
>     scanning
>   virtio-scsi: Set shost->max_id=1 for tcm_vhost WWPNs
> 
> Stefan Hajnoczi (2):
>   vhost: Separate vhost-net features from vhost features
>   vhost: make vhost work queue visible
> 
>  drivers/scsi/virtio_scsi.c |   20 +-
>  drivers/vhost/Kconfig      |    6 +
>  drivers/vhost/Makefile     |    1 +
>  drivers/vhost/net.c        |    4 +-
>  drivers/vhost/tcm_vhost.c  | 1592 ++++++++++++++++++++++++++++++++++++++++++++
>  drivers/vhost/tcm_vhost.h  |   70 ++
>  drivers/vhost/vhost.c      |    5 +-
>  drivers/vhost/vhost.h      |    6 +-
>  drivers/virtio/virtio.c    |    5 +-
>  include/linux/vhost.h      |    9 +
>  include/linux/virtio.h     |    1 +
>  11 files changed, 1708 insertions(+), 11 deletions(-)
>  create mode 100644 drivers/vhost/tcm_vhost.c
>  create mode 100644 drivers/vhost/tcm_vhost.h
> 
> -- 
> 1.7.2.5

^ permalink raw reply

* Re: [PATCH 3/3] virtio-blk: Add bio-based IO path for virtio-blk
From: Paolo Bonzini @ 2012-07-04 14:10 UTC (permalink / raw)
  To: dlaor
  Cc: kvm, Michael S. Tsirkin, linux-kernel, virtualization, Ronen Hod,
	Christoph Hellwig
In-Reply-To: <4FF3017C.4020605@redhat.com>

Il 03/07/2012 16:28, Dor Laor ha scritto:
>>>> Users using a spinning disk still get IO scheduling in the host though.
>>>> What benefit is there in doing it in the guest as well?
>>>
>>> The io scheduler waits for requests to merge and thus batch IOs
>>> together. It's not important w.r.t spinning disks since the host can
>>> do it but it causes much less vmexits which is the key issue for VMs.
>>
>> Does it make sense to use the guest's I/O scheduler at all?
> 
> That's the reason we have a noop io scheduler.

But is performance really better with noop?  We had to revert usage of
QUEUE_FLAG_NONROT in the guests because it caused performance
degradation (commit f8b12e513b953aebf30f8ff7d2de9be7e024dbbe).

The bio-based path is really QUEUE_FLAG_NONROT++, so it should really be
a special case for people who know what they're doing.  (Better would be
to improve QEMU, there's definitely room for 20% improvement...).

Paolo

^ permalink raw reply

* Re: [PATCH 5/6] virtio-scsi: Add vdrv->scan for post VIRTIO_CONFIG_S_DRIVER_OK LUN scanning
From: Paolo Bonzini @ 2012-07-04 14:50 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, Michael S. Tsirkin,
	Zhi Yong Wu, Anthony Liguori, target-devel, linux-scsi, lf-virt,
	Christoph Hellwig
In-Reply-To: <1341375846-27882-6-git-send-email-nab@linux-iscsi.org>

Il 04/07/2012 06:24, Nicholas A. Bellinger ha scritto:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
> 
> This patch changes virtio-scsi to use a new virtio_driver->scan() callback
> so that scsi_scan_host() can be properly invoked once virtio_dev_probe() has
> set add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK) to signal active virtio-ring
> operation, instead of from within virtscsi_probe().
> 
> This fixes a bug where SCSI LUN scanning for both virtio-scsi-raw and
> virtio-scsi/tcm_vhost setups was happening before VIRTIO_CONFIG_S_DRIVER_OK
> had been set, causing VIRTIO_SCSI_S_BAD_TARGET to occur.  This fixes a bug
> with virtio-scsi/tcm_vhost where LUN scan was not detecting LUNs.
> 
> Tested with virtio-scsi-raw + virtio-scsi/tcm_vhost w/ IBLOCK on 3.5-rc2 code.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> Cc: Zhi Yong Wu <wuzhy@cn.ibm.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

Please send this independently.  I think we also want it in stable@vger?

Paolo

> ---
>  drivers/scsi/virtio_scsi.c |   15 ++++++++++++---
>  drivers/virtio/virtio.c    |    5 ++++-
>  include/linux/virtio.h     |    1 +
>  3 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index 1b38431..391b30d 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c
> @@ -481,9 +481,10 @@ static int __devinit virtscsi_probe(struct virtio_device *vdev)
>  	err = scsi_add_host(shost, &vdev->dev);
>  	if (err)
>  		goto scsi_add_host_failed;
> -
> -	scsi_scan_host(shost);
> -
> +	/*
> +	 * scsi_scan_host() happens in virtscsi_scan() via virtio_driver->scan()
> +	 * after VIRTIO_CONFIG_S_DRIVER_OK has been set..
> +	 */
>  	return 0;
>  
>  scsi_add_host_failed:
> @@ -493,6 +494,13 @@ virtscsi_init_failed:
>  	return err;
>  }
>  
> +static void virtscsi_scan(struct virtio_device *vdev)
> +{
> +	struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv;
> +
> +	scsi_scan_host(shost);
> +}
> +
>  static void virtscsi_remove_vqs(struct virtio_device *vdev)
>  {
>  	/* Stop all the virtqueues. */
> @@ -537,6 +545,7 @@ static struct virtio_driver virtio_scsi_driver = {
>  	.driver.owner = THIS_MODULE,
>  	.id_table = id_table,
>  	.probe = virtscsi_probe,
> +	.scan = virtscsi_scan,
>  #ifdef CONFIG_PM
>  	.freeze = virtscsi_freeze,
>  	.restore = virtscsi_restore,
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index f355807..c3b3f7f 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -141,8 +141,11 @@ static int virtio_dev_probe(struct device *_d)
>  	err = drv->probe(dev);
>  	if (err)
>  		add_status(dev, VIRTIO_CONFIG_S_FAILED);
> -	else
> +	else {
>  		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> +		if (drv->scan)
> +			drv->scan(dev);
> +	}
>  
>  	return err;
>  }
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index 8efd28a..a1ba8bb 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -92,6 +92,7 @@ struct virtio_driver {
>  	const unsigned int *feature_table;
>  	unsigned int feature_table_size;
>  	int (*probe)(struct virtio_device *dev);
> +	void (*scan)(struct virtio_device *dev);
>  	void (*remove)(struct virtio_device *dev);
>  	void (*config_changed)(struct virtio_device *dev);
>  #ifdef CONFIG_PM
> 

^ permalink raw reply

* Re: [PATCH 6/6] virtio-scsi: Set shost->max_id=1 for tcm_vhost WWPNs
From: Paolo Bonzini @ 2012-07-04 14:50 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, Michael S. Tsirkin,
	Zhi Yong Wu, Anthony Liguori, target-devel, linux-scsi, lf-virt,
	Christoph Hellwig
In-Reply-To: <1341375846-27882-7-git-send-email-nab@linux-iscsi.org>

Il 04/07/2012 06:24, Nicholas A. Bellinger ha scritto:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
> 
> This is currently required for connecting to tcm_vhost in order to prevent
> the client LUN scan from detecting the same tcm_vhost WWPN on multiple target
> IDs.

But that's what the config field is for... why can't tcm_vhost (or QEMU)
set max_id to 0?

Paolo

> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> Cc: Zhi Yong Wu <wuzhy@cn.ibm.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.de>
> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
> ---
>  drivers/scsi/virtio_scsi.c |    5 ++++-
>  1 files changed, 4 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index 391b30d..8711951 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c
> @@ -475,7 +475,10 @@ static int __devinit virtscsi_probe(struct virtio_device *vdev)
>  	shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
>  	shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
>  	shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1;
> -	shost->max_id = virtscsi_config_get(vdev, max_target) + 1;
> +	/*
> +	 * Currently required for tcm_vhost to function..
> +	 */
> +	shost->max_id = 1;
>  	shost->max_channel = 0;
>  	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
>  	err = scsi_add_host(shost, &vdev->dev);
> 

^ permalink raw reply

* Re: [PATCH 0/6] tcm_vhost/virtio-scsi WIP code for-3.6
From: Paolo Bonzini @ 2012-07-04 14:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, lf-virt, Anthony Liguori,
	target-devel, linux-scsi, Zhi Yong Wu, Christoph Hellwig
In-Reply-To: <20120704140259.GB26485@redhat.com>

Il 04/07/2012 16:02, Michael S. Tsirkin ha scritto:
> On Wed, Jul 04, 2012 at 04:24:00AM +0000, Nicholas A. Bellinger wrote:
>> From: Nicholas Bellinger <nab@linux-iscsi.org>
>>
>> Hi folks,
>>
>> This series contains patches required to update tcm_vhost <-> virtio-scsi
>> connected hosts <-> guests to run on v3.5-rc2 mainline code.  This series is
>> available on top of target-pending/auto-next here:
>>
>>    git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git tcm_vhost
>>
>> This includes the necessary vhost changes from Stefan to to get tcm_vhost
>> functioning, along a virtio-scsi LUN scanning change to address a client bug
>> with tcm_vhost I ran into..  Also, tcm_vhost driver has been merged into a single
>> source + header file that is now living under /drivers/vhost/, along with latest
>> tcm_vhost changes from Zhi's tcm_vhost tree.
>>
>> Here are a couple of screenshots of the code in action using raw IBLOCK
>> backends provided by FusionIO ioDrive Duo:
>>
>>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-3.png
>>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-4.png
>>
>> So the next steps on my end will be converting tcm_vhost to submit backend I/O from
>> cmwq context, along with fio benchmark numbers between tcm_vhost/virtio-scsi and
>> virtio-scsi-raw using raw IBLOCK iomemory_vsl flash.
> 
> OK so this is an RFC, not for merge yet?

Patch 6 definitely looks RFCish, but patch 5 should go in anyway.

Paolo

^ permalink raw reply

* Re: [PATCH 0/6] tcm_vhost/virtio-scsi WIP code for-3.6
From: Michael S. Tsirkin @ 2012-07-04 15:05 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, lf-virt, Anthony Liguori,
	target-devel, linux-scsi, Zhi Yong Wu, Christoph Hellwig
In-Reply-To: <4FF45890.6000205@redhat.com>

On Wed, Jul 04, 2012 at 04:52:00PM +0200, Paolo Bonzini wrote:
> Il 04/07/2012 16:02, Michael S. Tsirkin ha scritto:
> > On Wed, Jul 04, 2012 at 04:24:00AM +0000, Nicholas A. Bellinger wrote:
> >> From: Nicholas Bellinger <nab@linux-iscsi.org>
> >>
> >> Hi folks,
> >>
> >> This series contains patches required to update tcm_vhost <-> virtio-scsi
> >> connected hosts <-> guests to run on v3.5-rc2 mainline code.  This series is
> >> available on top of target-pending/auto-next here:
> >>
> >>    git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git tcm_vhost
> >>
> >> This includes the necessary vhost changes from Stefan to to get tcm_vhost
> >> functioning, along a virtio-scsi LUN scanning change to address a client bug
> >> with tcm_vhost I ran into..  Also, tcm_vhost driver has been merged into a single
> >> source + header file that is now living under /drivers/vhost/, along with latest
> >> tcm_vhost changes from Zhi's tcm_vhost tree.
> >>
> >> Here are a couple of screenshots of the code in action using raw IBLOCK
> >> backends provided by FusionIO ioDrive Duo:
> >>
> >>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-3.png
> >>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-4.png
> >>
> >> So the next steps on my end will be converting tcm_vhost to submit backend I/O from
> >> cmwq context, along with fio benchmark numbers between tcm_vhost/virtio-scsi and
> >> virtio-scsi-raw using raw IBLOCK iomemory_vsl flash.
> > 
> > OK so this is an RFC, not for merge yet?
> 
> Patch 6 definitely looks RFCish, but patch 5 should go in anyway.
> 
> Paolo

I was talking about 4/6 first of all.
Anyway, it's best to split, not to mix RFCs and fixes.

-- 
MST

^ permalink raw reply

* Re: [PATCH] virtio-blk: allow toggling host cache between writeback and writethrough
From: Michael S. Tsirkin @ 2012-07-04 15:42 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <1341321577-24435-1-git-send-email-pbonzini@redhat.com>

On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> which exposes the cache mode in the configuration space and lets the
> driver modify it.  The cache mode is exposed via sysfs.
> 
> Even if the host does not support the new feature, the cache mode is
> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

I note this has been applied but I think the userspace
API is a bit painful to use. Let's fix it before
it gets set in stone?

Some more minor nits below.
Also Cc lists from MAINTAINERS.

> ---
>  drivers/block/virtio_blk.c |   90 ++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/virtio_blk.h |    5 ++-
>  2 files changed, 91 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 693187d..5602505 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -397,6 +397,83 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
>  	return 0;
>  }
>  
> +static int virtblk_get_cache_mode(struct virtio_device *vdev)

Why are you converting u8 to int here?
All users convert it back ...
Also, this is not really "get cache mode" it's more of a
"writeback_enabled".


> +{
> +	u8 writeback;
> +	int err;
> +
> +	err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
> +				offsetof(struct virtio_blk_config, wce),
> +				&writeback);
> +	if (err)
> +		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
> +
> +	return writeback;
> +}
> +
> +static void virtblk_update_cache_mode(struct virtio_device *vdev)
> +{
> +	u8 writeback = virtblk_get_cache_mode(vdev);
> +	struct virtio_blk *vblk = vdev->priv;
> +
> +	if (writeback)
> +		blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
> +	else
> +		blk_queue_flush(vblk->disk->queue, 0);
> +
> +       revalidate_disk(vblk->disk);
> +}
> +
> +static const char *virtblk_cache_types[] = {
> +	"write through", "write back"
> +};
> +

I wonder whether something that lacks space would have been better,
especially for show: shells might get confused and split
a string at a space. How about we change it to writethrough,
writeback before it's too late? It's part of a userspace API
after all, and you see to prefer writeback in one word in your own
code so let's not inflict pain on others :)

Also, would be nice to make it discoverable what the legal
values are. Another attribute valid_cache_types with all values
space separated?


> +static ssize_t
> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
> +			 const char *buf, size_t count)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +	struct virtio_blk *vblk = disk->private_data;
> +	struct virtio_device *vdev = vblk->vdev;
> +	int i;
> +	u8 writeback;
> +
> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
> +			break;
> +
> +	if (i < 0)
> +		return -EINVAL;
> +
> +	writeback = i;
> +	vdev->config->set(vdev,
> +			  offsetof(struct virtio_blk_config, wce),
> +			  &writeback, sizeof(writeback));
> +
> +	virtblk_update_cache_mode(vdev);
> +	return count;
> +}
> +
> +static ssize_t
> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
> +			 char *buf)
> +{
> +	struct gendisk *disk = dev_to_disk(dev);
> +	struct virtio_blk *vblk = disk->private_data;
> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
> +
> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);

Why 40? Why snprintf? A plain sprintf won't do?

> +}
> +
> +static const struct device_attribute dev_attr_cache_type_ro =
> +	__ATTR(cache_type, S_IRUGO,
> +	       virtblk_cache_type_show, NULL);
> +static const struct device_attribute dev_attr_cache_type_rw =
> +	__ATTR(cache_type, S_IRUGO|S_IWUSR,
> +	       virtblk_cache_type_show, virtblk_cache_type_store);
> +
>  static int __devinit virtblk_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_blk *vblk;
> @@ -474,8 +549,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	vblk->index = index;
>  
>  	/* configure queue flush support */
> -	if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
> -		blk_queue_flush(q, REQ_FLUSH);
> +	virtblk_update_cache_mode(vdev);
>  
>  	/* If disk is read-only in the host, the guest should obey */
>  	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
> @@ -553,6 +627,14 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
>  	if (err)
>  		goto out_del_disk;
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
> +		err = device_create_file(disk_to_dev(vblk->disk),
> +					 &dev_attr_cache_type_rw);
> +	else
> +		err = device_create_file(disk_to_dev(vblk->disk),
> +					 &dev_attr_cache_type_ro);
> +	if (err)
> +		goto out_del_disk;
>  	return 0;
>  
>  out_del_disk:
> @@ -655,7 +737,7 @@ static const struct virtio_device_id id_table[] = {
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
> -	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
> +	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
>  };
>  
>  /*
> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h
> index e0edb40..18a1027 100644
> --- a/include/linux/virtio_blk.h
> +++ b/include/linux/virtio_blk.h
> @@ -37,8 +37,9 @@
>  #define VIRTIO_BLK_F_RO		5	/* Disk is read-only */
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_SCSI	7	/* Supports scsi command passthru */
> -#define VIRTIO_BLK_F_FLUSH	9	/* Cache flush command support */
> +#define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
> +#define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
>  
>  #define VIRTIO_BLK_ID_BYTES	20	/* ID string length */
>  
> @@ -69,6 +70,8 @@ struct virtio_blk_config {
>  	/* optimal sustained I/O size in logical blocks. */
>  	__u32 opt_io_size;
>  
> +        /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
> +        __u8 wce;
>  } __attribute__((packed));
>  
>  /*
> -- 
> 1.7.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* Re: [PATCH 1/2] virtio-blk spec: document topology info
From: Michael S. Tsirkin @ 2012-07-04 15:49 UTC (permalink / raw)
  To: Rusty Russell; +Cc: Paolo Bonzini, virtualization, kvm
In-Reply-To: <87d34cvzrv.fsf@rustcorp.com.au>

On Wed, Jul 04, 2012 at 03:22:36PM +0930, Rusty Russell wrote:
> On Tue,  3 Jul 2012 15:16:51 +0200, Paolo Bonzini <pbonzini@redhat.com> wrote:
> > Current QEMU and Linux drivers can export queue parameters via the
> > virtio-blk configuration space.  Document this, since the next patch
> > will have to add another configuration field after these.
> > 
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Applied, thanks for the grammar fix, too!
> 
> > +        u8 physical_block_exp;
> > +        u8 alignment_offset;
> > +        u16 min_io_size;
> > +        u32 opt_io_size;
> 
> These aren't entirely self-evident though :(
> 
> I've put the latest on github (your changes included!), so please patch
> against that:
> 
> https://github.com/rustyrussell/virtio-spec
> 
> Thanks,
> Rusty.

Cool!
Could you please add tags that refer to various point releases
v0.8.9 to 0.9.5?

> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] virtio-blk: allow toggling host cache between writeback and writethrough
From: Paolo Bonzini @ 2012-07-04 15:54 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <20120704154220.GB27064@redhat.com>

Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
> On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
>> which exposes the cache mode in the configuration space and lets the
>> driver modify it.  The cache mode is exposed via sysfs.
>>
>> Even if the host does not support the new feature, the cache mode is
>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> I note this has been applied but I think the userspace
> API is a bit painful to use. Let's fix it before
> it gets set in stone?

I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
would totally agree with you.

>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> 
> Why are you converting u8 to int here?

The fact that it is a u8 is really an internal detail.  Perhaps the bug
is using u8 in the callers.

>> +static const char *virtblk_cache_types[] = {
>> +	"write through", "write back"
>> +};
>> +
> 
> I wonder whether something that lacks space would have been better,
> especially for show: shells might get confused and split
> a string at a space. How about we change it to writethrough,
> writeback before it's too late? It's part of a userspace API
> after all, and you see to prefer writeback in one word in your own
> code so let's not inflict pain on others :)
> 
> Also, would be nice to make it discoverable what the legal
> values are. Another attribute valid_cache_types with all values
> space separated?

We probably should add such an attribute to SCSI disks too... Even with
the spaces we could make the values \n-separated.

>> +static ssize_t
>> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
>> +			 const char *buf, size_t count)
>> +{
>> +	struct gendisk *disk = dev_to_disk(dev);
>> +	struct virtio_blk *vblk = disk->private_data;
>> +	struct virtio_device *vdev = vblk->vdev;
>> +	int i;
>> +	u8 writeback;
>> +
>> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
>> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
>> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
>> +			break;
>> +
>> +	if (i < 0)
>> +		return -EINVAL;
>> +
>> +	writeback = i;
>> +	vdev->config->set(vdev,
>> +			  offsetof(struct virtio_blk_config, wce),
>> +			  &writeback, sizeof(writeback));
>> +
>> +	virtblk_update_cache_mode(vdev);
>> +	return count;
>> +}
>> +
>> +static ssize_t
>> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
>> +			 char *buf)
>> +{
>> +	struct gendisk *disk = dev_to_disk(dev);
>> +	struct virtio_blk *vblk = disk->private_data;
>> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
>> +
>> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
>> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
> 
> Why 40? Why snprintf? A plain sprintf won't do?

I plead guilty of cut-and-paste from drivers/scsi/sd.c. :)

Paolo

^ permalink raw reply

* Re: [PATCH] virtio-blk: allow toggling host cache between writeback and writethrough
From: Michael S. Tsirkin @ 2012-07-04 16:02 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <4FF46728.7030302@redhat.com>

On Wed, Jul 04, 2012 at 05:54:16PM +0200, Paolo Bonzini wrote:
> Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
> > On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> >> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> >> which exposes the cache mode in the configuration space and lets the
> >> driver modify it.  The cache mode is exposed via sysfs.
> >>
> >> Even if the host does not support the new feature, the cache mode is
> >> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> >>
> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > 
> > I note this has been applied but I think the userspace
> > API is a bit painful to use. Let's fix it before
> > it gets set in stone?
> 
> I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
> would totally agree with you.

Hmm. Want to try fixing scsi? Need to be compatible but it could
maybe ignore spaces.

> >> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> > 
> > Why are you converting u8 to int here?
> 
> The fact that it is a u8 is really an internal detail.  Perhaps the bug
> is using u8 in the callers.

Make it bool then?

You are using u8 in the config. So you could get any value
besides 0 and 1, and you interpret that as 1.
Is 1 always a safe value? If not maybe it's better to set
to a safe value if it is not 0 or 1, that is we don't know how to interpret it.

> >> +static const char *virtblk_cache_types[] = {
> >> +	"write through", "write back"
> >> +};
> >> +
> > 
> > I wonder whether something that lacks space would have been better,
> > especially for show: shells might get confused and split
> > a string at a space. How about we change it to writethrough,
> > writeback before it's too late? It's part of a userspace API
> > after all, and you see to prefer writeback in one word in your own
> > code so let's not inflict pain on others :)
> > 
> > Also, would be nice to make it discoverable what the legal
> > values are. Another attribute valid_cache_types with all values
> > space separated?
> 
> We probably should add such an attribute to SCSI disks too... Even with
> the spaces we could make the values \n-separated.
> 
> >> +static ssize_t
> >> +virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
> >> +			 const char *buf, size_t count)
> >> +{
> >> +	struct gendisk *disk = dev_to_disk(dev);
> >> +	struct virtio_blk *vblk = disk->private_data;
> >> +	struct virtio_device *vdev = vblk->vdev;
> >> +	int i;
> >> +	u8 writeback;
> >> +
> >> +	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
> >> +	for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
> >> +		if (sysfs_streq(buf, virtblk_cache_types[i]))
> >> +			break;
> >> +
> >> +	if (i < 0)
> >> +		return -EINVAL;
> >> +
> >> +	writeback = i;
> >> +	vdev->config->set(vdev,
> >> +			  offsetof(struct virtio_blk_config, wce),
> >> +			  &writeback, sizeof(writeback));
> >> +
> >> +	virtblk_update_cache_mode(vdev);
> >> +	return count;
> >> +}
> >> +
> >> +static ssize_t
> >> +virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
> >> +			 char *buf)
> >> +{
> >> +	struct gendisk *disk = dev_to_disk(dev);
> >> +	struct virtio_blk *vblk = disk->private_data;
> >> +	u8 writeback = virtblk_get_cache_mode(vblk->vdev);
> >> +
> >> +	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
> >> +	return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
> > 
> > Why 40? Why snprintf? A plain sprintf won't do?
> 
> I plead guilty of cut-and-paste from drivers/scsi/sd.c. :)
> 
> Paolo

^ permalink raw reply

* Re: [PATCH] virtio-blk: allow toggling host cache between writeback and writethrough
From: Paolo Bonzini @ 2012-07-04 16:08 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <20120704160258.GB27271@redhat.com>

Il 04/07/2012 18:02, Michael S. Tsirkin ha scritto:
> On Wed, Jul 04, 2012 at 05:54:16PM +0200, Paolo Bonzini wrote:
>> Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
>>> On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
>>>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
>>>> which exposes the cache mode in the configuration space and lets the
>>>> driver modify it.  The cache mode is exposed via sysfs.
>>>>
>>>> Even if the host does not support the new feature, the cache mode is
>>>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
>>>>
>>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>>>
>>> I note this has been applied but I think the userspace
>>> API is a bit painful to use. Let's fix it before
>>> it gets set in stone?
>>
>> I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
>> would totally agree with you.
> 
> Hmm. Want to try fixing scsi? Need to be compatible but it could
> maybe ignore spaces.

Honestly I'm not sure it's really worthwhile...  And you also have the
same problem when printing.  You cannot remove the spaces, because
clients will look for the "old" string, with the spaces.

>>>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
>>>
>>> Why are you converting u8 to int here?
>>
>> The fact that it is a u8 is really an internal detail.  Perhaps the bug
>> is using u8 in the callers.
> 
> Make it bool then?
> 
> You are using u8 in the config. So you could get any value
> besides 0 and 1, and you interpret that as 1.
> Is 1 always a safe value? If not maybe it's better to set
> to a safe value if it is not 0 or 1, that is we don't know how to interpret it.

That would be a host bug; the spec only gives meaning to 0 and 1.  In
any case, "have a cache" means "needs to flush", so it's always safe.
If you interpret another value as 0, you risk omitting flushes.

Paolo

^ permalink raw reply

* Re: [PATCH v3 2/4] virtio_balloon: handle concurrent accesses to virtio_balloon struct elements
From: Rafael Aquini @ 2012-07-04 19:51 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Rik van Riel, Michael S. Tsirkin, Konrad Rzeszutek Wilk,
	linux-kernel, virtualization, linux-mm, Andi Kleen, Minchan Kim,
	Andrew Morton
In-Reply-To: <87vci4uj34.fsf@rustcorp.com.au>

Howdy Rusty,

First and foremost, thank you very much for taking the time to go through this
proposal and provide me with such valuable feedback. I really appreciate that.

On Wed, Jul 04, 2012 at 04:08:23PM +0930, Rusty Russell wrote:
> On Tue,  3 Jul 2012 20:48:50 -0300, Rafael Aquini <aquini@redhat.com> wrote:
> > This patch introduces access sychronization to critical elements of struct
> > virtio_balloon, in order to allow the thread concurrency compaction/migration
> > bits might ended up imposing to the balloon driver on several situations.
> > 
> > Signed-off-by: Rafael Aquini <aquini@redhat.com>
> 
> That's pretty vague, and it's almost impossible to audit this.
> 

I'll definetely attempt to improve this one.
Despite it looks concise to me as it states the "whats" and the "whys", any clue
on how to improve it and turn it into something that would make a lot more sense
is very welcome and appreciated. I'm probably failing miserably to express the
whole idea because I'm a terrible writer, no doubts about it. :)


> > +/* Protection for concurrent accesses to balloon virtqueues and vb->acked */
> > +DEFINE_MUTEX(vb_queue_completion);
> >  
> > +static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq,
> > +		      struct scatterlist *sg)
> > +{
> > +	mutex_lock(&vb_queue_completion);
> >  	init_completion(&vb->acked);
> >  
> >  	/* We should always be able to add one buffer to an empty queue. */
> > -	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
> > +	if (virtqueue_add_buf(vq, sg, 1, 0, vb, GFP_KERNEL) < 0)
> >  		BUG();
> >  	virtqueue_kick(vq);
> >  
> >  	/* When host has read buffer, this completes via balloon_ack */
> >  	wait_for_completion(&vb->acked);
> > +	mutex_unlock(&vb_queue_completion);
> >  }
> 
> OK, this lock is superceded by Michael's patch, and AFAICT is not due to
> any requirement introduced by these patches.
> 

Unfortunately, I'm compelled to disagree with you on this one.

Because tell_host() can be called concurrently, this lock is placed to avoid two
issues, basically:
 a) completion var vb->acked corruption (overriden upon several
    initializations);
 b) virtqueue operations (inflate/deflate) being called simultaneously;

Even though Michael's patch addresses the case (a), as far as this patch series
is concerned, it shows no way to prevent case (b), if two or more threads are
calling tell_host() simultaneously.


> >  static void set_page_pfns(u32 pfns[], struct page *page)
> > @@ -126,9 +132,12 @@ static void set_page_pfns(u32 pfns[], struct page *page)
> >  
> >  static void fill_balloon(struct virtio_balloon *vb, size_t num)
> >  {
> > +	struct scatterlist sg;
> > +	int alloc_failed = 0;
> >  	/* We can only do one array worth at a time. */
> >  	num = min(num, ARRAY_SIZE(vb->pfns));
> >  
> > +	spin_lock(&vb->pfn_list_lock);
> >  	for (vb->num_pfns = 0; vb->num_pfns < num;
> >  	     vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
> >  		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY |
> > @@ -138,8 +147,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> >  				dev_printk(KERN_INFO, &vb->vdev->dev,
> >  					   "Out of puff! Can't get %zu pages\n",
> >  					   num);
> > -			/* Sleep for at least 1/5 of a second before retry. */
> > -			msleep(200);
> > +			alloc_failed = 1;
> >  			break;
> >  		}
> >  		set_page_pfns(vb->pfns + vb->num_pfns, page);
> > @@ -149,10 +157,19 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num)
> >  	}
> >  
> >  	/* Didn't get any?  Oh well. */
> > -	if (vb->num_pfns == 0)
> > +	if (vb->num_pfns == 0) {
> > +		spin_unlock(&vb->pfn_list_lock);
> >  		return;
> > +	}
> > +
> > +	sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
> > +	spin_unlock(&vb->pfn_list_lock);
> >  
> > -	tell_host(vb, vb->inflate_vq);
> > +	/* alloc_page failed, sleep for at least 1/5 of a sec before retry. */
> > +	if (alloc_failed)
> > +		msleep(200);
> > +
> > +	tell_host(vb, vb->inflate_vq, &sg);
> 
> So, we drop the lock which procects vp->pfns[] and vb->num_pfns, then
> use it in tell_host?  Surely it could be corrupted between there.
> 

The lock is dropped following these conditions:
 a) virtqueue_add_buf() works based on a scatterlist array (buf);
 b) vp->pfns[] and vb->num_pfns are not anymore being directly accessed/updated
    at tell_host() level;
 c) *vb ptr address is only used as a token to identify the buffer at
    virtqueue_add_buf() and no particular struct's element is updated/accessed;
 d) we are not supposed to block/sleep while holding the spinlock;
 
Changes made to vp->pfns[] and vb->num_pfns after the spinlock is released
doesn't matter for a particular thread anymore since the scatterlist setup is
now moved outside tell_host() and it's being made within the locked session, and
no one else down that codepath directly uses vp->pfns[] or vb->num_pfns to make
its decisions.
Unfortunately, I'm not able to see the same corruption window you've spotted. Am
I missing something here?


Cheers!
Rafael

> > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> > index bfbc15c..d47c5c2 100644
> > --- a/drivers/virtio/virtio_balloon.c
> > +++ b/drivers/virtio/virtio_balloon.c
> > @@ -51,6 +51,10 @@ struct virtio_balloon
> >  
> >  	/* Number of balloon pages we've told the Host we're not using. */
> >  	unsigned int num_pages;
> > +
> > +	/* Protect 'pages', 'pfns' & 'num_pnfs' against concurrent updates */
> > +	spinlock_t pfn_list_lock;
> > +
> >  	/*
> >  	 * The pages we've told the Host we're not using.
> >  	 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
> 
> You might be better of taking num_pfns and pfns[] out of struct
> virtio_balloon, and putting them on the stack (maybe 64, not 256).
> 
> Cheers,
> Rusty.

^ permalink raw reply

* Re: [PATCH] virtio-blk: allow toggling host cache between writeback and writethrough
From: Michael S. Tsirkin @ 2012-07-04 21:30 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: linux-kernel, kvm, virtualization
In-Reply-To: <4FF46A92.3020009@redhat.com>

On Wed, Jul 04, 2012 at 06:08:50PM +0200, Paolo Bonzini wrote:
> Il 04/07/2012 18:02, Michael S. Tsirkin ha scritto:
> > On Wed, Jul 04, 2012 at 05:54:16PM +0200, Paolo Bonzini wrote:
> >> Il 04/07/2012 17:42, Michael S. Tsirkin ha scritto:
> >>> On Tue, Jul 03, 2012 at 03:19:37PM +0200, Paolo Bonzini wrote:
> >>>> This patch adds support for the new VIRTIO_BLK_F_CONFIG_WCE feature,
> >>>> which exposes the cache mode in the configuration space and lets the
> >>>> driver modify it.  The cache mode is exposed via sysfs.
> >>>>
> >>>> Even if the host does not support the new feature, the cache mode is
> >>>> visible (thanks to the existing VIRTIO_BLK_F_WCE), but not modifiable.
> >>>>
> >>>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> >>>
> >>> I note this has been applied but I think the userspace
> >>> API is a bit painful to use. Let's fix it before
> >>> it gets set in stone?
> >>
> >> I'm trying to mimic the existing userspace API for SCSI disks.  FWIW I
> >> would totally agree with you.
> > 
> > Hmm. Want to try fixing scsi? Need to be compatible but it could
> > maybe ignore spaces.
> 
> Honestly I'm not sure it's really worthwhile...  And you also have the
> same problem when printing.  You cannot remove the spaces, because
> clients will look for the "old" string, with the spaces.

Right. Oh well.

> >>>> +static int virtblk_get_cache_mode(struct virtio_device *vdev)
> >>>
> >>> Why are you converting u8 to int here?
> >>
> >> The fact that it is a u8 is really an internal detail.  Perhaps the bug
> >> is using u8 in the callers.
> > 
> > Make it bool then?
> > 
> > You are using u8 in the config. So you could get any value
> > besides 0 and 1, and you interpret that as 1.
> > Is 1 always a safe value? If not maybe it's better to set
> > to a safe value if it is not 0 or 1, that is we don't know how to interpret it.
> 
> That would be a host bug; the spec only gives meaning to 0 and 1.

Yes but if the other side does not validate values implementations
*will* have bugs. Why not declare bits 1-7 reserved?
Then we can reuse other bits later.

>  In
> any case, "have a cache" means "needs to flush", so it's always safe.
> If you interpret another value as 0, you risk omitting flushes.
> 
> Paolo

OK, that's good.

-- 
MST

^ permalink raw reply

* Re: [PATCH 0/6] tcm_vhost/virtio-scsi WIP code for-3.6
From: Anthony Liguori @ 2012-07-04 22:12 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jens Axboe, linux-scsi, kvm-devel, lf-virt, Anthony Liguori,
	target-devel, Paolo Bonzini, Zhi Yong Wu, Christoph Hellwig,
	Stefan Hajnoczi
In-Reply-To: <20120704150557.GA26951@redhat.com>

On 07/04/2012 10:05 AM, Michael S. Tsirkin wrote:
> On Wed, Jul 04, 2012 at 04:52:00PM +0200, Paolo Bonzini wrote:
>> Il 04/07/2012 16:02, Michael S. Tsirkin ha scritto:
>>> On Wed, Jul 04, 2012 at 04:24:00AM +0000, Nicholas A. Bellinger wrote:
>>>> From: Nicholas Bellinger<nab@linux-iscsi.org>
>>>>
>>>> Hi folks,
>>>>
>>>> This series contains patches required to update tcm_vhost<->  virtio-scsi
>>>> connected hosts<->  guests to run on v3.5-rc2 mainline code.  This series is
>>>> available on top of target-pending/auto-next here:
>>>>
>>>>     git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git tcm_vhost
>>>>
>>>> This includes the necessary vhost changes from Stefan to to get tcm_vhost
>>>> functioning, along a virtio-scsi LUN scanning change to address a client bug
>>>> with tcm_vhost I ran into..  Also, tcm_vhost driver has been merged into a single
>>>> source + header file that is now living under /drivers/vhost/, along with latest
>>>> tcm_vhost changes from Zhi's tcm_vhost tree.
>>>>
>>>> Here are a couple of screenshots of the code in action using raw IBLOCK
>>>> backends provided by FusionIO ioDrive Duo:
>>>>
>>>>     http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-3.png
>>>>     http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-4.png
>>>>
>>>> So the next steps on my end will be converting tcm_vhost to submit backend I/O from
>>>> cmwq context, along with fio benchmark numbers between tcm_vhost/virtio-scsi and
>>>> virtio-scsi-raw using raw IBLOCK iomemory_vsl flash.
>>>
>>> OK so this is an RFC, not for merge yet?
>>
>> Patch 6 definitely looks RFCish, but patch 5 should go in anyway.
>>
>> Paolo
>
> I was talking about 4/6 first of all.
> Anyway, it's best to split, not to mix RFCs and fixes.

What is the use-case that we're targeting for this?

I certainly think it's fine to merge this into the kernel...  maybe something 
will use it.  But I'm pretty opposed to taking support for this into QEMU.  It's 
going to create more problems than it solves specifically because I have no idea 
what problem it actually solves.

We cannot avoid doing better SCSI emulation in QEMU.  That cannot be a long term 
strategy on our part and vhost-scsi isn't going to solve that problem for us.

Regards,

Anthony Liguori

^ permalink raw reply

* Re: [PATCH 0/6] tcm_vhost/virtio-scsi WIP code for-3.6
From: Nicholas A. Bellinger @ 2012-07-05  1:52 UTC (permalink / raw)
  To: Anthony Liguori
  Cc: Jens Axboe, linux-scsi, kvm-devel, Michael S. Tsirkin, lf-virt,
	Anthony Liguori, target-devel, Paolo Bonzini, Zhi Yong Wu,
	Christoph Hellwig, Stefan Hajnoczi
In-Reply-To: <4FF4BFBD.2080000@us.ibm.com>

Hi Anthony & Co,

On Wed, 2012-07-04 at 17:12 -0500, Anthony Liguori wrote:
> On 07/04/2012 10:05 AM, Michael S. Tsirkin wrote:
> > On Wed, Jul 04, 2012 at 04:52:00PM +0200, Paolo Bonzini wrote:
> >> Il 04/07/2012 16:02, Michael S. Tsirkin ha scritto:
> >>> On Wed, Jul 04, 2012 at 04:24:00AM +0000, Nicholas A. Bellinger wrote:
> >>>> From: Nicholas Bellinger<nab@linux-iscsi.org>
> >>>>
> >>>> Hi folks,
> >>>>
> >>>> This series contains patches required to update tcm_vhost<->  virtio-scsi
> >>>> connected hosts<->  guests to run on v3.5-rc2 mainline code.  This series is
> >>>> available on top of target-pending/auto-next here:
> >>>>
> >>>>     git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git tcm_vhost
> >>>>
> >>>> This includes the necessary vhost changes from Stefan to to get tcm_vhost
> >>>> functioning, along a virtio-scsi LUN scanning change to address a client bug
> >>>> with tcm_vhost I ran into..  Also, tcm_vhost driver has been merged into a single
> >>>> source + header file that is now living under /drivers/vhost/, along with latest
> >>>> tcm_vhost changes from Zhi's tcm_vhost tree.
> >>>>
> >>>> Here are a couple of screenshots of the code in action using raw IBLOCK
> >>>> backends provided by FusionIO ioDrive Duo:
> >>>>
> >>>>     http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-3.png
> >>>>     http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-4.png
> >>>>
> >>>> So the next steps on my end will be converting tcm_vhost to submit backend I/O from
> >>>> cmwq context, along with fio benchmark numbers between tcm_vhost/virtio-scsi and
> >>>> virtio-scsi-raw using raw IBLOCK iomemory_vsl flash.
> >>>
> >>> OK so this is an RFC, not for merge yet?
> >>
> >> Patch 6 definitely looks RFCish, but patch 5 should go in anyway.
> >>
> >> Paolo
> >
> > I was talking about 4/6 first of all.
> > Anyway, it's best to split, not to mix RFCs and fixes.
> 
> What is the use-case that we're targeting for this?
> 

The first use case is high performance small block random IO access into
KVM guest from IBLOCK/FILEIO + pSCSI passthrough backends.  (see below)

The second use case is shared storage access across multiple KVM guests
using TCM level SPC-3 persistent reservations + ALUA multipath logic.

The third use case is future DIF support within virtio-scsi supported
guests that we connect directly to tcm_vhost.

> I certainly think it's fine to merge this into the kernel...  maybe something 
> will use it.  But I'm pretty opposed to taking support for this into QEMU.  It's 
> going to create more problems than it solves specifically because I have no idea 
> what problem it actually solves.
> 

To give an idea of how things are looking on the performance side, here
some initial numbers for small block (4k) mixed random IOPs using the
following fio test setup:

[randrw]
rw=randrw
rwmixwrite=25
rwmixread=75
size=131072m
ioengine=libaio
direct=1
iodepth=64
blocksize=4k
filename=/dev/sdb

The backend is a single iomemory_vsl (FusionIO) raw flash block_device
using IBLOCK w/ emulate_write_cache=1 set.  Also note the noop scheduler
has been set with virtio-scsi LUNs.  Here are the QEMU cli opts for both
cases:

./x86_64-softmmu/qemu-system-x86_64 -enable-kvm -smp 2 -m 2048 -serial
file:/tmp/vhost-serial.txt -hda debian_squeeze_amd64_standard-old.qcow2
-vhost-scsi id=vhost-scsi0,wwpn=naa.600140579ad21088,tpgt=1 -device
virtio-scsi-pci,vhost-scsi=vhost-scsi0,event_idx=off

./x86_64-softmmu/qemu-system-x86_64 -enable-kvm -smp 4 -m 2048 -serial
file:/tmp/vhost-serial.txt -hda debian_squeeze_amd64_standard-old.qcow2
-drive file=/dev/fioa,format=raw,if=none,id=sdb,cache=none,aio=native
-device virtio-scsi-pci,id=mcbus -device scsi-disk,drive=sdb


fio randrw workload | virtio-scsi-raw | virtio-scsi+tcm_vhost | bare-metal raw block
------------------------------------------------------------------------------------
25 Write / 75 Read  |      ~15K       |         ~45K          |         ~70K
75 Write / 25 Read  |      ~20K       |         ~55K          |         ~60K


In the first case, virtio-scsi+tcm_vhost is out performing by 3x
compared to virtio-scsi-raw using QEMU SCSI emulation with the same raw
flash backend device.  For the second case heavier WRITE case, tcm_vhost
is nearing full bare-metal utilization (~55K vs. ~60K).

Also converting tcm_vhost to use proper cmwq process context I/O
submission will help to get even closer to bare metal speeds for both
work-loads.

> We cannot avoid doing better SCSI emulation in QEMU.  That cannot be a long term 
> strategy on our part and vhost-scsi isn't going to solve that problem for us.
> 

Yes, QEMU needs a sane level of host OS independent functional SCSI
emulation, I don't think that is the interesting point up for debate
here..  ;)

I think performance wise it's now pretty clear that vhost is
outperforming QEMU block with virtio-scsi for intestive small block
randrw workloads.  When connected to raw block flash backends where we
avoid the SCSI LLD bottleneck for small block random I/O on the KVM host
all-together, the difference between the two case is even larger based
upon these initial benchmarks.

Thanks for your comments!

--nab

^ permalink raw reply

* Re: [PATCH 0/6] tcm_vhost/virtio-scsi WIP code for-3.6
From: Nicholas A. Bellinger @ 2012-07-05  2:01 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, lf-virt, Anthony Liguori,
	target-devel, linux-scsi, Paolo Bonzini, Zhi Yong Wu,
	Christoph Hellwig
In-Reply-To: <20120704150557.GA26951@redhat.com>

On Wed, 2012-07-04 at 18:05 +0300, Michael S. Tsirkin wrote:
> On Wed, Jul 04, 2012 at 04:52:00PM +0200, Paolo Bonzini wrote:
> > Il 04/07/2012 16:02, Michael S. Tsirkin ha scritto:
> > > On Wed, Jul 04, 2012 at 04:24:00AM +0000, Nicholas A. Bellinger wrote:
> > >> From: Nicholas Bellinger <nab@linux-iscsi.org>
> > >>
> > >> Hi folks,
> > >>
> > >> This series contains patches required to update tcm_vhost <-> virtio-scsi
> > >> connected hosts <-> guests to run on v3.5-rc2 mainline code.  This series is
> > >> available on top of target-pending/auto-next here:
> > >>
> > >>    git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git tcm_vhost
> > >>
> > >> This includes the necessary vhost changes from Stefan to to get tcm_vhost
> > >> functioning, along a virtio-scsi LUN scanning change to address a client bug
> > >> with tcm_vhost I ran into..  Also, tcm_vhost driver has been merged into a single
> > >> source + header file that is now living under /drivers/vhost/, along with latest
> > >> tcm_vhost changes from Zhi's tcm_vhost tree.
> > >>
> > >> Here are a couple of screenshots of the code in action using raw IBLOCK
> > >> backends provided by FusionIO ioDrive Duo:
> > >>
> > >>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-3.png
> > >>    http://linux-iscsi.org/images/Virtio-scsi-tcm-vhost-3.5-rc2-4.png
> > >>
> > >> So the next steps on my end will be converting tcm_vhost to submit backend I/O from
> > >> cmwq context, along with fio benchmark numbers between tcm_vhost/virtio-scsi and
> > >> virtio-scsi-raw using raw IBLOCK iomemory_vsl flash.
> > > 
> > > OK so this is an RFC, not for merge yet?
> > 
> > Patch 6 definitely looks RFCish, but patch 5 should go in anyway.
> > 
> > Paolo
> 
> I was talking about 4/6 first of all.

So yeah, this code is still considered RFC at this point for-3.6, but
I'd like to get this into target-pending/for-next in next week for more
feedback and start collecting signoffs for the necessary pieces that
effect existing vhost code.

By that time the cmwq conversion of tcm_vhost should be in place as
well..

> Anyway, it's best to split, not to mix RFCs and fixes.
> 

<nod>, I'll send patch #5 separately to linux-scsi -> James and CC
stable following Paolo's request.

Thanks!

--nab

^ permalink raw reply

* Re: [PATCH 6/6] virtio-scsi: Set shost->max_id=1 for tcm_vhost WWPNs
From: Nicholas A. Bellinger @ 2012-07-05  2:05 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, Michael S. Tsirkin,
	Zhi Yong Wu, Anthony Liguori, target-devel, linux-scsi, lf-virt,
	Christoph Hellwig
In-Reply-To: <4FF45850.20501@redhat.com>

On Wed, 2012-07-04 at 16:50 +0200, Paolo Bonzini wrote:
> Il 04/07/2012 06:24, Nicholas A. Bellinger ha scritto:
> > From: Nicholas Bellinger <nab@linux-iscsi.org>
> > 
> > This is currently required for connecting to tcm_vhost in order to prevent
> > the client LUN scan from detecting the same tcm_vhost WWPN on multiple target
> > IDs.
> 
> But that's what the config field is for... why can't tcm_vhost (or QEMU)
> set max_id to 0?
> 

So this patch was carried forward from Stefan's original code that I
thought was required due to other limitations..

If that's not the case anymore I'm happy to drop it for now and look
into a proper fix outside of virtio-scsi.

> Paolo
> 
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> > Cc: Zhi Yong Wu <wuzhy@cn.ibm.com>
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Hannes Reinecke <hare@suse.de>
> > Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
> > ---
> >  drivers/scsi/virtio_scsi.c |    5 ++++-
> >  1 files changed, 4 insertions(+), 1 deletions(-)
> > 
> > diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> > index 391b30d..8711951 100644
> > --- a/drivers/scsi/virtio_scsi.c
> > +++ b/drivers/scsi/virtio_scsi.c
> > @@ -475,7 +475,10 @@ static int __devinit virtscsi_probe(struct virtio_device *vdev)
> >  	shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
> >  	shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
> >  	shost->max_lun = virtscsi_config_get(vdev, max_lun) + 1;
> > -	shost->max_id = virtscsi_config_get(vdev, max_target) + 1;
> > +	/*
> > +	 * Currently required for tcm_vhost to function..
> > +	 */
> > +	shost->max_id = 1;
> >  	shost->max_channel = 0;
> >  	shost->max_cmd_len = VIRTIO_SCSI_CDB_SIZE;
> >  	err = scsi_add_host(shost, &vdev->dev);
> > 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe target-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] tcm_vhost: Fix tv_cmd completion -> release SGL memory leak
From: Nicholas A. Bellinger @ 2012-07-05  4:14 UTC (permalink / raw)
  To: target-devel
  Cc: Stefan Hajnoczi, kvm-devel, Michael S. Tsirkin, Zhi Yong Wu,
	linux-scsi, Paolo Bonzini, lf-virt

From: Nicholas Bellinger <nab@linux-iscsi.org>

The SGL memory allocated during vhost_scsi_map_iov_to_sgl() setup was never
getting freed during tv_cmd completion -> release path.  Fix this up by releasing
tv_cmd->tvc_sgl in vhost_scsi_free_cmd() ahead of tv_cmd descriptor free.

Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Cc: Zhi Yong Wu <wuzhy@cn.ibm.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/vhost/tcm_vhost.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
index cd86633..9692153 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/tcm_vhost.c
@@ -387,6 +387,8 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
 		u32 i;
 		for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
 			put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+
+		kfree(tv_cmd->tvc_sgl);
 	}
 
 	kfree(tv_cmd);
-- 
1.7.2.5

^ permalink raw reply related

* Re: [PATCH] tcm_vhost: Fix tv_cmd completion -> release SGL memory leak
From: Zhi Yong Wu @ 2012-07-05  5:24 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: Stefan Hajnoczi, linux-scsi, Michael S. Tsirkin, Zhi Yong Wu,
	target-devel, kvm-devel, Paolo Bonzini, lf-virt
In-Reply-To: <1341461663-13434-1-git-send-email-nab@linux-iscsi.org>

On Thu, 2012-07-05 at 04:14 +0000, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
> 
> The SGL memory allocated during vhost_scsi_map_iov_to_sgl() setup was never
> getting freed during tv_cmd completion -> release path.  Fix this up by releasing
> tv_cmd->tvc_sgl in vhost_scsi_free_cmd() ahead of tv_cmd descriptor free.
> 
> Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> Cc: Zhi Yong Wu <wuzhy@cn.ibm.com>
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
> ---
>  drivers/vhost/tcm_vhost.c |    2 ++
>  1 files changed, 2 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
> index cd86633..9692153 100644
> --- a/drivers/vhost/tcm_vhost.c
> +++ b/drivers/vhost/tcm_vhost.c
> @@ -387,6 +387,8 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
>  		u32 i;
>  		for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
>  			put_page(sg_page(&tv_cmd->tvc_sgl[i]));
> +
> +		kfree(tv_cmd->tvc_sgl);
>  	}
> 
>  	kfree(tv_cmd);

Reviewed-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

-- 
Regards,

Zhi Yong Wu

^ permalink raw reply

* Re: [PATCH 6/6] virtio-scsi: Set shost->max_id=1 for tcm_vhost WWPNs
From: Paolo Bonzini @ 2012-07-05  6:42 UTC (permalink / raw)
  To: Nicholas A. Bellinger
  Cc: Jens Axboe, Stefan Hajnoczi, kvm-devel, Michael S. Tsirkin,
	Zhi Yong Wu, Anthony Liguori, target-devel, linux-scsi, lf-virt,
	Christoph Hellwig
In-Reply-To: <1341453940.23954.229.camel@haakon2.linux-iscsi.org>

Il 05/07/2012 04:05, Nicholas A. Bellinger ha scritto:
>> > But that's what the config field is for... why can't tcm_vhost (or QEMU)
>> > set max_id to 0?
>> > 
> So this patch was carried forward from Stefan's original code that I
> thought was required due to other limitations..
> 
> If that's not the case anymore I'm happy to drop it for now and look
> into a proper fix outside of virtio-scsi.
> 

I think max_id did not exist in the virtio-scsi configuration at the
time Stefan was working on it.

Paolo

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox