Linux virtualization list
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Changpeng Liu <changpeng.liu@intel.com>
Cc: james.r.harris@intel.com, pawelx.wodkowski@intel.com,
	fabio.miranda.martins@canonical.com,
	virtualization@lists.linux-foundation.org, stefanha@redhat.com,
	pbonzini@redhat.com, cavery@redhat.com
Subject: Re: [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support
Date: Fri, 30 Mar 2018 04:07:18 +0300	[thread overview]
Message-ID: <20180330034517-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <1522370974-28259-1-git-send-email-changpeng.liu@intel.com>

On Fri, Mar 30, 2018 at 08:49:34AM +0800, Changpeng Liu wrote:
> Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES
> command support, this will impact the performance when using SSD
> backend over file systems.
> 
> The idea here is using 16 Bytes payload as one descriptor for
> DISCARD/WRITE ZEROES command, users can put several ranges into
> one command, for the purpose to support such feature, two feature
> flags VIRTIO_BLK_F_DISCARD/VIRTIO_BLK_F_WRITE_ZEROES and two
> commands VIRTIO_BLK_T_DISCARD/VIRTIO_BLK_T_WRITE_ZEROES are
> introduced, and some parameters are added to the configuration
> space to tell the OS the granularity of DISCARD/WRITE ZEROES
> commands.

Pls fix grammar in this comment, I am not sure what are you
trying to say.

> 
> The specification change list here:
> https://github.com/oasis-tcs/virtio-spec

Which commit?

> CHANGELOG:
> v3: finalized the specification change.

Changelog belongs after -- and should be complete
including changes v1 to v2.

> Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> ---
>  drivers/block/virtio_blk.c      | 96 +++++++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_blk.h | 39 +++++++++++++++++
>  2 files changed, 132 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 4a07593c..1943adb 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -172,10 +172,53 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +static inline int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
> +{
> +	unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;

Split on two lines pls:
	unsigned short n = 0;

> +	u32 block_size = queue_logical_block_size(req->q);

Seems to be unused except for the sanity check below. Why?

> +	struct virtio_blk_discard_write_zeroes *range;
> +	struct bio *bio;
> +
> +	if (block_size < 512 || !block_size)

Why 512? And when is it 0? 

> +		return -1;

-1 isn't a normal errno code.

> +
> +	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);

This might be pretty large: with 64K segments it looks like you are
trying to allocate ~1Mbyte with GFP_ATOMIC which is unlikely to succeed.
Can we split this up in chunks?

> +	if (!range)
> +		return -1;
> +
> +	__rq_for_each_bio(bio, req) {
> +		u64 sector = bio->bi_iter.bi_sector;
> +		u32 num_sectors = bio->bi_iter.bi_size >> 9;

why 9?

> +
> +		range[n].flags.unmap = cpu_to_le32(unmap);
> +		range[n].flags.reserved = cpu_to_le32(0);
> +		range[n].num_sectors = cpu_to_le32(num_sectors);
> +		range[n].sector = cpu_to_le64(sector);

Isn't this causing sparse warnings?


> +		n++;
> +	}
> +
> +	if (WARN_ON_ONCE(n != segments)) {

and when does this happen?

> +		kfree(range);
> +		return -1;
> +	}
> +
> +	req->special_vec.bv_page = virt_to_page(range);
> +	req->special_vec.bv_offset = offset_in_page(range);
> +	req->special_vec.bv_len = sizeof(*range) * segments;
> +	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +	return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>  	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +		kfree(page_address(req->special_vec.bv_page) +
> +		      req->special_vec.bv_offset);
> +	}
> +
>  	switch (req_op(req)) {
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
> @@ -225,6 +268,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	int qid = hctx->queue_num;
>  	int err;
>  	bool notify = false;
> +	bool unmap = false;
>  	u32 type;
>  
>  	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> @@ -237,6 +281,13 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	case REQ_OP_FLUSH:
>  		type = VIRTIO_BLK_T_FLUSH;
>  		break;
> +	case REQ_OP_DISCARD:
> +		type = VIRTIO_BLK_T_DISCARD;
> +		break;
> +	case REQ_OP_WRITE_ZEROES:
> +		type = VIRTIO_BLK_T_WRITE_ZEROES;
> +		unmap = !(req->cmd_flags & REQ_NOUNMAP);
> +		break;
>  	case REQ_OP_SCSI_IN:
>  	case REQ_OP_SCSI_OUT:
>  		type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,9 +307,16 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
>  
>  	blk_mq_start_request(req);
>  
> +	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
> +		err = virtblk_setup_discard_write_zeroes(req, unmap);
> +		if (err)
> +			return BLK_STS_IOERR;

Does a failure actually indicate an IO error?


> +	}
> +
>  	num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>  	if (num) {
> -		if (rq_data_dir(req) == WRITE)
> +		if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD ||
> +		    type == VIRTIO_BLK_T_WRITE_ZEROES)
>  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
>  		else
>  			vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
> @@ -777,6 +835,38 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +		q->limits.discard_granularity = blk_size;
> +
> +		virtio_cread(vdev, struct virtio_blk_config, discard_sector_alignment, &v);
> +		if (v)
> +			q->limits.discard_alignment = v << 9;
> +		else
> +			q->limits.discard_alignment = 0;
> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v);
> +		if (v)
> +			blk_queue_max_discard_sectors(q, v);
> +		else
> +			blk_queue_max_discard_sectors(q, UINT_MAX);
> +
> +		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v);
> +		if (v)
> +			blk_queue_max_discard_segments(q, v);
> +		else
> +			blk_queue_max_discard_segments(q, USHRT_MAX);
> +
> +		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
> +	}
> +
> +	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> +		virtio_cread(vdev, struct virtio_blk_config, max_write_zeroes_sectors, &v);
> +		if (v)
> +			blk_queue_max_write_zeroes_sectors(q, v);
> +		else
> +			blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> +	}
> +
>  	virtblk_update_capacity(vblk, false);
>  	virtio_device_ready(vdev);
>  
> @@ -885,14 +975,14 @@ static int virtblk_restore(struct virtio_device *vdev)
>  	VIRTIO_BLK_F_SCSI,
>  #endif
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  }
>  ;
>  static unsigned int features[] = {
>  	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>  	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>  	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -	VIRTIO_BLK_F_MQ,
> +	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d9..78a60e6 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,8 @@
>  #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
>  #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
>  #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD command is supported */
> +#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES command is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +88,22 @@ struct virtio_blk_config {
>  
>  	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>  	__u16 num_queues;
> +	/* The maximum discard segment size (if VIRTIO_BLK_F_DISCARD) */
> +	__u32 max_discard_sectors;
> +	/* The maximum number of discard segments (if VIRTIO_BLK_F_DISCARD) */
> +	__u32 max_discard_seg;
> +	/* The sector alignment for discard (if VIRTIO_BLK_F_DISCARD) */
> +	__u32 discard_sector_alignment;
> +	/* The maximum number of write zeroes sectors (if VIRTIO_BLK_F_WRITE_ZEROES) */
> +	__u32 max_write_zeroes_sectors;
> +	/* The maximum number of write zeroes segments (if VIRTIO_BLK_F_WRITE_ZEROES) */
> +	__u32 max_write_zeroes_seg;
> +	/* Device clear this bit when write zeroes command cannot result in
> +	 * deallocating one or more sectors
> +	 * (if VIRTIO_BLK_F_WRITE_ZEROES with unmap bit)

Pls fix grammar in this comment, I am not sure what are you trying to
say.

> +	 */
> +	__u8 write_zeroes_may_unmap;
> +	__u8 unused1[3];
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +132,12 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD	11
> +
> +/* Write zeroes command */
> +#define VIRTIO_BLK_T_WRITE_ZEROES	13
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER	0x80000000
> @@ -133,6 +157,21 @@ struct virtio_blk_outhdr {
>  	__virtio64 sector;
>  };
>  
> +/*
> + * discard/write zeroes range for each request.
> + */
> +struct virtio_blk_discard_write_zeroes {
> +	/* discard/write zeroes start sector */
> +	__virtio64 sector;
> +	/* number of discard/write zeroes sectors */
> +	__virtio32 num_sectors;
> +	/* valid for write zeroes command */
> +	struct {
> +		__virtio32 unmap:1;
> +		__virtio32 reserved:31;
> +	} flags;
> +};
> +

You can't use bitmaps in portable code.
The format differs between architectures.

>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>  	__virtio32 errors;
> -- 
> 1.9.3

  reply	other threads:[~2018-03-30  1:07 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-30  0:49 [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support Changpeng Liu
2018-03-30  1:07 ` Michael S. Tsirkin [this message]
2018-05-23 20:37   ` Liu, Changpeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180330034517-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=cavery@redhat.com \
    --cc=changpeng.liu@intel.com \
    --cc=fabio.miranda.martins@canonical.com \
    --cc=james.r.harris@intel.com \
    --cc=pawelx.wodkowski@intel.com \
    --cc=pbonzini@redhat.com \
    --cc=stefanha@redhat.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox