From: Changpeng Liu <changpeng.liu@intel.com>
To: changpeng.liu@intel.com, virtualization@lists.linux-foundation.org
Cc: mst@redhat.com, hch@lst.de, pbonzini@redhat.com
Subject: [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver
Date: Tue, 4 Jul 2017 08:26:57 +0000 (UTC)
Date: Wed, 5 Jul 2017 16:44:39 +0800 [thread overview]
Message-ID: <1499244279-3484-1-git-send-email-changpeng.liu@intel.com> (raw)
Currently the virtio-blk driver does not provide a discard feature flag, so
filesystems built on top of the block device will not send discard
commands. This is okay for an HDD backend, but it hurts performance
for an SSD backend.
Add a feature flag VIRTIO_BLK_F_DISCARD and a command VIRTIO_BLK_T_DISCARD
to extend the existing virtio-blk protocol, and define a 16-byte discard
descriptor for each discard segment. The discard segment definition aligns
with the SCSI and NVM Express protocols. The virtio-blk driver supports
multi-range discard requests as well.
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
---
drivers/block/virtio_blk.c | 76 +++++++++++++++++++++++++++++++++++++++--
include/uapi/linux/virtio_blk.h | 19 +++++++++++
2 files changed, 92 insertions(+), 3 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0297ad7..8f0c614 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -172,10 +172,52 @@ static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}
+/*
+ * Build the discard payload for @req: one struct virtio_blk_discard per bio
+ * in the request, attached to the request as its special payload
+ * (req->special_vec with RQF_SPECIAL_PAYLOAD set).
+ *
+ * Returns 0 on success, -EINVAL if the logical block size is below 512 or
+ * the number of bios does not match the number of discard segments, and
+ * -ENOMEM if the range array cannot be allocated.  On failure nothing is
+ * attached to @req.  On success the array is released by
+ * virtblk_request_done().
+ */
+static inline int virtblk_setup_discard(struct request *req)
+{
+	unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
+	u32 block_size = queue_logical_block_size(req->q);
+	struct virtio_blk_discard *range;
+	struct bio *bio;
+
+	/* Also rejects 0, which would be a division by zero below. */
+	if (block_size < 512)
+		return -EINVAL;
+
+	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	__rq_for_each_bio(bio, req) {
+		/* Start and length of this bio in logical blocks. */
+		u64 slba = (bio->bi_iter.bi_sector << 9) / block_size;
+		u32 nlb = bio->bi_iter.bi_size / block_size;
+
+		/*
+		 * Never write past the end of the array; a mismatch between
+		 * the bio count and the segment count is reported below.
+		 */
+		if (n < segments) {
+			range[n].reserved = cpu_to_le32(0);
+			range[n].nlba = cpu_to_le32(nlb);
+			range[n].slba = cpu_to_le64(slba);
+		}
+		n++;
+	}
+
+	if (WARN_ON_ONCE(n != segments)) {
+		kfree(range);
+		return -EINVAL;
+	}
+
+	/* Hand the array to the block layer as the request's data payload. */
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * segments;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return 0;
+}
+
static inline void virtblk_request_done(struct request *req)
{
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ kfree(page_address(req->special_vec.bv_page) +
+ req->special_vec.bv_offset);
+ }
+
switch (req_op(req)) {
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
@@ -237,6 +279,9 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
case REQ_OP_FLUSH:
type = VIRTIO_BLK_T_FLUSH;
break;
+ case REQ_OP_DISCARD:
+ type = VIRTIO_BLK_T_DISCARD;
+ break;
case REQ_OP_SCSI_IN:
case REQ_OP_SCSI_OUT:
type = VIRTIO_BLK_T_SCSI_CMD;
@@ -256,9 +301,15 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(req);
+ if (type == VIRTIO_BLK_T_DISCARD) {
+ err = virtblk_setup_discard(req);
+ if (err)
+ return BLK_STS_IOERR;
+ }
+
num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
if (num) {
- if (rq_data_dir(req) == WRITE)
+ if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD)
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
else
vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
@@ -767,6 +818,25 @@ static int virtblk_probe(struct virtio_device *vdev)
if (!err && opt_io_size)
blk_queue_io_opt(q, blk_size * opt_io_size);
+ if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
+ q->limits.discard_alignment = blk_size;
+ q->limits.discard_granularity = blk_size;
+
+ virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v);
+ if (v)
+ blk_queue_max_discard_sectors(q, v);
+ else
+ blk_queue_max_discard_sectors(q, -1U);
+
+ virtio_cread(vdev, struct virtio_blk_config, max_discard_num, &v);
+ if (v)
+ blk_queue_max_discard_segments(q, v);
+ else
+ blk_queue_max_discard_segments(q, 256);
+
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+ }
+
virtio_device_ready(vdev);
device_add_disk(&vdev->dev, vblk->disk);
@@ -874,14 +944,14 @@ static int virtblk_restore(struct virtio_device *vdev)
VIRTIO_BLK_F_SCSI,
#endif
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
- VIRTIO_BLK_F_MQ,
+ VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
}
;
static unsigned int features[] = {
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
- VIRTIO_BLK_F_MQ,
+ VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
};
static struct virtio_driver virtio_blk = {
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 9ebe4d9..3354cc3 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -38,6 +38,7 @@
#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD command is supported */
/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -86,6 +87,10 @@ struct virtio_blk_config {
/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
__u16 num_queues;
+ /* The maximum segment size (if VIRTIO_BLK_F_DISCARD) */
+ __u32 max_discard_seg;
+ /* The maximum number of segments (if VIRTIO_BLK_F_DISCARD) */
+ __u32 max_discard_num;
} __attribute__((packed));
/*
@@ -114,6 +119,9 @@ struct virtio_blk_config {
/* Get device ID command */
#define VIRTIO_BLK_T_GET_ID 8
+/* Discard command */
+#define VIRTIO_BLK_T_DISCARD 16
+
#ifndef VIRTIO_BLK_NO_LEGACY
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
@@ -133,6 +141,17 @@ struct virtio_blk_outhdr {
__virtio64 sector;
};
+/*
+ * One discard range.  A discard request carries an array of these as its
+ * data payload.  The driver fills every field in little-endian byte order.
+ */
+struct virtio_blk_discard {
+	/* first logical block to discard */
+	__le64 slba;
+	/* number of logical blocks to discard */
+	__le32 nlba;
+	/* reserved; the driver sets this to zero */
+	__le32 reserved;
+};
+
#ifndef VIRTIO_BLK_NO_LEGACY
struct virtio_scsi_inhdr {
__virtio32 errors;
--
1.9.3
next reply other threads:[~2017-07-04 8:26 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-04 8:26 Changpeng Liu [this message]
2017-07-04 9:24 ` [PATCH v2] virtio-blk: add DISCARD support to virtio-blk driver Paolo Bonzini
2017-07-04 21:35 ` Michael S. Tsirkin
2017-07-05 7:57 ` Liu, Changpeng
2017-07-05 8:00 ` Paolo Bonzini
2017-07-05 12:56 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1499244279-3484-1-git-send-email-changpeng.liu@intel.com \
--to=changpeng.liu@intel.com \
--cc=hch@lst.de \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).