From: Stefano Garzarella <sgarzare@redhat.com>
To: qemu-devel@nongnu.org
Cc: "Michael S. Tsirkin" <mst@redhat.com>,
"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
Kevin Wolf <kwolf@redhat.com>,
Eduardo Habkost <ehabkost@redhat.com>,
Laurent Vivier <lvivier@redhat.com>,
Marcel Apfelbaum <marcel.apfelbaum@gmail.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Stefan Hajnoczi <stefanha@redhat.com>,
Jason Wang <jasowang@redhat.com>,
qemu-block@nongnu.org, Max Reitz <mreitz@redhat.com>,
Thomas Huth <thuth@redhat.com>
Subject: [Qemu-devel] [PATCH v5 06/10] virtio-blk: add DISCARD and WRITE_ZEROES features
Date: Mon, 18 Feb 2019 15:02:57 +0100 [thread overview]
Message-ID: <20190218140301.197408-7-sgarzare@redhat.com> (raw)
In-Reply-To: <20190218140301.197408-1-sgarzare@redhat.com>
This patch adds support for the DISCARD and WRITE_ZEROES commands,
which were introduced in the virtio-blk protocol to improve
performance when using an SSD backend.
We support only one segment per request since multiple segments
are not widely used and there are no userspace APIs that allow
applications to submit multiple segments in a single call.
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
hw/block/virtio-blk.c | 184 +++++++++++++++++++++++++++++++++
include/hw/virtio/virtio-blk.h | 2 +
2 files changed, 186 insertions(+)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8798d13bc4..c159a3d5f7 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -169,6 +169,30 @@ out:
aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
+static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
+{ /*
+ * Completion callback for the DISCARD and WRITE_ZEROES operations started
+ * with blk_aio_pdiscard() and blk_aio_pwrite_zeroes().
+ *
+ * @opaque: the VirtIOBlockReq that was passed to the AIO call
+ * @ret: result of the operation; any non-zero value takes the error path
+ */
+ VirtIOBlockReq *req = opaque;
+ VirtIOBlock *s = req->dev;
+ bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
+ ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; /* BARRIER bit masked */
+ /* Everything below runs with the block backend's AioContext acquired. */
+ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+ if (ret) {
+ if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
+ goto out; /* NOTE(review): presumably the handler now owns req - confirm */
+ }
+ }
+ /*
+ * Reached on success, and also when virtio_blk_handle_rw_error() returned
+ * false for a failed operation: both cases complete with VIRTIO_BLK_S_OK.
+ */
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
+ if (is_write_zeroes) { /* only WRITE_ZEROES has accounting in progress */
+ block_acct_done(blk_get_stats(s->blk), &req->acct);
+ }
+ virtio_blk_free_request(req);
+
+out:
+ aio_context_release(blk_get_aio_context(s->conf.conf.blk));
+}
+
#ifdef __linux__
typedef struct {
@@ -502,6 +526,84 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
return true;
}
+static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
+ struct virtio_blk_discard_write_zeroes *dwz_hdr, bool is_write_zeroes)
+{ /*
+ * Validate a single DISCARD or WRITE_ZEROES segment and submit it to the
+ * block backend as an asynchronous operation.
+ *
+ * @req: the request being handled; passed as opaque to the AIO callback
+ * @dwz_hdr: segment header; its fields are read with virtio_ld*_p()
+ * @is_write_zeroes: true for WRITE_ZEROES, false for DISCARD
+ *
+ * Returns VIRTIO_BLK_S_OK once the operation has been submitted; req is
+ * then finished by virtio_blk_discard_write_zeroes_complete(). Otherwise
+ * returns VIRTIO_BLK_S_IOERR or VIRTIO_BLK_S_UNSUPP; in that case nothing
+ * is submitted and req is neither completed nor freed by this function.
+ */
+ VirtIOBlock *s = req->dev;
+ VirtIODevice *vdev = VIRTIO_DEVICE(s);
+ uint64_t sector;
+ uint32_t num_sectors, flags, max_sectors;
+ uint8_t err_status;
+ int bytes;
+
+ sector = virtio_ldq_p(vdev, &dwz_hdr->sector);
+ num_sectors = virtio_ldl_p(vdev, &dwz_hdr->num_sectors);
+ flags = virtio_ldl_p(vdev, &dwz_hdr->flags);
+ max_sectors = is_write_zeroes ? s->conf.max_write_zeroes_sectors :
+ s->conf.max_discard_sectors; /* per-command limit from the device config */
+
+ /*
+ * max_sectors is at most BDRV_REQUEST_MAX_SECTORS, so this check
+ * ensures that "num_sectors << BDRV_SECTOR_BITS" fits in the
+ * int variable "bytes".
+ */
+ if (unlikely(num_sectors > max_sectors)) {
+ err_status = VIRTIO_BLK_S_IOERR;
+ goto err;
+ }
+
+ bytes = num_sectors << BDRV_SECTOR_BITS; /* sectors -> bytes */
+
+ if (unlikely(!virtio_blk_sect_range_ok(s, sector, bytes))) {
+ err_status = VIRTIO_BLK_S_IOERR;
+ goto err;
+ }
+
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+ * and write zeroes commands if any unknown flag is set.
+ */
+ if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ err_status = VIRTIO_BLK_S_UNSUPP;
+ goto err;
+ }
+
+ if (is_write_zeroes) { /* VIRTIO_BLK_T_WRITE_ZEROES */
+ int blk_aio_flags = 0;
+
+ if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { /* map to block flag */
+ blk_aio_flags |= BDRV_REQ_MAY_UNMAP;
+ }
+ /*
+ * Accounted as a write of "bytes" bytes; block_acct_done() is called
+ * from the completion callback.
+ */
+ block_acct_start(blk_get_stats(s->blk), &req->acct, bytes,
+ BLOCK_ACCT_WRITE);
+
+ blk_aio_pwrite_zeroes(s->blk, sector << BDRV_SECTOR_BITS,
+ bytes, blk_aio_flags,
+ virtio_blk_discard_write_zeroes_complete, req);
+ } else { /* VIRTIO_BLK_T_DISCARD */
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+ * discard commands if the unmap flag is set.
+ */
+ if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ err_status = VIRTIO_BLK_S_UNSUPP;
+ goto err;
+ }
+
+ blk_aio_pdiscard(s->blk, sector << BDRV_SECTOR_BITS, bytes,
+ virtio_blk_discard_write_zeroes_complete, req);
+ }
+
+ return VIRTIO_BLK_S_OK; /* submitted; completion happens in the callback */
+
+err:
+ if (is_write_zeroes) { /* a rejected WRITE_ZEROES counts as an invalid write */
+ block_acct_invalid(blk_get_stats(s->blk), BLOCK_ACCT_WRITE);
+ }
+ return err_status;
+}
+
static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
{
uint32_t type;
@@ -603,6 +705,47 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
virtio_blk_free_request(req);
break;
}
+ /*
+ * VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with the
+ * VIRTIO_BLK_T_OUT flag set. The switch statement masks that flag out of
+ * the type, so the case labels must mask it too; the flag itself is checked below.
+ */
+ case VIRTIO_BLK_T_DISCARD & ~VIRTIO_BLK_T_OUT:
+ case VIRTIO_BLK_T_WRITE_ZEROES & ~VIRTIO_BLK_T_OUT:
+ {
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
+ size_t out_len = iov_size(out_iov, out_num);
+ bool is_write_zeroes = (type & ~VIRTIO_BLK_T_BARRIER) ==
+ VIRTIO_BLK_T_WRITE_ZEROES;
+ uint8_t err_status;
+
+ /*
+ * Unsupported if VIRTIO_BLK_T_OUT is not set or the request contains
+ * more than one segment.
+ */
+ if (unlikely(!(type & VIRTIO_BLK_T_OUT) ||
+ out_len > sizeof(dwz_hdr))) {
+ virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
+ virtio_blk_free_request(req);
+ return 0;
+ }
+
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &dwz_hdr,
+ sizeof(dwz_hdr)) != sizeof(dwz_hdr))) {
+ virtio_error(vdev, "virtio-blk discard/write_zeroes header"
+ " too short");
+ return -1;
+ }
+
+ err_status = virtio_blk_handle_discard_write_zeroes(req, &dwz_hdr,
+ is_write_zeroes);
+ if (err_status != VIRTIO_BLK_S_OK) {
+ virtio_blk_req_complete(req, err_status);
+ virtio_blk_free_request(req);
+ }
+
+ break;
+ }
default:
virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
virtio_blk_free_request(req);
@@ -782,6 +925,24 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blkcfg.alignment_offset = 0;
blkcfg.wce = blk_enable_write_cache(s->blk);
virtio_stw_p(vdev, &blkcfg.num_queues, s->conf.num_queues);
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD)) {
+ virtio_stl_p(vdev, &blkcfg.max_discard_sectors,
+ s->conf.max_discard_sectors);
+ virtio_stl_p(vdev, &blkcfg.discard_sector_alignment,
+ blk_size >> BDRV_SECTOR_BITS);
+ /*
+ * We support only one segment per request since multiple segments
+ * are not widely used and there are no userspace APIs that allow
+ * applications to submit multiple segments in a single call.
+ */
+ virtio_stl_p(vdev, &blkcfg.max_discard_seg, 1);
+ }
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES)) {
+ virtio_stl_p(vdev, &blkcfg.max_write_zeroes_sectors,
+ s->conf.max_write_zeroes_sectors);
+ blkcfg.write_zeroes_may_unmap = 1;
+ virtio_stl_p(vdev, &blkcfg.max_write_zeroes_seg, 1);
+ }
memcpy(config, &blkcfg, s->config_size);
}
@@ -973,6 +1134,25 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
return;
}
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_DISCARD) &&
+ (!conf->max_discard_sectors ||
+ conf->max_discard_sectors > BDRV_REQUEST_MAX_SECTORS)) {
+ error_setg(errp, "invalid max-discard-sectors property (%" PRIu32 ")"
+ ", must be between 1 and %d",
+ conf->max_discard_sectors, (int)BDRV_REQUEST_MAX_SECTORS);
+ return;
+ }
+
+ if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_WRITE_ZEROES) &&
+ (!conf->max_write_zeroes_sectors ||
+ conf->max_write_zeroes_sectors > BDRV_REQUEST_MAX_SECTORS)) {
+ error_setg(errp, "invalid max-write-zeroes-sectors property (%" PRIu32
+ "), must be between 1 and %d",
+ conf->max_write_zeroes_sectors,
+ (int)BDRV_REQUEST_MAX_SECTORS);
+ return;
+ }
+
virtio_blk_set_config_size(s, s->host_features);
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, s->config_size);
@@ -1050,6 +1230,10 @@ static Property virtio_blk_properties[] = {
VIRTIO_BLK_F_DISCARD, true),
DEFINE_PROP_BIT64("write-zeroes", VirtIOBlock, host_features,
VIRTIO_BLK_F_WRITE_ZEROES, true),
+ DEFINE_PROP_UINT32("max-discard-sectors", VirtIOBlock,
+ conf.max_discard_sectors, BDRV_REQUEST_MAX_SECTORS),
+ DEFINE_PROP_UINT32("max-write-zeroes-sectors", VirtIOBlock,
+ conf.max_write_zeroes_sectors, BDRV_REQUEST_MAX_SECTORS),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index 7877ae67ae..cddcfbebe9 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -38,6 +38,8 @@ struct VirtIOBlkConf
uint32_t request_merging;
uint16_t num_queues;
uint16_t queue_size;
+ uint32_t max_discard_sectors;
+ uint32_t max_write_zeroes_sectors;
};
struct VirtIOBlockDataPlane;
--
2.20.1
next prev parent reply other threads:[~2019-02-18 14:06 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-18 14:02 [Qemu-devel] [PATCH v5 00/10] virtio-blk: add DISCARD and WRITE_ZEROES features Stefano Garzarella
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 01/10] virtio-blk: add acct_failed param to virtio_blk_handle_rw_error() Stefano Garzarella
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 02/10] virtio-blk: add host_features field in VirtIOBlock Stefano Garzarella
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 03/10] virtio-blk: add "discard" and "write-zeroes" properties Stefano Garzarella
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 04/10] virtio-net: make VirtIOFeature usable for other virtio devices Stefano Garzarella
2019-02-20 16:00 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 05/10] virtio-blk: set config size depending on the features enabled Stefano Garzarella
2019-02-20 16:02 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2019-02-18 14:02 ` Stefano Garzarella [this message]
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 07/10] tests/virtio-blk: change assert on data_size in virtio_blk_request() Stefano Garzarella
2019-02-18 14:02 ` [Qemu-devel] [PATCH v5 08/10] tests/virtio-blk: add virtio_blk_fix_dwz_hdr() function Stefano Garzarella
2019-02-20 16:11 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2019-02-18 14:03 ` [Qemu-devel] [PATCH v5 09/10] tests/virtio-blk: add test for WRITE_ZEROES command Stefano Garzarella
2019-02-18 14:03 ` [Qemu-devel] [PATCH v5 10/10] tests/virtio-blk: add test for DISCARD command Stefano Garzarella
2019-02-20 16:11 ` Stefan Hajnoczi
2019-02-18 15:05 ` [Qemu-devel] [PATCH v5 00/10] virtio-blk: add DISCARD and WRITE_ZEROES features no-reply
2019-02-20 16:30 ` [Qemu-devel] [Qemu-block] " Stefan Hajnoczi
2019-02-20 21:05 ` Stefano Garzarella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190218140301.197408-7-sgarzare@redhat.com \
--to=sgarzare@redhat.com \
--cc=dgilbert@redhat.com \
--cc=ehabkost@redhat.com \
--cc=jasowang@redhat.com \
--cc=kwolf@redhat.com \
--cc=lvivier@redhat.com \
--cc=marcel.apfelbaum@gmail.com \
--cc=mreitz@redhat.com \
--cc=mst@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
--cc=thuth@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).