From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
Changpeng Liu <changpeng.liu@intel.com>,
Stefan Hajnoczi <stefanha@redhat.com>,
Kevin Wolf <kwolf@redhat.com>, Max Reitz <mreitz@redhat.com>,
qemu-block@nongnu.org
Subject: [Qemu-devel] [PULL 17/25] vhost-user-blk: add discard/write zeroes features support
Date: Mon, 4 Feb 2019 09:43:52 -0500 [thread overview]
Message-ID: <20190204142638.27021-18-mst@redhat.com> (raw)
In-Reply-To: <20190204142638.27021-1-mst@redhat.com>
From: Changpeng Liu <changpeng.liu@intel.com>
Linux commit 1f23816b8 "virtio_blk: add discard and write zeroes support"
added support for these commands to the guest kernel driver. This patch
enables the corresponding feature bits in the vhost-user-blk device, and
teaches the example vhost-user-blk utility to handle the DISCARD and
WRITE ZEROES commands.
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
include/standard-headers/linux/virtio_blk.h | 48 +++++++
contrib/vhost-user-blk/vhost-user-blk.c | 140 +++++++++++++++-----
hw/block/vhost-user-blk.c | 4 +
3 files changed, 159 insertions(+), 33 deletions(-)
diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h
index ae6e865fee..0229b0fbe4 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -38,6 +38,8 @@
#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */
/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -84,6 +86,39 @@ struct virtio_blk_config {
/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
uint16_t num_queues;
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+ /*
+ * The maximum discard sectors (in 512-byte sectors) for
+ * one segment.
+ */
+ uint32_t max_discard_sectors;
+ /*
+ * The maximum number of discard segments in a
+ * discard command.
+ */
+ uint32_t max_discard_seg;
+ /* Discard commands must be aligned to this number of sectors. */
+ uint32_t discard_sector_alignment;
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+ /*
+ * The maximum number of write zeroes sectors (in 512-byte sectors) in
+ * one segment.
+ */
+ uint32_t max_write_zeroes_sectors;
+ /*
+ * The maximum number of segments in a write zeroes
+ * command.
+ */
+ uint32_t max_write_zeroes_seg;
+ /*
+ * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+ * deallocation of one or more of the sectors.
+ */
+ uint8_t write_zeroes_may_unmap;
+
+ uint8_t unused1[3];
} QEMU_PACKED;
/*
@@ -137,6 +172,19 @@ struct virtio_blk_outhdr {
__virtio64 sector;
};
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001
+
+/* Discard/write zeroes range for each request. */
+struct virtio_blk_discard_write_zeroes {
+ /* first sector of the range to discard or zero */
+ uint64_t sector;
+ /* length of the range, counted in sectors */
+ uint32_t num_sectors;
+ /* per-range flags, e.g. VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP */
+ uint32_t flags;
+};
+
#ifndef VIRTIO_BLK_NO_LEGACY
struct virtio_scsi_inhdr {
__virtio32 errors;
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
index 49640dfdf2..43583f2659 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -63,6 +63,20 @@ static size_t vub_iov_size(const struct iovec *iov,
return len;
}
+/*
+ * Flatten a scatter-gather list into one contiguous buffer.
+ *
+ * Copies the iov_cnt segments of iov, in order, back to back into buf and
+ * returns the total number of bytes copied.  No capacity is passed in, so
+ * buf must be large enough for the sum of all iov_len values.
+ */
+static size_t vub_iov_to_buf(const struct iovec *iov,
+                             const unsigned int iov_cnt, void *buf)
+{
+    char *dst = buf;
+    size_t copied = 0;
+    unsigned int idx = 0;
+
+    while (idx < iov_cnt) {
+        const struct iovec *seg = &iov[idx++];
+
+        memcpy(dst + copied, seg->iov_base, seg->iov_len);
+        copied += seg->iov_len;
+    }
+    return copied;
+}
+
static void vub_panic_cb(VuDev *vu_dev, const char *buf)
{
VugDev *gdev;
@@ -161,6 +175,44 @@ vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
return rc;
}
+/*
+ * Serve a VIRTIO_BLK_T_DISCARD or VIRTIO_BLK_T_WRITE_ZEROES request.
+ *
+ * @req:    request being served; the ioctl is issued on
+ *          req->vdev_blk->blk_fd
+ * @iov:    buffers that must hold exactly one
+ *          struct virtio_blk_discard_write_zeroes
+ * @iovcnt: number of entries in @iov
+ * @type:   request type as read from the request header; the
+ *          VIRTIO_BLK_T_BARRIER bit is ignored
+ *
+ * Returns 0 when the ioctl succeeded.  Returns -1 when the payload has the
+ * wrong size, when the ioctl fails, when @type is neither command, or when
+ * BLKDISCARD/BLKZEROOUT were not available at build time.
+ * The flags field of the descriptor is not examined.
+ */
+static int
+vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
+                         uint32_t type)
+{
+    struct virtio_blk_discard_write_zeroes desc;
+    size_t size;
+
+    size = vub_iov_size(iov, iovcnt);
+    if (size != sizeof(desc)) {
+        /* size_t values must be printed with %zu, not %ld */
+        fprintf(stderr, "Invalid size %zu, expect %zu\n", size, sizeof(desc));
+        return -1;
+    }
+    /* The size check above guarantees the copy fills desc exactly. */
+    vub_iov_to_buf(iov, iovcnt, &desc);
+
+    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+    VubDev *vdev_blk = req->vdev_blk;
+    /*
+     * Convert sectors to bytes.  num_sectors is widened to 64 bits before
+     * the shift so that a large guest-supplied count is not truncated.
+     */
+    uint64_t range[2] = { le64toh(desc.sector) << 9,
+                          (uint64_t)le32toh(desc.num_sectors) << 9 };
+
+    /* The caller dispatches with the barrier bit masked off; do the same. */
+    switch (type & ~VIRTIO_BLK_T_BARRIER) {
+    case VIRTIO_BLK_T_DISCARD:
+        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
+            return 0;
+        }
+        break;
+    case VIRTIO_BLK_T_WRITE_ZEROES:
+        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
+            return 0;
+        }
+        break;
+    default:
+        break;
+    }
+    #endif
+
+    return -1;
+}
+
static void
vub_flush(VubReq *req)
{
@@ -216,44 +268,55 @@ static int vub_virtio_process_req(VubDev *vdev_blk,
in_num--;
type = le32toh(req->out->type);
- switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
- case VIRTIO_BLK_T_IN: {
- ssize_t ret = 0;
- bool is_write = type & VIRTIO_BLK_T_OUT;
- req->sector_num = le64toh(req->out->sector);
- if (is_write) {
- ret = vub_writev(req, &elem->out_sg[1], out_num);
- } else {
- ret = vub_readv(req, &elem->in_sg[0], in_num);
- }
- if (ret >= 0) {
- req->in->status = VIRTIO_BLK_S_OK;
- } else {
- req->in->status = VIRTIO_BLK_S_IOERR;
- }
- vub_req_complete(req);
- break;
+ switch (type & ~VIRTIO_BLK_T_BARRIER) {
+ case VIRTIO_BLK_T_IN:
+ case VIRTIO_BLK_T_OUT: {
+ ssize_t ret = 0;
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = le64toh(req->out->sector);
+ if (is_write) {
+ ret = vub_writev(req, &elem->out_sg[1], out_num);
+ } else {
+ ret = vub_readv(req, &elem->in_sg[0], in_num);
}
- case VIRTIO_BLK_T_FLUSH: {
- vub_flush(req);
+ if (ret >= 0) {
req->in->status = VIRTIO_BLK_S_OK;
- vub_req_complete(req);
- break;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
}
- case VIRTIO_BLK_T_GET_ID: {
- size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
- VIRTIO_BLK_ID_BYTES);
- snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+ vub_req_complete(req);
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH:
+ vub_flush(req);
+ req->in->status = VIRTIO_BLK_S_OK;
+ vub_req_complete(req);
+ break;
+ case VIRTIO_BLK_T_GET_ID: {
+ size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
+ VIRTIO_BLK_ID_BYTES);
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+ req->in->status = VIRTIO_BLK_S_OK;
+ req->size = elem->in_sg[0].iov_len;
+ vub_req_complete(req);
+ break;
+ }
+ case VIRTIO_BLK_T_DISCARD:
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
+ int rc;
+ rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
+ if (rc == 0) {
req->in->status = VIRTIO_BLK_S_OK;
- req->size = elem->in_sg[0].iov_len;
- vub_req_complete(req);
- break;
- }
- default: {
- req->in->status = VIRTIO_BLK_S_UNSUPP;
- vub_req_complete(req);
- break;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
}
+ vub_req_complete(req);
+ break;
+ }
+ default:
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
+ vub_req_complete(req);
+ break;
}
return 0;
@@ -317,6 +380,10 @@ vub_get_features(VuDev *dev)
1ull << VIRTIO_BLK_F_TOPOLOGY |
1ull << VIRTIO_BLK_F_BLK_SIZE |
1ull << VIRTIO_BLK_F_FLUSH |
+ #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+ 1ull << VIRTIO_BLK_F_DISCARD |
+ 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
+ #endif
1ull << VIRTIO_BLK_F_CONFIG_WCE |
1ull << VIRTIO_F_VERSION_1 |
1ull << VHOST_USER_F_PROTOCOL_FEATURES;
@@ -478,6 +545,13 @@ vub_initialize_config(int fd, struct virtio_blk_config *config)
config->min_io_size = 1;
config->opt_io_size = 1;
config->num_queues = 1;
+ #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+ config->max_discard_sectors = 32768;
+ config->max_discard_seg = 1;
+ config->discard_sector_alignment = config->blk_size >> 9;
+ config->max_write_zeroes_sectors = 32768;
+ config->max_write_zeroes_seg = 1;
+ #endif
}
static VubDev *
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index c3af28fad4..44ac814016 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -38,6 +38,8 @@ static const int user_feature_bits[] = {
VIRTIO_BLK_F_RO,
VIRTIO_BLK_F_FLUSH,
VIRTIO_BLK_F_CONFIG_WCE,
+ VIRTIO_BLK_F_DISCARD,
+ VIRTIO_BLK_F_WRITE_ZEROES,
VIRTIO_F_VERSION_1,
VIRTIO_RING_F_INDIRECT_DESC,
VIRTIO_RING_F_EVENT_IDX,
@@ -204,6 +206,8 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
virtio_add_feature(&features, VIRTIO_BLK_F_RO);
+ virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
+ virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);
if (s->config_wce) {
virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
--
MST
next prev parent reply other threads:[~2019-02-04 14:51 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-04 14:43 [Qemu-devel] [PULL 00/25] pci, pc, virtio: fixes, cleanups, features Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 01/25] virtio: add checks for the size of the indirect table Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 02/25] contrib/libvhost-user: switch to uint64_t Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 03/25] scripts/update-linux-headers.sh: adjust for Linux 4.21-rc1 (or 5.0-rc1) Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 04/25] include: update Linux headers to 4.21-rc1/5.0-rc1 Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 10/25] hw: virtio-pci: drop DO_UPCAST Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 11/25] intel_iommu: fix operator in vtd_switch_address_space Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 12/25] intel_iommu: reset intr_enabled when system reset Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 13/25] pci/msi: export msi_is_masked() Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 14/25] i386/kvm: ignore masked irqs when update msi routes Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 15/25] contrib: compile vhost-user-blk tool by default Michael S. Tsirkin
2019-02-04 15:07 ` Daniel P. Berrangé
2019-02-04 15:19 ` Michael S. Tsirkin
2019-02-04 15:29 ` Daniel P. Berrangé
2019-02-05 1:48 ` Michael S. Tsirkin
2019-02-08 7:13 ` Stefan Hajnoczi
2019-02-04 14:43 ` [Qemu-devel] [PULL 16/25] contrib/vhost-user-blk: fix the compilation issue Michael S. Tsirkin
2019-02-04 14:43 ` Michael S. Tsirkin [this message]
2019-02-04 14:43 ` [Qemu-devel] [PULL 18/25] hw/virtio: Use CONFIG_VIRTIO_PCI switch instead of CONFIG_PCI Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 19/25] acpi: Make TPM 2.0 with TIS available as MSFT0101 Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 20/25] fw_cfg: fix the life cycle and the name of "qemu_extra_params_fw" Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 21/25] i386, acpi: cleanup build_facs by removing second unused argument Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 22/25] mmap-alloc: unfold qemu_ram_mmap() Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 23/25] mmap-alloc: fix hugetlbfs misaligned length in ppc64 Michael S. Tsirkin
2019-02-04 15:15 ` Greg Kurz
2019-02-04 15:20 ` Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 24/25] r2d: fix build on mingw Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 25/25] contrib/libvhost-user: cleanup casts Michael S. Tsirkin
2019-02-04 17:59 ` [Qemu-devel] [PULL 00/25] pci, pc, virtio: fixes, cleanups, features Peter Maydell
2019-02-04 19:39 ` Michael S. Tsirkin
2019-02-05 1:50 ` Michael S. Tsirkin
2019-02-05 1:51 ` Michael S. Tsirkin
2019-02-05 12:41 ` Peter Maydell
2019-02-05 16:06 ` Michael S. Tsirkin
2019-02-05 17:38 ` Peter Maydell
2019-02-12 7:11 ` Peter Xu
2019-02-12 10:39 ` Philippe Mathieu-Daudé
2019-02-12 13:04 ` Michael S. Tsirkin
2019-02-12 13:15 ` Philippe Mathieu-Daudé
2019-02-12 13:24 ` Michael S. Tsirkin
2019-02-12 13:53 ` Philippe Mathieu-Daudé
2019-02-12 14:04 ` Michael S. Tsirkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190204142638.27021-18-mst@redhat.com \
--to=mst@redhat.com \
--cc=changpeng.liu@intel.com \
--cc=kwolf@redhat.com \
--cc=mreitz@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
--cc=stefanha@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).