qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>,
	Changpeng Liu <changpeng.liu@intel.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Kevin Wolf <kwolf@redhat.com>, Max Reitz <mreitz@redhat.com>,
	qemu-block@nongnu.org
Subject: [Qemu-devel] [PULL 17/25] vhost-user-blk: add discard/write zeroes features support
Date: Mon, 4 Feb 2019 09:43:52 -0500	[thread overview]
Message-ID: <20190204142638.27021-18-mst@redhat.com> (raw)
In-Reply-To: <20190204142638.27021-1-mst@redhat.com>

From: Changpeng Liu <changpeng.liu@intel.com>

Linux commit 1f23816b8 "virtio_blk: add discard and write zeroes support"
added support for these features to the guest kernel. Enable the
corresponding feature bits in the vhost-user-blk driver as well, and teach
the example vhost-user-blk utility to handle the DISCARD and WRITE ZEROES
commands.

Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/standard-headers/linux/virtio_blk.h |  48 +++++++
 contrib/vhost-user-blk/vhost-user-blk.c     | 140 +++++++++++++++-----
 hw/block/vhost-user-blk.c                   |   4 +
 3 files changed, 159 insertions(+), 33 deletions(-)

diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h
index ae6e865fee..0229b0fbe4 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -38,6 +38,8 @@
 #define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available*/
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
+#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
+#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */
 
 /* Legacy feature bits */
 #ifndef VIRTIO_BLK_NO_LEGACY
@@ -84,6 +86,39 @@ struct virtio_blk_config {
 
 	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
 	uint16_t num_queues;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
+	/*
+	 * The maximum discard sectors (in 512-byte sectors) for
+	 * one segment.
+	 */
+	uint32_t max_discard_sectors;
+	/*
+	 * The maximum number of discard segments in a
+	 * discard command.
+	 */
+	uint32_t max_discard_seg;
+	/* Discard commands must be aligned to this number of sectors. */
+	uint32_t discard_sector_alignment;
+
+	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
+	/*
+	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
+	 * one segment.
+	 */
+	uint32_t max_write_zeroes_sectors;
+	/*
+	 * The maximum number of segments in a write zeroes
+	 * command.
+	 */
+	uint32_t max_write_zeroes_seg;
+	/*
+	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
+	 * deallocation of one or more of the sectors.
+	 */
+	uint8_t write_zeroes_may_unmap;
+
+	uint8_t unused1[3];
 } QEMU_PACKED;
 
 /*
@@ -137,6 +172,19 @@ struct virtio_blk_outhdr {
 	__virtio64 sector;
 };
 
+/* Unmap this range (only valid for write zeroes command) */
+#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001
+
+/* Discard/write zeroes range for each request: a start sector and a length. */
+struct virtio_blk_discard_write_zeroes {
+	/* first sector of the range, in 512-byte sectors (little-endian) */
+	uint64_t sector;
+	/* length of the range, in 512-byte sectors (little-endian) */
+	uint32_t num_sectors;
+	/* flags for this range, e.g. VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP */
+	uint32_t flags;
+};
+
 #ifndef VIRTIO_BLK_NO_LEGACY
 struct virtio_scsi_inhdr {
 	__virtio32 errors;
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
index 49640dfdf2..43583f2659 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -63,6 +63,20 @@ static size_t vub_iov_size(const struct iovec *iov,
     return len;
 }
 
+static size_t vub_iov_to_buf(const struct iovec *iov,
+                             const unsigned int iov_cnt, void *buf)
+{
+    /* Pack the segments back to back; buf must hold their total length. */
+    size_t total = 0;
+    unsigned int left;
+
+    for (left = iov_cnt; left > 0; left--, iov++) {
+        memcpy((char *)buf + total, iov->iov_base, iov->iov_len);
+        total += iov->iov_len;
+    }
+    return total;
+}
+
 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
 {
     VugDev *gdev;
@@ -161,6 +175,44 @@ vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
     return rc;
 }
 
+/*
+ * Handle a DISCARD or WRITE ZEROES request whose payload, gathered from
+ * @iov, must be exactly one struct virtio_blk_discard_write_zeroes.
+ * Returns 0 on success, -1 otherwise (bad payload, ioctl failed or absent).
+ */
+static int
+vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
+                         uint32_t type)
+{
+    struct virtio_blk_discard_write_zeroes desc;
+    size_t size;
+
+    size = vub_iov_size(iov, iovcnt);
+    if (size != sizeof(desc)) {
+        fprintf(stderr, "Invalid size %zu, expect %zu\n", size, sizeof(desc));
+        return -1;
+    }
+    vub_iov_to_buf(iov, iovcnt, &desc);
+
+#if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+    VubDev *vdev_blk = req->vdev_blk;
+    /* Widen num_sectors before shifting so the byte length cannot wrap. */
+    uint64_t range[2] = { le64toh(desc.sector) << 9,
+                          (uint64_t)le32toh(desc.num_sectors) << 9 };
+    if (type == VIRTIO_BLK_T_DISCARD) {
+        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
+            return 0;
+        }
+    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
+            return 0;
+        }
+    }
+#endif
+
+    return -1;
+}
+
 static void
 vub_flush(VubReq *req)
 {
@@ -216,44 +268,55 @@ static int vub_virtio_process_req(VubDev *vdev_blk,
     in_num--;
 
     type = le32toh(req->out->type);
-    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
-        case VIRTIO_BLK_T_IN: {
-            ssize_t ret = 0;
-            bool is_write = type & VIRTIO_BLK_T_OUT;
-            req->sector_num = le64toh(req->out->sector);
-            if (is_write) {
-                ret  = vub_writev(req, &elem->out_sg[1], out_num);
-            } else {
-                ret = vub_readv(req, &elem->in_sg[0], in_num);
-            }
-            if (ret >= 0) {
-                req->in->status = VIRTIO_BLK_S_OK;
-            } else {
-                req->in->status = VIRTIO_BLK_S_IOERR;
-            }
-            vub_req_complete(req);
-            break;
+    switch (type & ~VIRTIO_BLK_T_BARRIER) {
+    case VIRTIO_BLK_T_IN:
+    case VIRTIO_BLK_T_OUT: {
+        ssize_t ret = 0;
+        bool is_write = type & VIRTIO_BLK_T_OUT;
+        req->sector_num = le64toh(req->out->sector);
+        if (is_write) {
+            ret  = vub_writev(req, &elem->out_sg[1], out_num);
+        } else {
+            ret = vub_readv(req, &elem->in_sg[0], in_num);
         }
-        case VIRTIO_BLK_T_FLUSH: {
-            vub_flush(req);
+        if (ret >= 0) {
             req->in->status = VIRTIO_BLK_S_OK;
-            vub_req_complete(req);
-            break;
+        } else {
+            req->in->status = VIRTIO_BLK_S_IOERR;
         }
-        case VIRTIO_BLK_T_GET_ID: {
-            size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
-                              VIRTIO_BLK_ID_BYTES);
-            snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+        vub_req_complete(req);
+        break;
+    }
+    case VIRTIO_BLK_T_FLUSH:
+        vub_flush(req);
+        req->in->status = VIRTIO_BLK_S_OK;
+        vub_req_complete(req);
+        break;
+    case VIRTIO_BLK_T_GET_ID: {
+        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
+                          VIRTIO_BLK_ID_BYTES);
+        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+        req->in->status = VIRTIO_BLK_S_OK;
+        req->size = elem->in_sg[0].iov_len;
+        vub_req_complete(req);
+        break;
+    }
+    case VIRTIO_BLK_T_DISCARD:
+    case VIRTIO_BLK_T_WRITE_ZEROES: {
+        int rc;
+        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
+        if (rc == 0) {
             req->in->status = VIRTIO_BLK_S_OK;
-            req->size = elem->in_sg[0].iov_len;
-            vub_req_complete(req);
-            break;
-        }
-        default: {
-            req->in->status = VIRTIO_BLK_S_UNSUPP;
-            vub_req_complete(req);
-            break;
+        } else {
+            req->in->status = VIRTIO_BLK_S_IOERR;
         }
+        vub_req_complete(req);
+        break;
+    }
+    default:
+        req->in->status = VIRTIO_BLK_S_UNSUPP;
+        vub_req_complete(req);
+        break;
     }
 
     return 0;
@@ -317,6 +380,10 @@ vub_get_features(VuDev *dev)
                1ull << VIRTIO_BLK_F_TOPOLOGY |
                1ull << VIRTIO_BLK_F_BLK_SIZE |
                1ull << VIRTIO_BLK_F_FLUSH |
+               #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+               1ull << VIRTIO_BLK_F_DISCARD |
+               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
+               #endif
                1ull << VIRTIO_BLK_F_CONFIG_WCE |
                1ull << VIRTIO_F_VERSION_1 |
                1ull << VHOST_USER_F_PROTOCOL_FEATURES;
@@ -478,6 +545,13 @@ vub_initialize_config(int fd, struct virtio_blk_config *config)
     config->min_io_size = 1;
     config->opt_io_size = 1;
     config->num_queues = 1;
+    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+    config->max_discard_sectors = 32768;
+    config->max_discard_seg = 1;
+    config->discard_sector_alignment = config->blk_size >> 9;
+    config->max_write_zeroes_sectors = 32768;
+    config->max_write_zeroes_seg = 1;
+    #endif
 }
 
 static VubDev *
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index c3af28fad4..44ac814016 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -38,6 +38,8 @@ static const int user_feature_bits[] = {
     VIRTIO_BLK_F_RO,
     VIRTIO_BLK_F_FLUSH,
     VIRTIO_BLK_F_CONFIG_WCE,
+    VIRTIO_BLK_F_DISCARD,
+    VIRTIO_BLK_F_WRITE_ZEROES,
     VIRTIO_F_VERSION_1,
     VIRTIO_RING_F_INDIRECT_DESC,
     VIRTIO_RING_F_EVENT_IDX,
@@ -204,6 +206,8 @@ static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev,
     virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
     virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH);
     virtio_add_feature(&features, VIRTIO_BLK_F_RO);
+    virtio_add_feature(&features, VIRTIO_BLK_F_DISCARD);
+    virtio_add_feature(&features, VIRTIO_BLK_F_WRITE_ZEROES);
 
     if (s->config_wce) {
         virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
-- 
MST

  parent reply	other threads:[~2019-02-04 14:51 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04 14:43 [Qemu-devel] [PULL 00/25] pci, pc, virtio: fixes, cleanups, features Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 01/25] virtio: add checks for the size of the indirect table Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 02/25] contrib/libvhost-user: switch to uint64_t Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 03/25] scripts/update-linux-headers.sh: adjust for Linux 4.21-rc1 (or 5.0-rc1) Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 04/25] include: update Linux headers to 4.21-rc1/5.0-rc1 Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 10/25] hw: virtio-pci: drop DO_UPCAST Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 11/25] intel_iommu: fix operator in vtd_switch_address_space Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 12/25] intel_iommu: reset intr_enabled when system reset Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 13/25] pci/msi: export msi_is_masked() Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 14/25] i386/kvm: ignore masked irqs when update msi routes Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 15/25] contrib: compile vhost-user-blk tool by default Michael S. Tsirkin
2019-02-04 15:07   ` Daniel P. Berrangé
2019-02-04 15:19     ` Michael S. Tsirkin
2019-02-04 15:29       ` Daniel P. Berrangé
2019-02-05  1:48         ` Michael S. Tsirkin
2019-02-08  7:13           ` Stefan Hajnoczi
2019-02-04 14:43 ` [Qemu-devel] [PULL 16/25] contrib/vhost-user-blk: fix the compilation issue Michael S. Tsirkin
2019-02-04 14:43 ` Michael S. Tsirkin [this message]
2019-02-04 14:43 ` [Qemu-devel] [PULL 18/25] hw/virtio: Use CONFIG_VIRTIO_PCI switch instead of CONFIG_PCI Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 19/25] acpi: Make TPM 2.0 with TIS available as MSFT0101 Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 20/25] fw_cfg: fix the life cycle and the name of "qemu_extra_params_fw" Michael S. Tsirkin
2019-02-04 14:43 ` [Qemu-devel] [PULL 21/25] i386, acpi: cleanup build_facs by removing second unused argument Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 22/25] mmap-alloc: unfold qemu_ram_mmap() Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 23/25] mmap-alloc: fix hugetlbfs misaligned length in ppc64 Michael S. Tsirkin
2019-02-04 15:15   ` Greg Kurz
2019-02-04 15:20     ` Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 24/25] r2d: fix build on mingw Michael S. Tsirkin
2019-02-04 14:44 ` [Qemu-devel] [PULL 25/25] contrib/libvhost-user: cleanup casts Michael S. Tsirkin
2019-02-04 17:59 ` [Qemu-devel] [PULL 00/25] pci, pc, virtio: fixes, cleanups, features Peter Maydell
2019-02-04 19:39   ` Michael S. Tsirkin
2019-02-05  1:50   ` Michael S. Tsirkin
2019-02-05  1:51   ` Michael S. Tsirkin
2019-02-05 12:41     ` Peter Maydell
2019-02-05 16:06       ` Michael S. Tsirkin
2019-02-05 17:38         ` Peter Maydell
2019-02-12  7:11         ` Peter Xu
2019-02-12 10:39           ` Philippe Mathieu-Daudé
2019-02-12 13:04             ` Michael S. Tsirkin
2019-02-12 13:15               ` Philippe Mathieu-Daudé
2019-02-12 13:24                 ` Michael S. Tsirkin
2019-02-12 13:53                   ` Philippe Mathieu-Daudé
2019-02-12 14:04                     ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190204142638.27021-18-mst@redhat.com \
    --to=mst@redhat.com \
    --cc=changpeng.liu@intel.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).