All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, peter.maydell@linaro.org, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 10/13] file-posix: Fix write_zeroes with unmap on block devices
Date: Mon, 30 Jul 2018 17:09:55 +0200	[thread overview]
Message-ID: <20180730150958.14607-11-kwolf@redhat.com> (raw)
In-Reply-To: <20180730150958.14607-1-kwolf@redhat.com>

The BLKDISCARD ioctl doesn't guarantee that the discarded blocks read as
all-zero afterwards, so don't try to abuse it for zero writing. We try
to only use this if BLKDISCARDZEROES tells us that it is safe, but this
is unreliable on older kernels and a constant 0 in newer kernels. In
other words, this code path is never actually used with newer kernels,
so we don't even try to unmap while writing zeros.

This patch removes the abuse of discard for writing zeroes from
file-posix and instead adds a new function that uses interfaces that are
actually meant to deallocate and zero out at the same time. Only if
those fail, it falls back to zeroing out without unmap. We never fall
back to a discard operation any more that may or may not result in
zeros.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 59 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 928b863ced..fe83cbf0eb 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -648,7 +648,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 #endif
 
-    bs->supported_zero_flags = s->discard_zeroes ? BDRV_REQ_MAY_UNMAP : 0;
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
     ret = 0;
 fail:
     if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -1487,6 +1487,35 @@ static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
     return -ENOTSUP;
 }
 
+static ssize_t handle_aiocb_write_zeroes_unmap(RawPosixAIOData *aiocb)
+{
+    BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
+    int ret;
+
+    /* First try to write zeros and unmap at the same time */
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                       aiocb->aio_offset, aiocb->aio_nbytes);
+    if (ret != -ENOTSUP) {
+        return ret;
+    }
+#endif
+
+#ifdef CONFIG_XFS
+    if (s->is_xfs) {
+        /* xfs_discard() guarantees that the discarded area reads as all-zero
+         * afterwards, so we can use it here. */
+        return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
+    }
+#endif
+
+    /* If we couldn't manage to unmap while guaranteed that the area reads as
+     * all-zero afterwards, just write zeroes without unmapping */
+    ret = handle_aiocb_write_zeroes(aiocb);
+    return ret;
+}
+
 #ifndef HAVE_COPY_FILE_RANGE
 static off_t copy_file_range(int in_fd, off_t *in_off, int out_fd,
                              off_t *out_off, size_t len, unsigned int flags)
@@ -1732,6 +1761,9 @@ static int aio_worker(void *arg)
     case QEMU_AIO_WRITE_ZEROES:
         ret = handle_aiocb_write_zeroes(aiocb);
         break;
+    case QEMU_AIO_WRITE_ZEROES | QEMU_AIO_DISCARD:
+        ret = handle_aiocb_write_zeroes_unmap(aiocb);
+        break;
     case QEMU_AIO_COPY_RANGE:
         ret = handle_aiocb_copy_range(aiocb);
         break;
@@ -2556,15 +2588,13 @@ static int coroutine_fn raw_co_pwrite_zeroes(
     int bytes, BdrvRequestFlags flags)
 {
     BDRVRawState *s = bs->opaque;
+    int operation = QEMU_AIO_WRITE_ZEROES;
 
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
-                              QEMU_AIO_WRITE_ZEROES);
-    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
-                              QEMU_AIO_DISCARD);
+    if (flags & BDRV_REQ_MAY_UNMAP) {
+        operation |= QEMU_AIO_DISCARD;
     }
-    return -ENOTSUP;
+
+    return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
 }
 
 static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -3057,20 +3087,19 @@ static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
     int64_t offset, int bytes, BdrvRequestFlags flags)
 {
     BDRVRawState *s = bs->opaque;
+    int operation = QEMU_AIO_WRITE_ZEROES | QEMU_AIO_BLKDEV;
     int rc;
 
     rc = fd_open(bs);
     if (rc < 0) {
         return rc;
     }
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
-                              QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
-    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, offset, NULL, bytes,
-                              QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
+
+    if (flags & BDRV_REQ_MAY_UNMAP) {
+        operation |= QEMU_AIO_DISCARD;
     }
-    return -ENOTSUP;
+
+    return paio_submit_co(bs, s->fd, offset, NULL, bytes, operation);
 }
 
 static int coroutine_fn hdev_co_create_opts(const char *filename, QemuOpts *opts,
-- 
2.13.6

  parent reply	other threads:[~2018-07-30 15:10 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-30 15:09 [Qemu-devel] [PULL 00/13] Block layer patches Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 01/13] qcow: fix a reference leak Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 02/13] qcow2: A grammar fix in conflicting cache sizing error message Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 03/13] file-posix: Handle EINTR in preallocation=full write Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 04/13] docs: Describe using images in writing iotests Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 05/13] iotests: Don't lock /dev/null in 226 Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 06/13] Revert "qemu-img: Document copy offloading implications with -S and -c" Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 07/13] qemu-img: Add -C option for convert with copy offloading Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 08/13] iotests: Add test for 'qemu-img convert -C' compatibility Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 09/13] block: Fix documentation for BDRV_REQ_MAY_UNMAP Kevin Wolf
2018-07-30 15:09 ` Kevin Wolf [this message]
2018-07-30 15:09 ` [Qemu-devel] [PULL 11/13] block/qapi: Add 'qdev' field to query-blockstats result Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 12/13] block/qapi: Include anonymous BBs in query-blockstats Kevin Wolf
2018-07-30 15:09 ` [Qemu-devel] [PULL 13/13] qemu-iotests: Test query-blockstats with -drive and -blockdev Kevin Wolf
2018-07-31  9:02 ` [Qemu-devel] [PULL 00/13] Block layer patches Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180730150958.14607-11-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.