qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Anthony Liguori <aliguori@us.ibm.com>,
	Stefan Hajnoczi <stefanha@redhat.com>
Subject: [Qemu-devel] [PATCH 06/15] block: make discard asynchronous
Date: Tue, 15 Jan 2013 17:48:22 +0100	[thread overview]
Message-ID: <1358268511-27061-7-git-send-email-stefanha@redhat.com> (raw)
In-Reply-To: <1358268511-27061-1-git-send-email-stefanha@redhat.com>

From: Paolo Bonzini <pbonzini@redhat.com>

This is easy with the thread pool, because we can use s->is_xfs and
s->has_discard from the worker function.

QEMU has a widespread assumption that each I/O operation writes fewer
than 2^32 bytes.  This patch doesn't fix that throughout, of course,
but it starts correcting struct RawPosixAIOData so that there is
no regression with respect to the synchronous discard implementation.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/raw-aio.h   |   5 +-
 block/raw-posix.c | 164 ++++++++++++++++++++++++++++--------------------------
 2 files changed, 88 insertions(+), 81 deletions(-)

diff --git a/block/raw-aio.h b/block/raw-aio.h
index e77f361..c61f159 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -20,11 +20,14 @@
 #define QEMU_AIO_WRITE        0x0002
 #define QEMU_AIO_IOCTL        0x0004
 #define QEMU_AIO_FLUSH        0x0008
+#define QEMU_AIO_DISCARD      0x0010
 #define QEMU_AIO_TYPE_MASK \
-	(QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH)
+        (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
+         QEMU_AIO_DISCARD)
 
 /* AIO flags */
 #define QEMU_AIO_MISALIGNED   0x1000
+#define QEMU_AIO_BLKDEV       0x2000
 
 
 /* linux-aio.c - Linux native implementation */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 1d32139..679fcc5 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -163,7 +163,7 @@ typedef struct RawPosixAIOData {
         void *aio_ioctl_buf;
     };
     int aio_niov;
-    size_t aio_nbytes;
+    uint64_t aio_nbytes;
 #define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
     off_t aio_offset;
     int aio_type;
@@ -623,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
     return nbytes;
 }
 
+#ifdef CONFIG_XFS
+static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
+{
+    struct xfs_flock64 fl;
+
+    memset(&fl, 0, sizeof(fl));
+    fl.l_whence = SEEK_SET;
+    fl.l_start = offset;
+    fl.l_len = bytes;
+
+    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
+        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+#endif
+
+static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
+{
+    int ret = -EOPNOTSUPP;
+    BDRVRawState *s = aiocb->bs->opaque;
+
+    if (s->has_discard == 0) {
+        return 0;
+    }
+
+    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+#ifdef BLKDISCARD
+        do {
+            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
+
+        ret = -errno;
+#endif
+    } else {
+#ifdef CONFIG_XFS
+        if (s->is_xfs) {
+            return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
+        }
+#endif
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+        do {
+            if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                          aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
+
+        ret = -errno;
+#endif
+    }
+
+    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
+        ret == -ENOTTY) {
+        s->has_discard = 0;
+        ret = 0;
+    }
+    return ret;
+}
+
 static int aio_worker(void *arg)
 {
     RawPosixAIOData *aiocb = arg;
@@ -657,6 +723,9 @@ static int aio_worker(void *arg)
     case QEMU_AIO_IOCTL:
         ret = handle_aiocb_ioctl(aiocb);
         break;
+    case QEMU_AIO_DISCARD:
+        ret = handle_aiocb_discard(aiocb);
+        break;
     default:
         fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
         ret = -EINVAL;
@@ -1057,57 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
     }
 }
 
-#ifdef CONFIG_XFS
-static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors)
-{
-    struct xfs_flock64 fl;
-
-    memset(&fl, 0, sizeof(fl));
-    fl.l_whence = SEEK_SET;
-    fl.l_start = sector_num << 9;
-    fl.l_len = (int64_t)nb_sectors << 9;
-
-    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
-        DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
-        return -errno;
-    }
-
-    return 0;
-}
-#endif
-
-static coroutine_fn int raw_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
-    int ret = -EOPNOTSUPP;
     BDRVRawState *s = bs->opaque;
 
-    if (!s->has_discard) {
-        return 0;
-    }
-
-#ifdef CONFIG_XFS
-    if (s->is_xfs) {
-        return xfs_discard(s, sector_num, nb_sectors);
-    }
-#endif
-
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-    do {
-        if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                      sector_num << BDRV_SECTOR_BITS,
-                      (int64_t)nb_sectors << BDRV_SECTOR_BITS) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = -errno;
-#endif
-
-    if (ret == -EOPNOTSUPP) {
-        return 0;
-    }
-    return ret;
+    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+                       cb, opaque, QEMU_AIO_DISCARD);
 }
 
 static QEMUOptionParameter raw_create_options[] = {
@@ -1130,12 +1156,12 @@ static BlockDriver bdrv_file = {
     .bdrv_reopen_abort = raw_reopen_abort,
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
-    .bdrv_co_discard = raw_co_discard,
     .bdrv_co_is_allocated = raw_co_is_allocated,
 
     .bdrv_aio_readv = raw_aio_readv,
     .bdrv_aio_writev = raw_aio_writev,
     .bdrv_aio_flush = raw_aio_flush,
+    .bdrv_aio_discard = raw_aio_discard,
 
     .bdrv_truncate = raw_truncate,
     .bdrv_getlength = raw_getlength,
@@ -1345,38 +1371,17 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
     return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
-static coroutine_fn int hdev_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
-    int ret;
-
-    if (s->has_discard == 0) {
-        return 0;
-    }
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
 
-    ret = -EOPNOTSUPP;
-#ifdef BLKDISCARD
-    do {
-        uint64_t range[2] = { sector_num * 512, (uint64_t)nb_sectors * 512 };
-        if (ioctl(s->fd, BLKDISCARD, range) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = -errno;
-#endif
-    if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
-        ret == -ENOTTY) {
-        s->has_discard = 0;
-        ret = 0;
+    if (fd_open(bs) < 0) {
+        return NULL;
     }
-    return ret;
-
+    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
 }
 
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -1447,11 +1452,10 @@ static BlockDriver bdrv_host_device = {
     .create_options     = raw_create_options,
     .bdrv_has_zero_init = hdev_has_zero_init,
 
-    .bdrv_co_discard    = hdev_co_discard,
-
     .bdrv_aio_readv	= raw_aio_readv,
     .bdrv_aio_writev	= raw_aio_writev,
     .bdrv_aio_flush	= raw_aio_flush,
+    .bdrv_aio_discard   = hdev_aio_discard,
 
     .bdrv_truncate      = raw_truncate,
     .bdrv_getlength	= raw_getlength,
-- 
1.8.0.2

  parent reply	other threads:[~2013-01-15 16:49 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-15 16:48 [Qemu-devel] [PULL 00/15] Block patches for QEMU 1.4 Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 01/15] qcow2: Fix segfault on zero-length write Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 02/15] block: fix initialization in bdrv_io_limits_enable() Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 03/15] raw-posix: support discard on more filesystems Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 04/15] raw-posix: remember whether discard failed Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 05/15] raw: support discard on block devices Stefan Hajnoczi
2013-01-15 16:48 ` Stefan Hajnoczi [this message]
2013-01-15 16:48 ` [Qemu-devel] [PATCH 07/15] ide: fix TRIM with empty range entry Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 08/15] ide: issue discard asynchronously but serialize the pieces Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 09/15] block: clear dirty bitmap when discarding Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 10/15] sheepdog: multiplex the rw FD to flush cache Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 11/15] sheepdog: clean up sd_aio_setup() Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 12/15] w32: Make qemu_vfree() accept NULL like the POSIX implementation Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 13/15] scsi-disk: qemu_vfree(NULL) is fine, simplify Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 14/15] win32-aio: Fix how win32_aio_process_completion() frees buffer Stefan Hajnoczi
2013-01-15 16:48 ` [Qemu-devel] [PATCH 15/15] block: Fix how mirror_run() frees its buffer Stefan Hajnoczi
2013-01-16  1:18 ` [Qemu-devel] [PULL 00/15] Block patches for QEMU 1.4 Anthony Liguori

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1358268511-27061-7-git-send-email-stefanha@redhat.com \
    --to=stefanha@redhat.com \
    --cc=aliguori@us.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).