qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Akihiko Odaki <akihiko.odaki@gmail.com>
Cc: Kevin Wolf <kwolf@redhat.com>, Fam Zheng <fam@euphon.net>,
	pkrempa@redhat.com, Akihiko Odaki <akihiko.odaki@gmail.com>,
	qemu-block@nongnu.org, Markus Armbruster <armbru@redhat.com>,
	qemu-devel@nongnu.org, Max Reitz <mreitz@redhat.com>,
	Konstantin Nazarov <mail@knazarov.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	John Snow <jsnow@redhat.com>,
	dgilbert@redhat.com
Subject: [PATCH v2] block/file-posix: Optimize for macOS
Date: Fri,  5 Mar 2021 21:17:48 +0900	[thread overview]
Message-ID: <20210305121748.65173-1-akihiko.odaki@gmail.com> (raw)

This commit introduces "punch hole" operation and optimizes transfer
block size for macOS.

This commit introduces two additional members,
discard_granularity and opt_io to BlockSizes type in
include/block/block.h. Also, the members of the type are now
optional. Set -1 to discard_granularity and 0 to other members
for the default values.

Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
old version of this change:
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667

Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
---
 block/file-posix.c    | 40 ++++++++++++++++++++++++++++++++++++++--
 block/nvme.c          |  2 ++
 block/raw-format.c    |  4 +++-
 hw/block/block.c      | 12 ++++++++++--
 include/block/block.h |  2 ++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 05079b40cae..21bdaf969c5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -44,6 +44,7 @@
 #if defined(__APPLE__) && (__MACH__)
 #include <paths.h>
 #include <sys/param.h>
+#include <sys/mount.h>
 #include <IOKit/IOKitLib.h>
 #include <IOKit/IOBSD.h>
 #include <IOKit/storage/IOMediaBSDClient.h>
@@ -1292,6 +1293,8 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     if (check_for_dasd(s->fd) < 0) {
         return -ENOTSUP;
     }
+    bsz->opt_io = 0;
+    bsz->discard_granularity = -1;
     ret = probe_logical_blocksize(s->fd, &bsz->log);
     if (ret < 0) {
         return ret;
@@ -1586,6 +1589,7 @@ out:
     }
 }
 
+G_GNUC_UNUSED
 static int translate_err(int err)
 {
     if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1795,16 +1799,27 @@ static int handle_aiocb_discard(void *opaque)
             }
         } while (errno == EINTR);
 
-        ret = -errno;
+        ret = translate_err(-errno);
 #endif
     } else {
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
         ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                            aiocb->aio_offset, aiocb->aio_nbytes);
+        ret = translate_err(-errno);
+#elif defined(__APPLE__) && (__MACH__)
+        fpunchhole_t fpunchhole;
+        fpunchhole.fp_flags = 0;
+        fpunchhole.reserved = 0;
+        fpunchhole.fp_offset = aiocb->aio_offset;
+        fpunchhole.fp_length = aiocb->aio_nbytes;
+        if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+            ret = errno == ENODEV ? -ENOTSUP : -errno;
+        } else {
+            ret = 0;
+        }
 #endif
     }
 
-    ret = translate_err(ret);
     if (ret == -ENOTSUP) {
         s->has_discard = false;
     }
@@ -2113,6 +2128,26 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
     return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
 }
 
+static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+#if defined(__APPLE__) && (__MACH__)
+    BDRVRawState *s = bs->opaque;
+    struct statfs buf;
+
+    if (!fstatfs(s->fd, &buf)) {
+        bsz->phys = 0;
+        bsz->log = 0;
+        bsz->opt_io = buf.f_iosize;
+        bsz->discard_granularity = buf.f_bsize;
+        return 0;
+    }
+
+    return -errno;
+#else
+    return -ENOTSUP;
+#endif
+}
+
 static void raw_aio_attach_aio_context(BlockDriverState *bs,
                                        AioContext *new_context)
 {
@@ -3247,6 +3282,7 @@ BlockDriver bdrv_file = {
     .bdrv_refresh_limits = raw_refresh_limits,
     .bdrv_io_plug = raw_aio_plug,
     .bdrv_io_unplug = raw_aio_unplug,
+    .bdrv_probe_blocksizes = raw_probe_blocksizes,
     .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
     .bdrv_co_truncate = raw_co_truncate,
diff --git a/block/nvme.c b/block/nvme.c
index 2b5421e7aa6..1845d07577b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -989,6 +989,8 @@ static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
     uint32_t blocksize = nvme_get_blocksize(bs);
     bsz->phys = blocksize;
     bsz->log = blocksize;
+    bsz->opt_io = 0;
+    bsz->discard_granularity = -1;
     return 0;
 }
 
diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6a..847df11f2ae 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -507,6 +507,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
     BDRVRawState *s = bs->opaque;
+    uint32_t size;
     int ret;
 
     ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
@@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
         return ret;
     }
 
-    if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
+    size = MAX(bsz->log, bsz->phys);
+    if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
         return -ENOTSUP;
     }
 
diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573da71..c907e5a7722 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -70,19 +70,27 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
     backend_ret = blk_probe_blocksizes(blk, &blocksizes);
     /* fill in detected values if they are not defined via qemu command line */
     if (!conf->physical_block_size) {
-        if (!backend_ret) {
+        if (!backend_ret && blocksizes.phys) {
            conf->physical_block_size = blocksizes.phys;
         } else {
             conf->physical_block_size = BDRV_SECTOR_SIZE;
         }
     }
     if (!conf->logical_block_size) {
-        if (!backend_ret) {
+        if (!backend_ret && blocksizes.log) {
             conf->logical_block_size = blocksizes.log;
         } else {
             conf->logical_block_size = BDRV_SECTOR_SIZE;
         }
     }
+    if (!backend_ret) {
+        if (!conf->opt_io_size) {
+            conf->opt_io_size = blocksizes.opt_io;
+        }
+        if (conf->discard_granularity == -1) {
+            conf->discard_granularity = blocksizes.discard_granularity;
+        }
+    }
 
     if (conf->logical_block_size > conf->physical_block_size) {
         error_setg(errp,
diff --git a/include/block/block.h b/include/block/block.h
index b3f6e509d49..d12471a6cc4 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -93,6 +93,8 @@ typedef enum {
 typedef struct BlockSizes {
     uint32_t phys;
     uint32_t log;
+    uint32_t discard_granularity;
+    uint32_t opt_io;
 } BlockSizes;
 
 typedef struct HDGeometry {
-- 
2.24.3 (Apple Git-128)



             reply	other threads:[~2021-03-05 12:19 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-05 12:17 Akihiko Odaki [this message]
2021-03-05 12:21 ` [PATCH v2] block/file-posix: Optimize for macOS no-reply
2021-03-08 15:17 ` Stefan Hajnoczi
2021-03-08 15:37   ` Akihiko Odaki
2021-03-09  4:52     ` Akihiko Odaki
2021-03-09  9:37       ` Kevin Wolf
2021-03-09 10:26         ` Daniel P. Berrangé
2021-03-09 10:47           ` Kevin Wolf
2021-03-09 10:52           ` Akihiko Odaki
2021-03-10 11:38     ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210305121748.65173-1-akihiko.odaki@gmail.com \
    --to=akihiko.odaki@gmail.com \
    --cc=armbru@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=fam@euphon.net \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mail@knazarov.com \
    --cc=mreitz@redhat.com \
    --cc=pkrempa@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).