qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, vsementsov@virtuozzo.com, mreitz@redhat.com,
	eblake@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH] file-posix: Cache lseek result for data regions
Date: Thu, 24 Jan 2019 15:17:31 +0100	[thread overview]
Message-ID: <20190124141731.21509-1-kwolf@redhat.com> (raw)

Depending on the exact image layout and the storage backend (tmpfs is
known to have very slow SEEK_HOLE/SEEK_DATA), caching lseek results can
save us a lot of time e.g. during a mirror block job or qemu-img convert
with a fragmented source image (.bdrv_co_block_status on the protocol
layer can be called for every single cluster in the extreme case).

We may only cache data regions because of possible concurrent writers.
This means that we can later treat a recently punched hole as data, but
this is safe. We can't cache holes because then we might treat recently
written data as holes, which can cause corruption.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 51 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 8aee7a3fb8..7272c7c99d 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -168,6 +168,12 @@ typedef struct BDRVRawState {
     bool needs_alignment;
     bool check_cache_dropped;
 
+    struct seek_data_cache {
+        bool        valid;
+        uint64_t    start;
+        uint64_t    end;
+    } seek_data_cache;
+
     PRManager *pr_mgr;
 } BDRVRawState;
 
@@ -1555,8 +1561,17 @@ static int handle_aiocb_write_zeroes_unmap(void *opaque)
 {
     RawPosixAIOData *aiocb = opaque;
     BDRVRawState *s G_GNUC_UNUSED = aiocb->bs->opaque;
+    struct seek_data_cache *sdc;
     int ret;
 
+    /* Invalidate seek_data_cache if it overlaps */
+    sdc = &s->seek_data_cache;
+    if (sdc->valid && !(sdc->end < aiocb->aio_offset ||
+                        sdc->start > aiocb->aio_offset + aiocb->aio_nbytes))
+    {
+        sdc->valid = false;
+    }
+
     /* First try to write zeros and unmap at the same time */
 
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
@@ -1634,11 +1649,20 @@ static int handle_aiocb_discard(void *opaque)
     RawPosixAIOData *aiocb = opaque;
     int ret = -EOPNOTSUPP;
     BDRVRawState *s = aiocb->bs->opaque;
+    struct seek_data_cache *sdc;
 
     if (!s->has_discard) {
         return -ENOTSUP;
     }
 
+    /* Invalidate seek_data_cache if it overlaps */
+    sdc = &s->seek_data_cache;
+    if (sdc->valid && !(sdc->end < aiocb->aio_offset ||
+                        sdc->start > aiocb->aio_offset + aiocb->aio_nbytes))
+    {
+        sdc->valid = false;
+    }
+
     if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
 #ifdef BLKDISCARD
         do {
@@ -2424,6 +2448,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
                                             int64_t *map,
                                             BlockDriverState **file)
 {
+    BDRVRawState *s = bs->opaque;
+    struct seek_data_cache *sdc;
     off_t data = 0, hole = 0;
     int ret;
 
@@ -2439,6 +2465,14 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
         return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
     }
 
+    sdc = &s->seek_data_cache;
+    if (sdc->valid && sdc->start <= offset && sdc->end > offset) {
+        *pnum = MIN(bytes, sdc->end - offset);
+        *map = offset;
+        *file = bs;
+        return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+    }
+
     ret = find_allocation(bs, offset, &data, &hole);
     if (ret == -ENXIO) {
         /* Trailing hole */
@@ -2451,14 +2485,27 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
     } else if (data == offset) {
         /* On a data extent, compute bytes to the end of the extent,
          * possibly including a partial sector at EOF. */
-        *pnum = MIN(bytes, hole - offset);
+        *pnum = hole - offset;
         ret = BDRV_BLOCK_DATA;
     } else {
         /* On a hole, compute bytes to the beginning of the next extent.  */
         assert(hole == offset);
-        *pnum = MIN(bytes, data - offset);
+        *pnum = data - offset;
         ret = BDRV_BLOCK_ZERO;
     }
+
+    /* Caching allocated ranges is okay even if another process writes to the
+     * same file because we allow declaring things allocated even if there is a
+     * hole. However, we cannot cache holes without risking corruption. */
+    if (ret == BDRV_BLOCK_DATA) {
+        *sdc = (struct seek_data_cache) {
+            .valid  = true,
+            .start  = offset,
+            .end    = offset + *pnum,
+        };
+    }
+
+    *pnum = MIN(*pnum, bytes);
     *map = offset;
     *file = bs;
     return ret | BDRV_BLOCK_OFFSET_VALID;
-- 
2.20.1

             reply	other threads:[~2019-01-24 14:18 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-24 14:17 Kevin Wolf [this message]
2019-01-24 14:40 ` [Qemu-devel] [PATCH] file-posix: Cache lseek result for data regions Vladimir Sementsov-Ogievskiy
2019-01-24 15:11   ` Kevin Wolf
2019-01-24 15:22     ` Vladimir Sementsov-Ogievskiy
2019-01-24 15:42       ` Kevin Wolf
2019-01-25 10:10         ` Paolo Bonzini
2019-01-25 10:30           ` Kevin Wolf
2019-02-04 10:17             ` Paolo Bonzini
2019-01-24 15:56 ` Eric Blake
2019-01-29 10:56   ` Kevin Wolf
2019-01-29 21:03     ` Eric Blake
2019-01-24 16:18 ` Vladimir Sementsov-Ogievskiy
2019-01-24 16:36   ` Kevin Wolf
2019-01-25  9:13     ` Vladimir Sementsov-Ogievskiy
2019-01-25 13:26       ` Eric Blake

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190124141731.21509-1-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=eblake@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=vsementsov@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).