qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: qemu-block@nongnu.org
Cc: kwolf@redhat.com, stefanha@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PULL 21/58] file-posix: Add image locking to perm operations
Date: Thu, 11 May 2017 16:32:24 +0200	[thread overview]
Message-ID: <1494513181-7900-22-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1494513181-7900-1-git-send-email-kwolf@redhat.com>

From: Fam Zheng <famz@redhat.com>

This extends the permission bits of the op blocker API to external
processes using Linux OFD locks.

Each permission in @perm and @shared_perm is represented by a locked
byte in the image file.  Requesting a permission in @perm is translated
to a shared lock of the corresponding byte; refusing to share the same
permission is translated to a shared lock of a separate byte. With that,
we use twice as many bytes as there are distinct permission types.

virtlockd in libvirt locks the first byte, so we do locking from a
higher offset.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/file-posix.c | 276 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 275 insertions(+), 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index d13e99c..a09055b 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -129,12 +129,23 @@ do { \
 
 #define MAX_BLOCKSIZE	4096
 
+/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
+ * leaving a few more bytes for its future use. */
+#define RAW_LOCK_PERM_BASE             100
+#define RAW_LOCK_SHARED_BASE           200
+
 typedef struct BDRVRawState {
     int fd;
+    int lock_fd;
+    bool use_lock;
     int type;
     int open_flags;
     size_t buf_align;
 
+    /* The current permissions. */
+    uint64_t perm;
+    uint64_t shared_perm;
+
 #ifdef CONFIG_XFS
     bool is_xfs:1;
 #endif
@@ -411,6 +422,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     BlockdevAioOptions aio, aio_default;
     int fd, ret;
     struct stat st;
+    OnOffAuto locking;
 
     opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -440,6 +452,37 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
     s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
 
+    locking = qapi_enum_parse(OnOffAuto_lookup, qemu_opt_get(opts, "locking"),
+                              ON_OFF_AUTO__MAX, ON_OFF_AUTO_AUTO, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+    switch (locking) {
+    case ON_OFF_AUTO_ON:
+        s->use_lock = true;
+#ifndef F_OFD_SETLK
+        fprintf(stderr,
+                "File lock requested but OFD locking syscall is unavailable, "
+                "falling back to POSIX file locks.\n"
+                "Due to the implementation, locks can be lost unexpectedly.\n");
+#endif
+        break;
+    case ON_OFF_AUTO_OFF:
+        s->use_lock = false;
+        break;
+    case ON_OFF_AUTO_AUTO:
+#ifdef F_OFD_SETLK
+        s->use_lock = true;
+#else
+        s->use_lock = false;
+#endif
+        break;
+    default:
+        abort();
+    }
+
     s->open_flags = open_flags;
     raw_parse_flags(bdrv_flags, &s->open_flags);
 
@@ -455,6 +498,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
     s->fd = fd;
 
+    s->lock_fd = -1;
+    if (s->use_lock) {
+        fd = qemu_open(filename, s->open_flags);
+        if (fd < 0) {
+            ret = -errno;
+            error_setg_errno(errp, errno, "Could not open '%s' for locking",
+                             filename);
+            qemu_close(s->fd);
+            goto fail;
+        }
+        s->lock_fd = fd;
+    }
+    s->perm = 0;
+    s->shared_perm = BLK_PERM_ALL;
+
 #ifdef CONFIG_LINUX_AIO
      /* Currently Linux does AIO only for files opened with O_DIRECT */
     if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -542,6 +600,161 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     return raw_open_common(bs, options, flags, 0, errp);
 }
 
+typedef enum {
+    RAW_PL_PREPARE,
+    RAW_PL_COMMIT,
+    RAW_PL_ABORT,
+} RawPermLockOp;
+
+#define PERM_FOREACH(i) \
+    for ((i) = 0; (1ULL << (i)) <= BLK_PERM_ALL; i++)
+
+/* Lock bytes indicated by @perm_lock_bits and @shared_perm_lock_bits in the
+ * file; if @unlock == true, also unlock the unneeded bytes.
+ * @shared_perm_lock_bits is the mask of all permissions that are NOT shared.
+ */
+static int raw_apply_lock_bytes(BDRVRawState *s,
+                                uint64_t perm_lock_bits,
+                                uint64_t shared_perm_lock_bits,
+                                bool unlock, Error **errp)
+{
+    int ret;
+    int i;
+
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        if (perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        if (shared_perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
+static int raw_check_lock_bytes(BDRVRawState *s,
+                                uint64_t perm, uint64_t shared_perm,
+                                Error **errp)
+{
+    int ret;
+    int i;
+
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (perm & p) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                char *perm_name = bdrv_perm_names(p);
+                error_setg(errp,
+                           "Failed to get \"%s\" lock",
+                           perm_name);
+                g_free(perm_name);
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    PERM_FOREACH(i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (!(shared_perm & p)) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                char *perm_name = bdrv_perm_names(p);
+                error_setg(errp,
+                           "Failed to get shared \"%s\" lock",
+                           perm_name);
+                g_free(perm_name);
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+static int raw_handle_perm_lock(BlockDriverState *bs,
+                                RawPermLockOp op,
+                                uint64_t new_perm, uint64_t new_shared,
+                                Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    if (!s->use_lock) {
+        return 0;
+    }
+
+    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
+        return 0;
+    }
+
+    assert(s->lock_fd > 0);
+
+    switch (op) {
+    case RAW_PL_PREPARE:
+        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
+                                   ~s->shared_perm | ~new_shared,
+                                   false, errp);
+        if (!ret) {
+            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
+            if (!ret) {
+                return 0;
+            }
+        }
+        op = RAW_PL_ABORT;
+        /* fall through to unlock bytes. */
+    case RAW_PL_ABORT:
+        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    case RAW_PL_COMMIT:
+        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    }
+    return ret;
+}
+
 static int raw_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
 {
@@ -1410,6 +1623,10 @@ static void raw_close(BlockDriverState *bs)
         qemu_close(s->fd);
         s->fd = -1;
     }
+    if (s->lock_fd >= 0) {
+        qemu_close(s->lock_fd);
+        s->lock_fd = -1;
+    }
 }
 
 static int raw_truncate(BlockDriverState *bs, int64_t offset, Error **errp)
@@ -1954,6 +2171,54 @@ static QemuOptsList raw_create_opts = {
     }
 };
 
+static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
+                          Error **errp)
+{
+    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+}
+
+static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
+{
+    BDRVRawState *s = bs->opaque;
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    s->perm = perm;
+    s->shared_perm = shared;
+}
+
+static void raw_abort_perm_update(BlockDriverState *bs)
+{
+    raw_handle_perm_lock(bs, RAW_PL_ABORT, 0, 0, NULL);
+}
+
+static int raw_inactivate(BlockDriverState *bs)
+{
+    int ret;
+    uint64_t perm = 0;
+    uint64_t shared = BLK_PERM_ALL;
+
+    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, NULL);
+    if (ret) {
+        return ret;
+    }
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    return 0;
+}
+
+
+static void raw_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
+    assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE));
+    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm,
+                               errp);
+    if (ret) {
+        return;
+    }
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL);
+}
+
 BlockDriver bdrv_file = {
     .format_name = "file",
     .protocol_name = "file",
@@ -1984,7 +2249,11 @@ BlockDriver bdrv_file = {
     .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
-
+    .bdrv_inactivate = raw_inactivate,
+    .bdrv_invalidate_cache = raw_invalidate_cache,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
     .create_opts = &raw_create_opts,
 };
 
@@ -2443,6 +2712,11 @@ static BlockDriver bdrv_host_device = {
     .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
+    .bdrv_inactivate = raw_inactivate,
+    .bdrv_invalidate_cache = raw_invalidate_cache,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
     .bdrv_probe_blocksizes = hdev_probe_blocksizes,
     .bdrv_probe_geometry = hdev_probe_geometry,
 
-- 
1.8.3.1

  parent reply	other threads:[~2017-05-11 14:34 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-11 14:32 [Qemu-devel] [PULL 00/58] Block layer patches Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 01/58] block: Make bdrv_perm_names public Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 02/58] block: Add, parse and store "force-share" option Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 03/58] block: Respect "force-share" in perm propagating Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 04/58] qemu-img: Add --force-share option to subcommands Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 05/58] qemu-img: Update documentation for -U Kevin Wolf
2017-05-12 17:37   ` [Qemu-devel] [Qemu-block] " Max Reitz
2017-05-15  9:12     ` Fam Zheng
2017-05-11 14:32 ` [Qemu-devel] [PULL 06/58] qemu-io: Add --force-share option Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 07/58] iotests: 030: Prepare for image locking Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 08/58] iotests: 046: " Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 09/58] iotests: 055: Don't attach the target image already for drive-backup Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 10/58] iotests: 085: Avoid image locking conflict Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 11/58] iotests: 087: Don't attach test image twice Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 12/58] iotests: 091: Quit QEMU before checking image Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 13/58] iotests: 172: Use separate images for multiple devices Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 14/58] tests: Use null-co:// instead of /dev/null as the dummy image Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 15/58] file-posix: Add 'locking' option Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 16/58] file-win32: Error out if locking=on Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 17/58] tests: Disable image lock in test-replication Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 18/58] block: Reuse bs as backing hd for drive-backup sync=none Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 19/58] osdep: Add qemu_lock_fd and qemu_unlock_fd Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 20/58] osdep: Fall back to posix lock when OFD lock is unavailable Kevin Wolf
2017-05-11 14:32 ` Kevin Wolf [this message]
2017-05-11 14:32 ` [Qemu-devel] [PULL 22/58] qemu-iotests: Add test case 153 for image locking Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 23/58] tests: Add POSIX image locking test case 182 Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 24/58] qcow2: Fix preallocation size formula Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 25/58] qcow2: Reuse preallocated zero clusters Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 26/58] qcow2: Discard " Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 27/58] iotests: Extend test 066 Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 28/58] migration: Unify block node activation error handling Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 29/58] block: New BdrvChildRole.activate() for blk_resume_after_migration() Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 30/58] block: Drop permissions when migration completes Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 31/58] block: Inactivate parents before children Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 32/58] block: Fix write/resize permissions for inactive images Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 33/58] file-posix: Remove .bdrv_inactivate/invalidate_cache Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 34/58] qemu-img: wait for convert coroutines to complete Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 35/58] nvme: Implement Write Zeroes Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 36/58] blockdev: use drained_begin/end for qmp_block_resize Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 37/58] qemu-io: Improve alignment checks Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 38/58] qemu-io: Switch 'alloc' command to byte-based length Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 39/58] qemu-io: Switch 'map' output to byte-based reporting Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 40/58] blkdebug: Sanity check block layer guarantees Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 41/58] blkdebug: Refactor error injection Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 42/58] blkdebug: Add pass-through write_zero and discard support Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 43/58] blkdebug: Simplify override logic Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 44/58] blkdebug: Add ability to override unmap geometries Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 45/58] tests: Add coverage for recent block geometry fixes Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 46/58] qcow2: Nicer variable names in qcow2_update_snapshot_refcount() Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 47/58] qcow2: Use consistent switch indentation Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 48/58] block: Update comments on BDRV_BLOCK_* meanings Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 49/58] qcow2: Correctly report status of preallocated zero clusters Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 50/58] qcow2: Name typedef for cluster type Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 51/58] qcow2: Make distinction between zero cluster types obvious Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 52/58] qcow2: Optimize zero_single_l2() to minimize L2 churn Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 53/58] iotests: Improve _filter_qemu_img_map Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 54/58] iotests: Add test 179 to cover write zeroes with unmap Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 55/58] qcow2: Optimize write zero of unaligned tail cluster Kevin Wolf
2017-05-11 14:32 ` [Qemu-devel] [PULL 56/58] qcow2: Assert that cluster operations are aligned Kevin Wolf
2017-05-11 14:33 ` [Qemu-devel] [PULL 57/58] qcow2: Discard/zero clusters by byte count Kevin Wolf
2017-05-11 14:33 ` [Qemu-devel] [PULL 58/58] MAINTAINERS: Add qemu-progress to the block layer Kevin Wolf
2017-05-12 13:39 ` [Qemu-devel] [PULL 00/58] Block layer patches Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1494513181-7900-22-git-send-email-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).