From: Stefan Hajnoczi <stefanha@redhat.com>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>,
Peter Maydell <peter.maydell@linaro.org>,
Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>,
Stefan Hajnoczi <stefanha@redhat.com>,
Liu Yuan <namei.unix@gmail.com>,
MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Subject: [Qemu-devel] [PULL 37/42] sheepdog: fix vdi object update after live snapshot
Date: Fri, 6 Jun 2014 18:13:58 +0200 [thread overview]
Message-ID: <1402071243-16702-38-git-send-email-stefanha@redhat.com> (raw)
In-Reply-To: <1402071243-16702-1-git-send-email-stefanha@redhat.com>
From: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
The sheepdog driver should decide whether a write request is COW based on the
inode object that is active when the write request is issued.
Example of wrong inode update path in the previous driver:
1. the driver issues an ordinary write request to an existing object
2. user creates a snapshot of the VDI before the write request is completed
3. the response for the request is RDONLY, because the VDI is already a snapshot
4. the driver reloads the inode object of the new active VDI, then issues the
write request again
5. the second write request can be completed
6. the driver decides whether the request is COW using the conditional branch below:
if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
7. the ID of the written object and the VID of the new active VDI are different,
so the driver updates data_vdi_id[idx] and writes the inode object
8. the existing object cannot be seen by the new active VDI, which results in an
object leak
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Liu Yuan <namei.unix@gmail.com>
Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/sheepdog.c | 40 +++++++++++++++++++++++-----------------
1 file changed, 23 insertions(+), 17 deletions(-)
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 9175cc2..5f7e025 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -282,6 +282,7 @@ typedef struct AIOReq {
unsigned int data_len;
uint8_t flags;
uint32_t id;
+ bool create;
QLIST_ENTRY(AIOReq) aio_siblings;
} AIOReq;
@@ -405,7 +406,7 @@ static const char * sd_strerror(int err)
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
uint64_t oid, unsigned int data_len,
- uint64_t offset, uint8_t flags,
+ uint64_t offset, uint8_t flags, bool create,
uint64_t base_oid, unsigned int iov_offset)
{
AIOReq *aio_req;
@@ -419,6 +420,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
aio_req->data_len = data_len;
aio_req->flags = flags;
aio_req->id = s->aioreq_seq_num++;
+ aio_req->create = create;
acb->nr_pending++;
return aio_req;
@@ -667,8 +669,8 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
}
static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
- struct iovec *iov, int niov, bool create,
- enum AIOCBState aiocb_type);
+ struct iovec *iov, int niov,
+ enum AIOCBState aiocb_type);
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
@@ -701,7 +703,7 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
/* move aio_req from pending list to inflight one */
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
}
}
@@ -800,7 +802,7 @@ static void coroutine_fn aio_read_response(void *opaque)
}
idx = data_oid_to_idx(aio_req->oid);
- if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+ if (aio_req->create) {
/*
* If the object is newly created one, we need to update
* the vdi object (metadata object). min_dirty_data_idx
@@ -1120,8 +1122,8 @@ out:
}
static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
- struct iovec *iov, int niov, bool create,
- enum AIOCBState aiocb_type)
+ struct iovec *iov, int niov,
+ enum AIOCBState aiocb_type)
{
int nr_copies = s->inode.nr_copies;
SheepdogObjReq hdr;
@@ -1132,6 +1134,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
uint64_t offset = aio_req->offset;
uint8_t flags = aio_req->flags;
uint64_t old_oid = aio_req->base_oid;
+ bool create = aio_req->create;
if (!nr_copies) {
error_report("bug");
@@ -1324,6 +1327,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
aio_req->flags = 0;
aio_req->base_oid = 0;
+ aio_req->create = false;
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
return true;
@@ -1336,7 +1340,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
- bool create = false;
+
+ aio_req->create = false;
/* check whether this request becomes a CoW one */
if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
@@ -1354,17 +1359,17 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
aio_req->flags |= SD_FLAG_CMD_COW;
}
- create = true;
+ aio_req->create = true;
}
out:
if (is_data_obj(aio_req->oid)) {
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
} else {
struct iovec iov;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
- add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
}
}
@@ -1877,9 +1882,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
- data_len, offset, 0, 0, offset);
+ data_len, offset, 0, false, 0, offset);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
acb->aio_done_func = sd_finish_aiocb;
acb->aiocb_type = AIOCB_WRITE_UDATA;
@@ -2078,7 +2083,8 @@ static int coroutine_fn sd_co_rw_vector(void *p)
DPRINTF("new oid %" PRIx64 "\n", oid);
}
- aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
+ aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
+ old_oid, done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
if (create) {
@@ -2087,7 +2093,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
}
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
done:
offset = 0;
@@ -2167,9 +2173,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
acb->aio_done_func = sd_finish_aiocb;
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
- 0, 0, 0, 0, 0);
+ 0, 0, 0, false, 0, 0);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
+ add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
qemu_coroutine_yield();
return acb->ret;
--
1.9.3
next prev parent reply other threads:[~2014-06-06 16:16 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-06 16:13 [Qemu-devel] [PULL 00/42] Block patches Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 01/42] aio: fix qemu_bh_schedule() bh->ctx race condition Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 02/42] block: use BlockDriverState AioContext Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 03/42] block: acquire AioContext in bdrv_*_all() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 04/42] block: acquire AioContext in bdrv_drain_all() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 05/42] block: add bdrv_set_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 06/42] blkdebug: use BlockDriverState's AioContext Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 07/42] blkverify: implement .bdrv_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 08/42] curl: " Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 09/42] gluster: use BlockDriverState's AioContext Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 10/42] iscsi: implement .bdrv_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 11/42] nbd: " Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 12/42] nfs: " Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 13/42] qed: use BlockDriverState's AioContext Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 14/42] quorum: implement .bdrv_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 15/42] block/raw-posix: " Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 16/42] block/linux-aio: fix memory and fd leak Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 17/42] block/raw-win32: create one QEMUWin32AIOState per BDRVRawState Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 18/42] block/raw-win32: implement .bdrv_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 19/42] rbd: use BlockDriverState's AioContext Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 20/42] sheepdog: implement .bdrv_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 21/42] ssh: use BlockDriverState's AioContext Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 22/42] vmdk: implement .bdrv_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 23/42] dataplane: use the QEMU block layer for I/O Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 24/42] dataplane: delete IOQueue since it is no longer used Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 25/42] dataplane: implement async flush Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 26/42] raw-posix: drop raw_get_aio_fd() since it is no longer used Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 27/42] block: Move declaration of bdrv_get_aio_context to block.h Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 28/42] virtio-blk: Allow config-wce in dataplane Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 29/42] virtio-blk: Factor out virtio_blk_handle_scsi_req from virtio_blk_handle_scsi Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 30/42] dataplane: Support VIRTIO_BLK_T_SCSI_CMD Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 31/42] throttle: add throttle_detach/attach_aio_context() Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 32/42] throttle: add detach/attach test case Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 33/42] blockdev: acquire AioContext in block_set_io_throttle Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 34/42] block: fix wrong order in live block migration setup Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 35/42] qemu-img: Document check exit codes Stefan Hajnoczi
2014-06-06 16:13 ` [Qemu-devel] [PULL 36/42] rbd: Fix leaks in rbd_start_aio() error path Stefan Hajnoczi
2014-06-06 16:13 ` Stefan Hajnoczi [this message]
2014-06-06 16:13 ` [Qemu-devel] [PULL 38/42] sheepdog: reload only header in a case of live snapshot Stefan Hajnoczi
2014-06-06 16:14 ` [Qemu-devel] [PULL 39/42] qapi: Extract qapi/common.json definitions Stefan Hajnoczi
2014-06-06 16:14 ` [Qemu-devel] [PULL 40/42] qapi: create two block related json modules Stefan Hajnoczi
2014-06-06 16:14 ` [Qemu-devel] [PULL 41/42] qapi: Extract qapi/block-core.json definitions Stefan Hajnoczi
2014-06-06 16:14 ` [Qemu-devel] [PULL 42/42] qapi: Extract qapi/block.json definitions Stefan Hajnoczi
2014-06-09 12:04 ` [Qemu-devel] [PULL 00/42] Block patches Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1402071243-16702-38-git-send-email-stefanha@redhat.com \
--to=stefanha@redhat.com \
--cc=kwolf@redhat.com \
--cc=mitake.hitoshi@lab.ntt.co.jp \
--cc=morita.kazutaka@lab.ntt.co.jp \
--cc=namei.unix@gmail.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).