qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kevin Wolf <kwolf@redhat.com>
To: anthony@codemonkey.ws
Cc: kwolf@redhat.com, qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 33/46] sheepdog: implement SD_OP_FLUSH_VDI operation
Date: Thu,  5 Apr 2012 17:52:11 +0200	[thread overview]
Message-ID: <1333641144-13612-34-git-send-email-kwolf@redhat.com> (raw)
In-Reply-To: <1333641144-13612-1-git-send-email-kwolf@redhat.com>

From: Liu Yuan <tailai.ly@taobao.com>

The flush operation is meant to flush the write-back cache of the
sheepdog cluster.

By issuing a flush operation, we can assure the guest that its data
has reached the sheepdog cluster storage.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/sheepdog.c |  142 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 128 insertions(+), 14 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 00276f6f..1248534 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -32,9 +32,11 @@
 #define SD_OP_RELEASE_VDI    0x13
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
+#define SD_OP_FLUSH_VDI      0x16
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
+#define SD_FLAG_CMD_CACHE    0x04
 
 #define SD_RES_SUCCESS       0x00 /* Success */
 #define SD_RES_UNKNOWN       0x01 /* Unknown error */
@@ -293,10 +295,12 @@ typedef struct BDRVSheepdogState {
 
     char name[SD_MAX_VDI_LEN];
     int is_snapshot;
+    uint8_t cache_enabled;
 
     char *addr;
     char *port;
     int fd;
+    int flush_fd;
 
     CoMutex lock;
     Coroutine *co_send;
@@ -516,6 +520,23 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data,
     return ret;
 }
 
+static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
+                       unsigned int *wlen)
+{
+    /* Header first; on failure stop so no payload trails a bad header. */
+    int ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
+
+    if (ret < 0 || (size_t)ret < sizeof(*hdr)) {
+        error_report("failed to send a req, %s", strerror(errno));
+        return -EIO;
+    }
+    ret = qemu_co_send(sockfd, data, *wlen);
+    if (ret < 0 || (unsigned int)ret < *wlen) {
+        error_report("failed to send a req, %s", strerror(errno));
+        return -EIO;
+    }
+    return ret;
+}
 static int do_req(int sockfd, SheepdogReq *hdr, void *data,
                   unsigned int *wlen, unsigned int *rlen)
 {
@@ -550,6 +571,40 @@ out:
     return ret;
 }
 
+static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
+                     unsigned int *wlen, unsigned int *rlen)
+{
+    int ret, n;
+    /* Block for the whole exchange; the out path restores non-blocking. */
+    socket_set_block(sockfd);
+    ret = send_co_req(sockfd, hdr, data, wlen);
+    if (ret < 0) {
+        goto out;
+    }
+    /* From here on, anything short of a complete response is an I/O error. */
+    ret = -EIO;
+    n = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
+    if (n < 0 || (size_t)n < sizeof(*hdr)) {
+        error_report("failed to get a rsp, %s", strerror(errno));
+        goto out;
+    }
+    /* Never read more payload than the response header announces. */
+    if (*rlen > hdr->data_length) {
+        *rlen = hdr->data_length;
+    }
+    if (*rlen) {
+        n = qemu_co_recv(sockfd, data, *rlen);
+        if (n < 0 || (unsigned int)n < *rlen) {
+            error_report("failed to get the data, %s", strerror(errno));
+            goto out;
+        }
+    }
+    ret = 0;
+out:
+    socket_set_nonblock(sockfd);
+    return ret;
+}
+
 static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
                            struct iovec *iov, int niov, int create,
                            enum AIOCBState aiocb_type);
@@ -900,6 +955,10 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
     }
 
+    if (s->cache_enabled) {
+        hdr.flags |= SD_FLAG_CMD_CACHE;
+    }
+
     hdr.oid = oid;
     hdr.cow_oid = old_oid;
     hdr.copies = s->inode.nr_copies;
@@ -942,7 +1001,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
 static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
                              unsigned int datalen, uint64_t offset,
-                             int write, int create)
+                             int write, int create, uint8_t cache)
 {
     SheepdogObjReq hdr;
     SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -965,6 +1024,11 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
         rlen = datalen;
         hdr.opcode = SD_OP_READ_OBJ;
     }
+
+    if (cache) {
+        hdr.flags |= SD_FLAG_CMD_CACHE;
+    }
+
     hdr.oid = oid;
     hdr.data_length = datalen;
     hdr.offset = offset;
@@ -986,15 +1050,18 @@ static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
 }
 
 static int read_object(int fd, char *buf, uint64_t oid, int copies,
-                       unsigned int datalen, uint64_t offset)
+                       unsigned int datalen, uint64_t offset, uint8_t cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
+                             cache);
 }
 
 static int write_object(int fd, char *buf, uint64_t oid, int copies,
-                        unsigned int datalen, uint64_t offset, int create)
+                        unsigned int datalen, uint64_t offset, int create,
+                        uint8_t cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
+                             cache);
 }
 
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1026,6 +1093,15 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
         goto out;
     }
 
+    if (flags & BDRV_O_CACHE_WB) {
+        s->cache_enabled = 1;
+        s->flush_fd = connect_to_sdog(s->addr, s->port);
+        if (s->flush_fd < 0) {
+            error_report("failed to connect");
+            goto out;
+        }
+    }
+
     if (snapid) {
         dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
         s->is_snapshot = 1;
@@ -1038,7 +1114,8 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags)
     }
 
     buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
+    ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
+                      s->cache_enabled);
 
     closesocket(fd);
 
@@ -1272,6 +1349,9 @@ static void sd_close(BlockDriverState *bs)
 
     qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
     closesocket(s->fd);
+    if (s->cache_enabled) {
+        closesocket(s->flush_fd);
+    }
     g_free(s->addr);
 }
 
@@ -1305,7 +1385,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
     s->inode.vdi_size = offset;
     ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0);
+                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
     close(fd);
 
     if (ret < 0) {
@@ -1387,7 +1467,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
     }
 
     ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
-                      SD_INODE_SIZE, 0);
+                      SD_INODE_SIZE, 0, s->cache_enabled);
 
     closesocket(fd);
 
@@ -1575,6 +1655,36 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
     return acb->ret;
 }
 
+static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
+{
+    BDRVSheepdogState *s = bs->opaque;
+    SheepdogObjReq hdr = { 0 };
+    SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
+    SheepdogInode *inode = &s->inode;
+    int ret;
+    unsigned int wlen = 0, rlen = 0;
+
+    if (!s->cache_enabled) {
+        return 0;
+    }
+    /* Header-only request (wlen == rlen == 0) sent over s->flush_fd. */
+    hdr.opcode = SD_OP_FLUSH_VDI;
+    hdr.oid = vid_to_vdi_oid(inode->vdi_id);
+    ret = do_co_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
+    if (ret) {
+        error_report("failed to send a request to the sheep");
+        /* Never hand a positive byte count back as the flush result. */
+        return ret < 0 ? ret : -EIO;
+    }
+    /* rsp aliases hdr, which do_co_req() overwrote with the response. */
+    if (rsp->result != SD_RES_SUCCESS) {
+        error_report("%s", sd_strerror(rsp->result));
+        return -EIO;
+    }
+
+    return 0;
+}
+
 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
     BDRVSheepdogState *s = bs->opaque;
@@ -1610,7 +1720,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     }
 
     ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0);
+                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
     if (ret < 0) {
         error_report("failed to write snapshot's inode.");
         ret = -EIO;
@@ -1629,7 +1739,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     inode = (SheepdogInode *)g_malloc(datalen);
 
     ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
-                      s->inode.nr_copies, datalen, 0);
+                      s->inode.nr_copies, datalen, 0, s->cache_enabled);
 
     if (ret < 0) {
         error_report("failed to read new inode info. %s", strerror(errno));
@@ -1684,7 +1794,7 @@ static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 
     buf = g_malloc(SD_INODE_SIZE);
     ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
-                      SD_INODE_SIZE, 0);
+                      SD_INODE_SIZE, 0, s->cache_enabled);
 
     closesocket(fd);
 
@@ -1779,7 +1889,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 
         /* we don't need to read entire object */
         ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
-                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
+                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
+                          s->cache_enabled);
 
         if (ret) {
             continue;
@@ -1835,10 +1946,12 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
         create = (offset == 0);
         if (load) {
             ret = read_object(fd, (char *)data, vmstate_oid,
-                              s->inode.nr_copies, data_len, offset);
+                              s->inode.nr_copies, data_len, offset,
+                              s->cache_enabled);
         } else {
             ret = write_object(fd, (char *)data, vmstate_oid,
-                               s->inode.nr_copies, data_len, offset, create);
+                               s->inode.nr_copies, data_len, offset, create,
+                               s->cache_enabled);
         }
 
         if (ret < 0) {
@@ -1904,6 +2017,7 @@ BlockDriver bdrv_sheepdog = {
 
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
+    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
-- 
1.7.6.5

  parent reply	other threads:[~2012-04-05 16:35 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-05 15:51 [Qemu-devel] [PULL 00/46] Block patches Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 01/46] trace-events: Rename 'next' argument Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 02/46] tracetool: Forbid argument name 'next' Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 03/46] qcow2: Remove unused parameter in get_cluster_table() Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 04/46] qemu-io: add option to enable tracing Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 05/46] block: push recursive flushing up from drivers Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 07/46] ide: IDENTIFY word 86 bit 14 is reserved Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 08/46] ide: Add "model=s" qdev option Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 09/46] ide: Change serial number strncpy() to pstrcpy() Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 10/46] ide: Adds wwn=hex qdev option Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 11/46] block/vpc: write checksum back to footer after check Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 12/46] qerror: fix QERR_PROPERTY_VALUE_OUT_OF_RANGE description Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 13/46] qdev: add blocksize property type Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 14/46] block: enforce constraints on block size properties Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 15/46] vdi: basic conversion to coroutines Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 16/46] vdi: move end-of-I/O handling at the end Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 17/46] vdi: merge aio_read_cb and aio_write_cb into callers Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 18/46] vdi: move aiocb fields to locals Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 19/46] vdi: leave bounce buffering to block layer Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 20/46] vdi: do not create useless iovecs Kevin Wolf
2012-04-05 15:51 ` [Qemu-devel] [PATCH 21/46] vdi: change goto to loop Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 22/46] Use DMADirection type for dma_bdrv_io Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 23/46] block: disable I/O throttling on sync api Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 25/46] block: fix streaming/closing race Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 27/46] block: document job API Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 28/46] qemu-img: add image fragmentation statistics Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 29/46] qed: " Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 30/46] qemu-img: add dirty flag status Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 31/46] qed: track " Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 32/46] block: bdrv_append() fixes Kevin Wolf
2012-04-05 15:52 ` Kevin Wolf [this message]
2012-04-05 15:52 ` [Qemu-devel] [PATCH 34/46] sheepdog: fix send req helpers Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 36/46] qemu-iotests: Test unknown qcow2 header extensions Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 37/46] qemu-iotests: Fix call syntax for qemu-img Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 38/46] qemu-iotests: Fix call syntax for qemu-io Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 40/46] block: Add new BDRV_O_INCOMING flag to notice incoming live migration Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 42/46] blockdev: open images with BDRV_O_INCOMING on " Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 43/46] qed: add bdrv_invalidate_cache to be called after " Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 44/46] migration: clear BDRV_O_INCOMING flags on end of " Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 45/46] qed: honor BDRV_O_INCOMING for " Kevin Wolf
2012-04-05 15:52 ` [Qemu-devel] [PATCH 46/46] qed: remove incoming live migration blocker Kevin Wolf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1333641144-13612-34-git-send-email-kwolf@redhat.com \
    --to=kwolf@redhat.com \
    --cc=anthony@codemonkey.ws \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).