qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH For SD and QEMU] Add discard/trim support for Sheepdog
@ 2013-04-13 11:27 Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 1/3] sheep: remove unused 'flags' in remove_object() Liu Yuan
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-13 11:27 UTC (permalink / raw)
  To: sheepdog, qemu-devel

From: Liu Yuan <tailai.ly@taobao.com>

Trim/discard is a command that allows VM to inform underlying storage system to
release unused space. Sheepdog takes use of this command to release unused
objects for better thin-provision.

This patch set aims to add trim/discard support to both Sheepdog and QEMU's
Sheepdog block driver.

There are first 3 patches for Sheepdog and the last one is for QEMU.

Thanks,
Yuan
-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 1/3] sheep: remove unused 'flags' in remove_object()
  2013-04-13 11:27 [Qemu-devel] [PATCH For SD and QEMU] Add discard/trim support for Sheepdog Liu Yuan
@ 2013-04-13 11:27 ` Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 2/3] sheep: teach sheep to discard unused objects Liu Yuan
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-13 11:27 UTC (permalink / raw)
  To: sheepdog, qemu-devel

From: Liu Yuan <tailai.ly@taobao.com>

Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
 sheep/object_cache.c |    2 +-
 sheep/sheep_priv.h   |    4 ++--
 sheep/store.c        |    6 +++---
 sheep/vdi.c          |   16 ++++++++--------
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/sheep/object_cache.c b/sheep/object_cache.c
index 93ecfe7..1b80a41 100644
--- a/sheep/object_cache.c
+++ b/sheep/object_cache.c
@@ -1097,7 +1097,7 @@ err:
 }
 
 int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
-		       uint64_t offset, uint16_t flags, bool create)
+		       uint64_t offset, bool create)
 {
 	uint32_t vid = oid_to_vid(oid);
 	uint32_t idx = object_cache_oid_to_idx(oid);
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index d1022ac..ba8e6b2 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -303,7 +303,7 @@ bool node_in_recovery(void);
 int read_backend_object(uint64_t oid, char *data, unsigned int datalen,
 		       uint64_t offset, int nr_copies);
 int write_object(uint64_t oid, char *data, unsigned int datalen,
-		 uint64_t offset, uint16_t flags, bool create, int nr_copies);
+		 uint64_t offset, bool create, int nr_copies);
 int read_object(uint64_t oid, char *data, unsigned int datalen,
 		uint64_t offset, int nr_copies);
 int remove_object(uint64_t oid, int nr_copies);
@@ -373,7 +373,7 @@ bool object_is_cached(uint64_t oid);
 void object_cache_try_to_reclaim(int);
 int object_cache_handle_request(struct request *req);
 int object_cache_write(uint64_t oid, char *data, unsigned int datalen,
-		       uint64_t offset, uint16_t flags, bool create);
+		       uint64_t offset, bool create);
 int object_cache_read(uint64_t oid, char *data, unsigned int datalen,
 		      uint64_t offset);
 int object_cache_flush_vdi(uint32_t vid);
diff --git a/sheep/store.c b/sheep/store.c
index f8eee59..d95eb01 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -420,14 +420,14 @@ int init_global_pathnames(const char *d, char *argp)
 
 /* Write data to both local object cache (if enabled) and backends */
 int write_object(uint64_t oid, char *data, unsigned int datalen,
-		 uint64_t offset, uint16_t flags, bool create, int nr_copies)
+		 uint64_t offset, bool create, int nr_copies)
 {
 	struct sd_req hdr;
 	int ret;
 
 	if (sys->enable_object_cache && object_is_cached(oid)) {
 		ret = object_cache_write(oid, data, datalen, offset,
-					 flags, create);
+					 create);
 		if (ret == SD_RES_NO_CACHE)
 			goto forward_write;
 
@@ -443,7 +443,7 @@ forward_write:
 		sd_init_req(&hdr, SD_OP_CREATE_AND_WRITE_OBJ);
 	else
 		sd_init_req(&hdr, SD_OP_WRITE_OBJ);
-	hdr.flags = flags | SD_FLAG_CMD_WRITE;
+	hdr.flags = SD_FLAG_CMD_WRITE;
 	hdr.data_length = datalen;
 
 	hdr.obj.oid = oid;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 52f91e4..5f23846 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -269,7 +269,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 
 	if (iocb->create_snapshot && cur_vid != iocb->base_vid) {
 		ret = write_object(vid_to_vdi_oid(cur_vid), (char *)cur,
-				   SD_INODE_HEADER_SIZE, 0, 0, false, 0);
+				   SD_INODE_HEADER_SIZE, 0, false, 0);
 		if (ret != 0) {
 			sd_printf(SDOG_ERR, "failed");
 			ret = SD_RES_BASE_VDI_READ;
@@ -279,7 +279,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 
 	if (iocb->base_vid) {
 		ret = write_object(vid_to_vdi_oid(iocb->base_vid), (char *)base,
-				   SD_INODE_HEADER_SIZE, 0, 0, false, 0);
+				   SD_INODE_HEADER_SIZE, 0, false, 0);
 		if (ret != 0) {
 			sd_printf(SDOG_ERR, "failed");
 			ret = SD_RES_BASE_VDI_WRITE;
@@ -288,7 +288,7 @@ static int create_vdi_obj(struct vdi_iocb *iocb, uint32_t new_vid,
 	}
 
 	ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new),
-			   0, 0, true, iocb->nr_copies);
+			   0, true, iocb->nr_copies);
 	if (ret != 0)
 		ret = SD_RES_VDI_WRITE;
 
@@ -581,7 +581,7 @@ static int delete_inode(struct deletion_work *dw)
 	memset(inode->name, 0, sizeof(inode->name));
 
 	ret = write_object(vid_to_vdi_oid(dw->vid), (char *)inode,
-			   SD_INODE_HEADER_SIZE, 0, 0, false, dw->nr_copies);
+			   SD_INODE_HEADER_SIZE, 0, false, dw->nr_copies);
 	if (ret != 0) {
 		ret = SD_RES_EIO;
 		goto out;
@@ -669,7 +669,7 @@ static void delete_one(struct work *work)
 	memset(inode->name, 0, sizeof(inode->name));
 
 	write_object(vid_to_vdi_oid(vdi_id), (void *)inode,
-		     sizeof(*inode), 0, 0, false, nr_copies);
+		     sizeof(*inode), 0, false, nr_copies);
 out:
 	free(inode);
 }
@@ -881,7 +881,7 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
 
 		if (ret == SD_RES_NO_OBJ && wr) {
 			ret = write_object(oid, (char *)vattr,
-					   data_len, 0, 0, true, nr_copies);
+					   data_len, 0, true, nr_copies);
 			if (ret)
 				ret = SD_RES_EIO;
 			else
@@ -902,14 +902,14 @@ int get_vdi_attr(struct sheepdog_vdi_attr *vattr, int data_len,
 			else if (delete) {
 				ret = write_object(oid, (char *)"", 1,
 						   offsetof(struct sheepdog_vdi_attr, name),
-						   0, false, nr_copies);
+						   false, nr_copies);
 				if (ret)
 					ret = SD_RES_EIO;
 				else
 					ret = SD_RES_SUCCESS;
 			} else if (wr) {
 				ret = write_object(oid, (char *)vattr,
-						   SD_ATTR_OBJ_SIZE, 0, 0,
+						   SD_ATTR_OBJ_SIZE, 0,
 						   false, nr_copies);
 
 				if (ret)
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 2/3] sheep: teach sheep to discard unused objects
  2013-04-13 11:27 [Qemu-devel] [PATCH For SD and QEMU] Add discard/trim support for Sheepdog Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 1/3] sheep: remove unused 'flags' in remove_object() Liu Yuan
@ 2013-04-13 11:27 ` Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 3/3] tests: add 058 to test discard/trim Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
  3 siblings, 0 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-13 11:27 UTC (permalink / raw)
  To: sheepdog, qemu-devel

From: Liu Yuan <tailai.ly@taobao.com>

Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
 include/sheepdog_proto.h |    1 +
 sheep/ops.c              |   20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index f3d69e1..99a6627 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -35,6 +35,7 @@
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
+#define SD_OP_DISCARD        0x17
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
diff --git a/sheep/ops.c b/sheep/ops.c
index a0502b2..8f6b0c1 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -741,6 +741,20 @@ static int local_flush_vdi(struct request *req)
 	return ret;
 }
 
+static int local_discard(struct request *req)
+{
+	uint64_t oid = req->rq.obj.oid;
+	uint32_t vid = oid_to_vid(oid), zero = 0;
+	int ret, cp = get_vdi_copy_number(vid), idx = data_oid_to_idx(oid);
+
+	sd_dprintf("%"PRIx64, oid);
+	ret = remove_object(oid, cp);
+	if (ret != SD_RES_SUCCESS)
+		return ret;
+	return write_object(vid_to_vdi_oid(vid), (char *)&zero, sizeof(zero),
+			    SD_INODE_HEADER_SIZE + sizeof(vid) * idx, 0, cp);
+}
+
 static int local_flush_and_del(struct request *req)
 {
 	if (!sys->enable_object_cache)
@@ -1109,6 +1123,12 @@ static struct sd_op_template sd_ops[] = {
 		.process_work = local_flush_vdi,
 	},
 
+	[SD_OP_DISCARD] = {
+		.name = "DISCARD",
+		.type = SD_OP_TYPE_LOCAL,
+		.process_work = local_discard,
+	},
+
 	[SD_OP_FLUSH_DEL_CACHE] = {
 		.name = "DEL_CACHE",
 		.type = SD_OP_TYPE_LOCAL,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH 3/3] tests: add 058 to test discard/trim
  2013-04-13 11:27 [Qemu-devel] [PATCH For SD and QEMU] Add discard/trim support for Sheepdog Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 1/3] sheep: remove unused 'flags' in remove_object() Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 2/3] sheep: teach sheep to discard unused objects Liu Yuan
@ 2013-04-13 11:27 ` Liu Yuan
  2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
  3 siblings, 0 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-13 11:27 UTC (permalink / raw)
  To: sheepdog, qemu-devel

From: Liu Yuan <tailai.ly@taobao.com>

Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
 tests/058     |   30 ++++++++++++++++++++++++++++++
 tests/058.out |   44 ++++++++++++++++++++++++++++++++++++++++++++
 tests/group   |    1 +
 3 files changed, 75 insertions(+)
 create mode 100755 tests/058
 create mode 100644 tests/058.out

diff --git a/tests/058 b/tests/058
new file mode 100755
index 0000000..0eddc81
--- /dev/null
+++ b/tests/058
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Test discard/trim
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1        # failure is the default!
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_cleanup
+
+for i in 0 1 2; do
+	_start_sheep $i "-s 1000"
+done
+_wait_for_sheep 3
+$COLLIE cluster format
+sleep 1
+$COLLIE vdi create test 100M -P
+$COLLIE node info
+qemu-io -c "discard 0 100m" sheepdog:test | _filter_qemu_io
+$COLLIE vdi check test
+for i in `seq 0 24`; do
+	$COLLIE vdi object test -i $i;
+done
+$COLLIE node info
diff --git a/tests/058.out b/tests/058.out
new file mode 100644
index 0000000..140e5c0
--- /dev/null
+++ b/tests/058.out
@@ -0,0 +1,44 @@
+QA output created by 058
+using backend farm store
+Id	Size	Used	Use%
+ 0	1000 MB	104 MB	 10%
+ 1	1000 MB	104 MB	 10%
+ 2	1000 MB	104 MB	 10%
+Total	2.9 GB	312 MB	 10%
+
+Total virtual image size	100 MB
+discard 104857600/104857600 bytes at offset 0
+100 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+finish check&repair test
+The inode object 0x7c2b25 idx 0 is not allocated
+The inode object 0x7c2b25 idx 1 is not allocated
+The inode object 0x7c2b25 idx 2 is not allocated
+The inode object 0x7c2b25 idx 3 is not allocated
+The inode object 0x7c2b25 idx 4 is not allocated
+The inode object 0x7c2b25 idx 5 is not allocated
+The inode object 0x7c2b25 idx 6 is not allocated
+The inode object 0x7c2b25 idx 7 is not allocated
+The inode object 0x7c2b25 idx 8 is not allocated
+The inode object 0x7c2b25 idx 9 is not allocated
+The inode object 0x7c2b25 idx 10 is not allocated
+The inode object 0x7c2b25 idx 11 is not allocated
+The inode object 0x7c2b25 idx 12 is not allocated
+The inode object 0x7c2b25 idx 13 is not allocated
+The inode object 0x7c2b25 idx 14 is not allocated
+The inode object 0x7c2b25 idx 15 is not allocated
+The inode object 0x7c2b25 idx 16 is not allocated
+The inode object 0x7c2b25 idx 17 is not allocated
+The inode object 0x7c2b25 idx 18 is not allocated
+The inode object 0x7c2b25 idx 19 is not allocated
+The inode object 0x7c2b25 idx 20 is not allocated
+The inode object 0x7c2b25 idx 21 is not allocated
+The inode object 0x7c2b25 idx 22 is not allocated
+The inode object 0x7c2b25 idx 23 is not allocated
+The inode object 0x7c2b25 idx 24 is not allocated
+Id	Size	Used	Use%
+ 0	1000 MB	4.0 MB	  0%
+ 1	1000 MB	4.0 MB	  0%
+ 2	1000 MB	4.0 MB	  0%
+Total	2.9 GB	12 MB	  0%
+
+Total virtual image size	100 MB
diff --git a/tests/group b/tests/group
index c82ddf9..09f2857 100644
--- a/tests/group
+++ b/tests/group
@@ -71,3 +71,4 @@
 055 auto cluster md
 056 auto quick cluster md
 057 auto quick cluster md
+058 auto quick cluster md
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog
  2013-04-13 11:27 [Qemu-devel] [PATCH For SD and QEMU] Add discard/trim support for Sheepdog Liu Yuan
                   ` (2 preceding siblings ...)
  2013-04-13 11:27 ` [Qemu-devel] [PATCH 3/3] tests: add 058 to test discard/trim Liu Yuan
@ 2013-04-13 11:27 ` Liu Yuan
  2013-04-14  5:07   ` [Qemu-devel] [PATCH for QEMU v2] " Liu Yuan
                     ` (3 more replies)
  3 siblings, 4 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-13 11:27 UTC (permalink / raw)
  To: sheepdog, qemu-devel; +Cc: Kevin Wolf, Stefan Hajnoczi, MORITA Kazutaka

From: Liu Yuan <tailai.ly@taobao.com>

The 'TRIM' command from VM that is to release underlying data storage for
better thin-provision is already supported by the Sheepdog.

This patch adds the TRIM support at QEMU part.

For older Sheepdog that doesn't support, we return EIO to upper layer.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>

diff --git a/block/sheepdog.c b/block/sheepdog.c
index bb67c4c..b4ba559 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -34,6 +34,7 @@
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
+#define SD_OP_DISCARD        0x17
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
@@ -269,6 +270,7 @@ enum AIOCBState {
     AIOCB_WRITE_UDATA,
     AIOCB_READ_UDATA,
     AIOCB_FLUSH_CACHE,
+    AIOCB_DISCARD,
 };
 
 struct SheepdogAIOCB {
@@ -656,7 +658,7 @@ static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    unsigned long idx;
+    uint64_t idx;
 
     if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
@@ -727,6 +729,18 @@ static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
         }
         break;
+    case AIOCB_DISCARD:
+        switch (rsp.result) {
+        case SD_RES_INVALID_PARMS:
+            error_report("you are running the old sheep that doesn't support "
+                         "discard command.\n");
+            break;
+        case SD_RES_SUCCESS:
+            idx = data_oid_to_idx(aio_req->oid);
+            s->inode.data_vdi_id[idx] = 0;
+            break;
+        }
+        break;
     }
 
     if (rsp.result != SD_RES_SUCCESS) {
@@ -1016,6 +1030,9 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         wlen = datalen;
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
+    case AIOCB_DISCARD:
+        hdr.opcode = SD_OP_DISCARD;
+        break;
     }
 
     if (s->cache_flags) {
@@ -1633,6 +1650,11 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                 flags = SD_FLAG_CMD_COW;
             }
             break;
+        case AIOCB_DISCARD:
+            /* We discard it only when the whole object is trimmed */
+            if (len != SD_DATA_OBJ_SIZE) {
+                goto done;
+            }
         default:
             break;
         }
@@ -2071,6 +2093,28 @@ static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
 }
 
 
+static int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors)
+{
+    SheepdogAIOCB *acb;
+    QEMUIOVector dummy;
+    int ret;
+
+    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    acb->aiocb_type = AIOCB_DISCARD;
+    acb->aio_done_func = sd_finish_aiocb;
+
+    ret = sd_co_rw_vector(acb);
+    if (ret <= 0) {
+        qemu_aio_release(acb);
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+
+    return acb->ret;
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2103,6 +2147,7 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2128,6 +2173,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2153,6 +2199,7 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH for QEMU v2] sheepdog: add discard/trim support for sheepdog
  2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
@ 2013-04-14  5:07   ` Liu Yuan
  2013-04-14  5:16   ` [Qemu-devel] [PATCH for QEMU v3] " Liu Yuan
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-14  5:07 UTC (permalink / raw)
  To: sheepdog, qemu-devel
  Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi, MORITA Kazutaka

From: Liu Yuan <tailai.ly@taobao.com>

The 'TRIM' command from VM that is to release underlying data storage for
better thin-provision is already supported by the Sheepdog.

This patch adds the TRIM support at QEMU part.

For older Sheepdog that doesn't support, we return EIO to upper layer.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
v2:
 - skip the object when it is not allocated

 block/sheepdog.c |   53 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 987018e..0d1b047 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -34,6 +34,7 @@
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
+#define SD_OP_DISCARD        0x17
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
@@ -269,6 +270,7 @@ enum AIOCBState {
     AIOCB_WRITE_UDATA,
     AIOCB_READ_UDATA,
     AIOCB_FLUSH_CACHE,
+    AIOCB_DISCARD,
 };
 
 struct SheepdogAIOCB {
@@ -656,7 +658,7 @@ static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    unsigned long idx;
+    uint64_t idx;
 
     if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
@@ -727,6 +729,18 @@ static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
         }
         break;
+    case AIOCB_DISCARD:
+        switch (rsp.result) {
+        case SD_RES_INVALID_PARMS:
+            error_report("you are running the old sheep that doesn't support "
+                         "discard command.\n");
+            break;
+        case SD_RES_SUCCESS:
+            idx = data_oid_to_idx(aio_req->oid);
+            s->inode.data_vdi_id[idx] = 0;
+            break;
+        }
+        break;
     }
 
     if (rsp.result != SD_RES_SUCCESS) {
@@ -1016,6 +1030,9 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         wlen = datalen;
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
+    case AIOCB_DISCARD:
+        hdr.opcode = SD_OP_DISCARD;
+        break;
     }
 
     if (s->cache_flags) {
@@ -1633,7 +1650,14 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                 flags = SD_FLAG_CMD_COW;
             }
             break;
-        default:
+        case AIOCB_DISCARD:
+            /*
+             * We discard the object only when the whole object is
+             * 1) allocated 2) trimmed. Otherwise, simply skip it.
+             */
+            if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
+                goto done;
+            }
             break;
         }
 
@@ -2071,6 +2095,28 @@ static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
 }
 
 
+static int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors)
+{
+    SheepdogAIOCB *acb;
+    QEMUIOVector dummy;
+    int ret;
+
+    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    acb->aiocb_type = AIOCB_DISCARD;
+    acb->aio_done_func = sd_finish_aiocb;
+
+    ret = sd_co_rw_vector(acb);
+    if (ret <= 0) {
+        qemu_aio_release(acb);
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+
+    return acb->ret;
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2103,6 +2149,7 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2128,6 +2175,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2153,6 +2201,7 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH for QEMU v3] sheepdog: add discard/trim support for sheepdog
  2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
  2013-04-14  5:07   ` [Qemu-devel] [PATCH for QEMU v2] " Liu Yuan
@ 2013-04-14  5:16   ` Liu Yuan
  2013-04-15 15:10     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
  2013-04-15 15:52   ` [Qemu-devel] [PATCH v4] " Liu Yuan
  2013-04-15 16:15   ` [Qemu-devel] [PATCH v5] " Liu Yuan
  3 siblings, 1 reply; 15+ messages in thread
From: Liu Yuan @ 2013-04-14  5:16 UTC (permalink / raw)
  To: sheepdog, qemu-devel
  Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi, MORITA Kazutaka

From: Liu Yuan <tailai.ly@taobao.com>

The 'TRIM' command from VM that is to release underlying data storage for
better thin-provision is already supported by the Sheepdog.

This patch adds the TRIM support at QEMU part.

For older Sheepdog that doesn't support it, we return EIO to upper layer.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
v3:
 - fix a silly accidental deletion of 'default' in switch clause.
v2:
 - skip the object when it is not allocated

 block/sheepdog.c |   53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 987018e..e852d4e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -34,6 +34,7 @@
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
+#define SD_OP_DISCARD        0x17
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
@@ -269,6 +270,7 @@ enum AIOCBState {
     AIOCB_WRITE_UDATA,
     AIOCB_READ_UDATA,
     AIOCB_FLUSH_CACHE,
+    AIOCB_DISCARD,
 };
 
 struct SheepdogAIOCB {
@@ -656,7 +658,7 @@ static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    unsigned long idx;
+    uint64_t idx;
 
     if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
@@ -727,6 +729,18 @@ static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
         }
         break;
+    case AIOCB_DISCARD:
+        switch (rsp.result) {
+        case SD_RES_INVALID_PARMS:
+            error_report("you are running the old sheep that doesn't support "
+                         "discard command.\n");
+            break;
+        case SD_RES_SUCCESS:
+            idx = data_oid_to_idx(aio_req->oid);
+            s->inode.data_vdi_id[idx] = 0;
+            break;
+        }
+        break;
     }
 
     if (rsp.result != SD_RES_SUCCESS) {
@@ -1016,6 +1030,9 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         wlen = datalen;
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
+    case AIOCB_DISCARD:
+        hdr.opcode = SD_OP_DISCARD;
+        break;
     }
 
     if (s->cache_flags) {
@@ -1633,6 +1650,15 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                 flags = SD_FLAG_CMD_COW;
             }
             break;
+        case AIOCB_DISCARD:
+            /*
+             * We discard the object only when the whole object is
+             * 1) allocated 2) trimmed. Otherwise, simply skip it.
+             */
+            if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
+                goto done;
+            }
+            break;
         default:
             break;
         }
@@ -2071,6 +2097,28 @@ static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
 }
 
 
+static int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors)
+{
+    SheepdogAIOCB *acb;
+    QEMUIOVector dummy;
+    int ret;
+
+    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    acb->aiocb_type = AIOCB_DISCARD;
+    acb->aio_done_func = sd_finish_aiocb;
+
+    ret = sd_co_rw_vector(acb);
+    if (ret <= 0) {
+        qemu_aio_release(acb);
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+
+    return acb->ret;
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2103,6 +2151,7 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2128,6 +2177,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2153,6 +2203,7 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [sheepdog] [PATCH for QEMU v3] sheepdog: add discard/trim support for sheepdog
  2013-04-14  5:16   ` [Qemu-devel] [PATCH for QEMU v3] " Liu Yuan
@ 2013-04-15 15:10     ` MORITA Kazutaka
  0 siblings, 0 replies; 15+ messages in thread
From: MORITA Kazutaka @ 2013-04-15 15:10 UTC (permalink / raw)
  To: Liu Yuan; +Cc: Kevin Wolf, Paolo Bonzini, sheepdog, qemu-devel, Stefan Hajnoczi

At Sun, 14 Apr 2013 13:16:44 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly@taobao.com>
> 
> The 'TRIM' command from VM that is to release underlying data storage for
> better thin-provision is already supported by the Sheepdog.
> 
> This patch adds the TRIM support at QEMU part.
> 
> For older Sheepdog that doesn't support it, we return EIO to upper layer.

I think we can safely return 0 without doing anything when the server
doesn't support SD_OP_DISCARD.  Actually, if the block driver doesn't
support the discard operation, bdrv_co_discard() in block.c returns 0.


> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 987018e..e852d4e 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -34,6 +34,7 @@
>  #define SD_OP_GET_VDI_INFO   0x14
>  #define SD_OP_READ_VDIS      0x15
>  #define SD_OP_FLUSH_VDI      0x16
> +#define SD_OP_DISCARD        0x17

This is an opcode for objects, so I prefer SD_OP_DISCARD_OBJ.

>  
> +static int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
> +                         int nb_sectors)

Should add coroutine_fn.

Thanks,

Kazutaka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v4] sheepdog: add discard/trim support for sheepdog
  2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
  2013-04-14  5:07   ` [Qemu-devel] [PATCH for QEMU v2] " Liu Yuan
  2013-04-14  5:16   ` [Qemu-devel] [PATCH for QEMU v3] " Liu Yuan
@ 2013-04-15 15:52   ` Liu Yuan
  2013-04-15 16:03     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
  2013-04-15 16:15   ` [Qemu-devel] [PATCH v5] " Liu Yuan
  3 siblings, 1 reply; 15+ messages in thread
From: Liu Yuan @ 2013-04-15 15:52 UTC (permalink / raw)
  To: sheepdog, qemu-devel
  Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi, MORITA Kazutaka

From: Liu Yuan <tailai.ly@taobao.com>

The 'TRIM' command from VM that is to release underlying data storage for
better thin-provision is already supported by the Sheepdog.

This patch adds the TRIM support at QEMU part.

For older Sheepdog that doesn't support it, we return 0(success) to upper layer.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
v4:
 - adjust discard macro
 - return success when operation is not supported by sheep
 - add coroutine_fn marker
v3:
 - fix a silly accidental deletion of 'default' in switch clause.
v2:
 - skip the object when it is not allocated

 block/sheepdog.c |   55 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 987018e..362244a 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -34,6 +34,7 @@
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
+#define SD_OP_DISCARD_OBJ    0x17
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
@@ -269,6 +270,7 @@ enum AIOCBState {
     AIOCB_WRITE_UDATA,
     AIOCB_READ_UDATA,
     AIOCB_FLUSH_CACHE,
+    AIOCB_DISCARD_OBJ,
 };
 
 struct SheepdogAIOCB {
@@ -656,7 +658,7 @@ static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    unsigned long idx;
+    uint64_t idx;
 
     if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
@@ -727,6 +729,20 @@ static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
         }
         break;
+    case AIOCB_DISCARD_OBJ:
+        switch (rsp.result) {
+        case SD_RES_INVALID_PARMS:
+            error_report("you are running the old sheep that doesn't support "
+                         "discard command.\n");
+            rsp.result = SD_RES_SUCCESS;
+            break;
+        case SD_RES_SUCCESS:
+            idx = data_oid_to_idx(aio_req->oid);
+            s->inode.data_vdi_id[idx] = 0;
+            break;
+        default:
+            break;
+        }
     }
 
     if (rsp.result != SD_RES_SUCCESS) {
@@ -1016,6 +1032,9 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         wlen = datalen;
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
+    case AIOCB_DISCARD_OBJ:
+        hdr.opcode = SD_OP_DISCARD_OBJ;
+        break;
     }
 
     if (s->cache_flags) {
@@ -1633,6 +1652,15 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                 flags = SD_FLAG_CMD_COW;
             }
             break;
+        case AIOCB_DISCARD_OBJ:
+            /*
+             * We discard the object only when the whole object is
+             * 1) allocated 2) trimmed. Otherwise, simply skip it.
+             */
+            if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
+                goto done;
+            }
+            break;
         default:
             break;
         }
@@ -2071,6 +2099,28 @@ static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
 }
 
 
+static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors)
+{
+    SheepdogAIOCB *acb;
+    QEMUIOVector dummy;
+    int ret;
+
+    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    acb->aiocb_type = AIOCB_DISCARD_OBJ;
+    acb->aio_done_func = sd_finish_aiocb;
+
+    ret = sd_co_rw_vector(acb);
+    if (ret <= 0) {
+        qemu_aio_release(acb);
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+
+    return acb->ret;
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2103,6 +2153,7 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2128,6 +2179,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2153,6 +2205,7 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [sheepdog] [PATCH v4] sheepdog: add discard/trim support for sheepdog
  2013-04-15 15:52   ` [Qemu-devel] [PATCH v4] " Liu Yuan
@ 2013-04-15 16:03     ` MORITA Kazutaka
  0 siblings, 0 replies; 15+ messages in thread
From: MORITA Kazutaka @ 2013-04-15 16:03 UTC (permalink / raw)
  To: Liu Yuan; +Cc: Kevin Wolf, Paolo Bonzini, sheepdog, qemu-devel, Stefan Hajnoczi

At Mon, 15 Apr 2013 23:52:40 +0800,
Liu Yuan wrote:
> 
> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 987018e..362244a 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -34,6 +34,7 @@
>  #define SD_OP_GET_VDI_INFO   0x14
>  #define SD_OP_READ_VDIS      0x15
>  #define SD_OP_FLUSH_VDI      0x16
> +#define SD_OP_DISCARD_OBJ    0x17

I think SD_OP_DISCARD_OBJ should be 0x05 since we are using a value
less than 0x10 for SD_OP_*_OBJ.

The other parts look good to me.

Thanks,

Kazutaka

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Qemu-devel] [PATCH v5] sheepdog: add discard/trim support for sheepdog
  2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
                     ` (2 preceding siblings ...)
  2013-04-15 15:52   ` [Qemu-devel] [PATCH v4] " Liu Yuan
@ 2013-04-15 16:15   ` Liu Yuan
  2013-04-15 16:25     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
  2013-04-16  8:18     ` [Qemu-devel] " Stefan Hajnoczi
  3 siblings, 2 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-15 16:15 UTC (permalink / raw)
  To: sheepdog, qemu-devel
  Cc: Kevin Wolf, Paolo Bonzini, Stefan Hajnoczi, MORITA Kazutaka

From: Liu Yuan <tailai.ly@taobao.com>

The 'TRIM' command from VM that is to release underlying data storage for
better thin-provision is already supported by the Sheepdog.

This patch adds the TRIM support at QEMU part.

For older Sheepdog that doesn't support it, we return 0(success) to upper layer.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
v5:
 - adjust macro numbering
v4:
 - adjust discard macro
 - return success when operation is not supported by sheep
 - add coroutine_fn marker
v3:
 - fix a silly accidental deletion of 'default' in switch clause.
v2:
 - skip the object when it is not allocated

 block/sheepdog.c |   56 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 987018e..3b64690 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -27,6 +27,8 @@
 #define SD_OP_CREATE_AND_WRITE_OBJ  0x01
 #define SD_OP_READ_OBJ       0x02
 #define SD_OP_WRITE_OBJ      0x03
+/* 0x04 is used internally by Sheepdog */
+#define SD_OP_DISCARD_OBJ    0x05
 
 #define SD_OP_NEW_VDI        0x11
 #define SD_OP_LOCK_VDI       0x12
@@ -269,6 +271,7 @@ enum AIOCBState {
     AIOCB_WRITE_UDATA,
     AIOCB_READ_UDATA,
     AIOCB_FLUSH_CACHE,
+    AIOCB_DISCARD_OBJ,
 };
 
 struct SheepdogAIOCB {
@@ -656,7 +659,7 @@ static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    unsigned long idx;
+    uint64_t idx;
 
     if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
@@ -727,6 +730,20 @@ static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
         }
         break;
+    case AIOCB_DISCARD_OBJ:
+        switch (rsp.result) {
+        case SD_RES_INVALID_PARMS:
+            error_report("you are running the old sheep that doesn't support "
+                         "discard command.\n");
+            rsp.result = SD_RES_SUCCESS;
+            break;
+        case SD_RES_SUCCESS:
+            idx = data_oid_to_idx(aio_req->oid);
+            s->inode.data_vdi_id[idx] = 0;
+            break;
+        default:
+            break;
+        }
     }
 
     if (rsp.result != SD_RES_SUCCESS) {
@@ -1016,6 +1033,9 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         wlen = datalen;
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
+    case AIOCB_DISCARD_OBJ:
+        hdr.opcode = SD_OP_DISCARD_OBJ;
+        break;
     }
 
     if (s->cache_flags) {
@@ -1633,6 +1653,15 @@ static int coroutine_fn sd_co_rw_vector(void *p)
                 flags = SD_FLAG_CMD_COW;
             }
             break;
+        case AIOCB_DISCARD_OBJ:
+            /*
+             * We discard the object only when the whole object is
+             * 1) allocated 2) trimmed. Otherwise, simply skip it.
+             */
+            if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
+                goto done;
+            }
+            break;
         default:
             break;
         }
@@ -2071,6 +2100,28 @@ static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
 }
 
 
+static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors)
+{
+    SheepdogAIOCB *acb;
+    QEMUIOVector dummy;
+    int ret;
+
+    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    acb->aiocb_type = AIOCB_DISCARD_OBJ;
+    acb->aio_done_func = sd_finish_aiocb;
+
+    ret = sd_co_rw_vector(acb);
+    if (ret <= 0) {
+        qemu_aio_release(acb);
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+
+    return acb->ret;
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2103,6 +2154,7 @@ static BlockDriver bdrv_sheepdog = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2128,6 +2180,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2153,6 +2206,7 @@ static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [sheepdog] [PATCH v5] sheepdog: add discard/trim support for sheepdog
  2013-04-15 16:15   ` [Qemu-devel] [PATCH v5] " Liu Yuan
@ 2013-04-15 16:25     ` MORITA Kazutaka
  2013-04-16  8:18     ` [Qemu-devel] " Stefan Hajnoczi
  1 sibling, 0 replies; 15+ messages in thread
From: MORITA Kazutaka @ 2013-04-15 16:25 UTC (permalink / raw)
  To: Liu Yuan; +Cc: Kevin Wolf, Paolo Bonzini, sheepdog, qemu-devel, Stefan Hajnoczi

At Tue, 16 Apr 2013 00:15:04 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly@taobao.com>
> 
> The 'TRIM' command from VM that is to release underlying data storage for
> better thin-provision is already supported by the Sheepdog.
> 
> This patch adds the TRIM support at QEMU part.
> 
> For older Sheepdog that doesn't support it, we return 0(success) to upper layer.
> 
> Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
> ---
> v5:
>  - adjust macro numbering
> v4:
>  - adjust discard macro
>  - return success when operation is not supported by sheep
>  - add coroutine_fn marker
> v3:
>  - fix a silly accidental deletion of 'default' in switch clause.
> v2:
>  - skip the object when it is not allocated
> 
>  block/sheepdog.c |   56 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 55 insertions(+), 1 deletion(-)

Reviewed-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v5] sheepdog: add discard/trim support for sheepdog
  2013-04-15 16:15   ` [Qemu-devel] [PATCH v5] " Liu Yuan
  2013-04-15 16:25     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
@ 2013-04-16  8:18     ` Stefan Hajnoczi
  2013-04-16  8:47       ` Kevin Wolf
  1 sibling, 1 reply; 15+ messages in thread
From: Stefan Hajnoczi @ 2013-04-16  8:18 UTC (permalink / raw)
  To: Liu Yuan
  Cc: Kevin Wolf, sheepdog, qemu-devel, Stefan Hajnoczi, Paolo Bonzini,
	MORITA Kazutaka

On Tue, Apr 16, 2013 at 12:15:04AM +0800, Liu Yuan wrote:
> @@ -727,6 +730,20 @@ static void coroutine_fn aio_read_response(void *opaque)
>              rsp.result = SD_RES_SUCCESS;
>          }
>          break;
> +    case AIOCB_DISCARD_OBJ:
> +        switch (rsp.result) {
> +        case SD_RES_INVALID_PARMS:
> +            error_report("you are running the old sheep that doesn't support "
> +                         "discard command.\n");

error_report() does not need '\n'.

The recently added ssh block driver has a similar case when the server
does not support fsync.  It does the following:

1. Print the error message once only per volume, avoid filling up logs
   on the host.
2. Include details of the volume/server in case the users is connected
   to multiple volumes/servers.  This allows them to figure out which
   server is outdated.

This makes the error messages safe from denial-of-service and includes
more useful information.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v5] sheepdog: add discard/trim support for sheepdog
  2013-04-16  8:18     ` [Qemu-devel] " Stefan Hajnoczi
@ 2013-04-16  8:47       ` Kevin Wolf
  2013-04-16  9:08         ` Liu Yuan
  0 siblings, 1 reply; 15+ messages in thread
From: Kevin Wolf @ 2013-04-16  8:47 UTC (permalink / raw)
  To: Stefan Hajnoczi
  Cc: sheepdog, qemu-devel, Stefan Hajnoczi, Paolo Bonzini, Liu Yuan,
	MORITA Kazutaka

Am 16.04.2013 um 10:18 hat Stefan Hajnoczi geschrieben:
> On Tue, Apr 16, 2013 at 12:15:04AM +0800, Liu Yuan wrote:
> > @@ -727,6 +730,20 @@ static void coroutine_fn aio_read_response(void *opaque)
> >              rsp.result = SD_RES_SUCCESS;
> >          }
> >          break;
> > +    case AIOCB_DISCARD_OBJ:
> > +        switch (rsp.result) {
> > +        case SD_RES_INVALID_PARMS:
> > +            error_report("you are running the old sheep that doesn't support "
> > +                         "discard command.\n");
> 
> error_report() does not need '\n'.
> 
> The recently added ssh block driver has a similar case when the server
> does not support fsync.  It does the following:
> 
> 1. Print the error message once only per volume, avoid filling up logs
>    on the host.
> 2. Include details of the volume/server in case the users is connected
>    to multiple volumes/servers.  This allows them to figure out which
>    server is outdated.
> 
> This makes the error messages safe from denial-of-service and includes
> more useful information.

Or if we can check whether discard works during bdrv_open(), we could
already fail there for discard=on.

Kevin

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] [PATCH v5] sheepdog: add discard/trim support for sheepdog
  2013-04-16  8:47       ` Kevin Wolf
@ 2013-04-16  9:08         ` Liu Yuan
  0 siblings, 0 replies; 15+ messages in thread
From: Liu Yuan @ 2013-04-16  9:08 UTC (permalink / raw)
  To: Kevin Wolf
  Cc: sheepdog, Stefan Hajnoczi, qemu-devel, Stefan Hajnoczi,
	Paolo Bonzini, MORITA Kazutaka

On 04/16/2013 04:47 PM, Kevin Wolf wrote:
> Am 16.04.2013 um 10:18 hat Stefan Hajnoczi geschrieben:
>> > On Tue, Apr 16, 2013 at 12:15:04AM +0800, Liu Yuan wrote:
>>> > > @@ -727,6 +730,20 @@ static void coroutine_fn aio_read_response(void *opaque)
>>> > >              rsp.result = SD_RES_SUCCESS;
>>> > >          }
>>> > >          break;
>>> > > +    case AIOCB_DISCARD_OBJ:
>>> > > +        switch (rsp.result) {
>>> > > +        case SD_RES_INVALID_PARMS:
>>> > > +            error_report("you are running the old sheep that doesn't support "
>>> > > +                         "discard command.\n");
>> > 
>> > error_report() does not need '\n'.
>> > 
>> > The recently added ssh block driver has a similar case when the server
>> > does not support fsync.  It does the following:
>> > 
>> > 1. Print the error message once only per volume, avoid filling up logs
>> >    on the host.

All the request for the volumes are firstly handled by the sheep daemon
this QEMU connects to, so we can say that if one discard request for any
volume return SD_RES_INVALID_PARMS, then all the volumes attatched to
this VM can't support discard operation.

>> > 2. Include details of the volume/server in case the users is connected
>> >    to multiple volumes/servers.  This allows them to figure out which
>> >    server is outdated.
>> > 

Multi-connections aren't supported yet (though planned), so this doesn't
apply for current SD.

>> > This makes the error messages safe from denial-of-service and includes
>> > more useful information.
> Or if we can check whether discard works during bdrv_open(), we could
> already fail there for discard=on.

Hmm, SD doesn't support a feature negotiation request. The most simple
way I can come up is add a s->enable_discard flag that set false when it
is reported discard operation isn't supported by the server connected.
What do you think?

Thanks,
Yuan

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2013-04-16  9:08 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-13 11:27 [Qemu-devel] [PATCH For SD and QEMU] Add discard/trim support for Sheepdog Liu Yuan
2013-04-13 11:27 ` [Qemu-devel] [PATCH 1/3] sheep: remove unused 'flags' in remove_object() Liu Yuan
2013-04-13 11:27 ` [Qemu-devel] [PATCH 2/3] sheep: teach sheep to discard unused objects Liu Yuan
2013-04-13 11:27 ` [Qemu-devel] [PATCH 3/3] tests: add 058 to test discard/trim Liu Yuan
2013-04-13 11:27 ` [Qemu-devel] [PATCH for QEMU] sheepdog: add discard/trim support for sheepdog Liu Yuan
2013-04-14  5:07   ` [Qemu-devel] [PATCH for QEMU v2] " Liu Yuan
2013-04-14  5:16   ` [Qemu-devel] [PATCH for QEMU v3] " Liu Yuan
2013-04-15 15:10     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
2013-04-15 15:52   ` [Qemu-devel] [PATCH v4] " Liu Yuan
2013-04-15 16:03     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
2013-04-15 16:15   ` [Qemu-devel] [PATCH v5] " Liu Yuan
2013-04-15 16:25     ` [Qemu-devel] [sheepdog] " MORITA Kazutaka
2013-04-16  8:18     ` [Qemu-devel] " Stefan Hajnoczi
2013-04-16  8:47       ` Kevin Wolf
2013-04-16  9:08         ` Liu Yuan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).