All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] IO hint for no caching object for cache-tiering
@ 2015-05-25  3:32 Li Wang
  2015-05-25  3:32 ` [PATCH 1/2] Rados: cache-tiering support CEPH_OP_FLAG_TIER_NOCACHE Li Wang
  2015-05-25  3:32 ` [PATCH 2/2] Rados: add test case for CEPH_OP_FLAG_TIER_NOCACHE Li Wang
  0 siblings, 2 replies; 3+ messages in thread
From: Li Wang @ 2015-05-25  3:32 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel, Min Chen, Li Wang

Conventionally, the I/O hints given through fadvise() let applications
influence page cache behavior. This patch set extends the I/O hint
mechanism to control cache pool behavior, so that cache pollution
can be avoided. For example, in WRITEBACK mode, consider the
following series of operations: WRITE A; WRITE B; READ A.
If B is never accessed again, the write of B pollutes the cache
by forcing A to be evicted, and the later read of A causes a cache miss.
With the I/O hint, we can explicitly tell rados not to cache B,
thereby avoiding cache pollution.

The patches:
https://github.com/ceph/ceph/pull/4754

Min Chen (2):
  Rados: cache-tiering support CEPH_OP_FLAG_TIER_NOCACHE
  Rados: add test case for CEPH_OP_FLAG_TIER_NOCACHE

 src/include/rados.h            |   1 +
 src/include/rados/librados.h   |   1 +
 src/include/rados/librados.hpp |   1 +
 src/librados/librados.cc       |   2 +
 src/osd/ReplicatedPG.cc        |   5 ++
 src/osd/osd_types.cc           |   1 +
 src/test/librados/tier.cc      | 176 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 187 insertions(+)

-- 
1.9.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] Rados: cache-tiering support CEPH_OP_FLAG_TIER_NOCACHE
  2015-05-25  3:32 [PATCH 0/2] IO hint for no caching object for cache-tiering Li Wang
@ 2015-05-25  3:32 ` Li Wang
  2015-05-25  3:32 ` [PATCH 2/2] Rados: add test case for CEPH_OP_FLAG_TIER_NOCACHE Li Wang
  1 sibling, 0 replies; 3+ messages in thread
From: Li Wang @ 2015-05-25  3:32 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel, Min Chen

From: Min Chen <minchen@ubuntukylin.com>

Signed-off-by: Min Chen <minchen@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
 src/include/rados.h            | 1 +
 src/include/rados/librados.h   | 1 +
 src/include/rados/librados.hpp | 1 +
 src/librados/librados.cc       | 2 ++
 src/osd/ReplicatedPG.cc        | 5 +++++
 src/osd/osd_types.cc           | 1 +
 6 files changed, 11 insertions(+)

diff --git a/src/include/rados.h b/src/include/rados.h
index 424bef1..203e9ad 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -410,6 +410,7 @@ enum {
 						      pool uses pool snaps */
 	CEPH_OSD_FLAG_REDIRECTED   = 0x200000,  /* op has been redirected */
 	CEPH_OSD_FLAG_KNOWN_REDIR = 0x400000,  /* redirect bit is authoritative */
+	CEPH_OSD_FLAG_TIER_NOCACHE = 0x800000,  /* DO NOT cache data in any cache-mode */
 };
 
 enum {
diff --git a/src/include/rados/librados.h b/src/include/rados/librados.h
index 8d8d11b..41c3b14 100644
--- a/src/include/rados/librados.h
+++ b/src/include/rados/librados.h
@@ -120,6 +120,7 @@ enum {
   LIBRADOS_OPERATION_IGNORE_CACHE       = 8,
   LIBRADOS_OPERATION_SKIPRWLOCKS        = 16,
   LIBRADOS_OPERATION_IGNORE_OVERLAY     = 32,
+  LIBRADOS_OPERATION_TIER_NOCACHE     = 64,
 };
 /** @} */
 
diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp
index 6a0253d..3aeecc9 100644
--- a/src/include/rados/librados.hpp
+++ b/src/include/rados/librados.hpp
@@ -259,6 +259,7 @@ namespace librados
     OPERATION_IGNORE_CACHE       = LIBRADOS_OPERATION_IGNORE_CACHE,
     OPERATION_SKIPRWLOCKS        = LIBRADOS_OPERATION_SKIPRWLOCKS,
     OPERATION_IGNORE_OVERLAY     = LIBRADOS_OPERATION_IGNORE_OVERLAY,
+    OPERATION_TIER_NOCACHE     = LIBRADOS_OPERATION_TIER_NOCACHE,
   };
 
   /*
diff --git a/src/librados/librados.cc b/src/librados/librados.cc
index a9eadfa..d5dd5fb 100644
--- a/src/librados/librados.cc
+++ b/src/librados/librados.cc
@@ -1290,6 +1290,8 @@ static int translate_flags(int flags)
     op_flags |= CEPH_OSD_FLAG_SKIPRWLOCKS;
   if (flags & librados::OPERATION_IGNORE_OVERLAY)
     op_flags |= CEPH_OSD_FLAG_IGNORE_OVERLAY;
+  if (flags & librados::OPERATION_TIER_NOCACHE)
+    op_flags |= CEPH_OSD_FLAG_TIER_NOCACHE;
 
   return op_flags;
 }
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 4c549a5..36a6269 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1807,6 +1807,11 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
   MOSDOp *m = static_cast<MOSDOp*>(op->get_req());
   const object_locator_t& oloc = m->get_object_locator();
 
+  if (m->has_flag(CEPH_OSD_FLAG_TIER_NOCACHE)) {
+    do_cache_redirect(op);
+    return true;
+  }
+
   if (must_promote || op->need_promote()) {
     promote_object(obc, missing_oid, oloc, op);
     return true;
diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index a73b46f..8caae62 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -50,6 +50,7 @@ const char *ceph_osd_flag_name(unsigned flag)
   case CEPH_OSD_FLAG_ENFORCE_SNAPC: return "enforce_snapc";
   case CEPH_OSD_FLAG_REDIRECTED: return "redirected";
   case CEPH_OSD_FLAG_KNOWN_REDIR: return "known_if_redirected";
+  case CEPH_OSD_FLAG_TIER_NOCACHE: return "tier_nocache";
   default: return "???";
   }
 }
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] Rados: add test case for CEPH_OP_FLAG_TIER_NOCACHE
  2015-05-25  3:32 [PATCH 0/2] IO hint for no caching object for cache-tiering Li Wang
  2015-05-25  3:32 ` [PATCH 1/2] Rados: cache-tiering support CEPH_OP_FLAG_TIER_NOCACHE Li Wang
@ 2015-05-25  3:32 ` Li Wang
  1 sibling, 0 replies; 3+ messages in thread
From: Li Wang @ 2015-05-25  3:32 UTC (permalink / raw)
  To: Sage Weil; +Cc: ceph-devel, Min Chen

From: Min Chen <minchen@ubuntukylin.com>

Signed-off-by: Min Chen <minchen@ubuntukylin.com>
Reviewed-by: Li Wang <liwang@ubuntukylin.com>
---
 src/test/librados/tier.cc | 176 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc
index 60b56f9..0b3368e 100644
--- a/src/test/librados/tier.cc
+++ b/src/test/librados/tier.cc
@@ -2390,6 +2390,94 @@ TEST_F(LibRadosTwoPoolsPP, ProxyRead) {
   cluster.wait_for_latest_osdmap();
 }
 
+TEST_F(LibRadosTwoPoolsPP, TierNocache) {
+  // configure cache
+  bufferlist inbl;
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
+    "\", \"tierpool\": \"" + cache_pool_name +
+    "\", \"force_nonempty\": \"--force-nonempty\" }",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
+    "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
+    inbl, NULL, NULL));
+
+  std::string cache_modes[] = {"writeback", "forward", "readonly", "readforward", "readproxy"};
+  int count = (int) sizeof(cache_modes)/sizeof(cache_modes[0]);
+  int i;
+  // test write/read with TierNocache in each cache-mode
+  for (i = 0; i < count; i++)
+  {
+    std::cout << "set cache-mode:" + cache_modes[i] << std::endl;
+    ASSERT_EQ(0, cluster.mon_command(
+      "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
+      "\", \"mode\": \"" + cache_modes[i] + "\"}",
+      inbl, NULL, NULL));
+
+    // wait for maps to settle
+    cluster.wait_for_latest_osdmap();
+
+    std::string content(cache_modes[i]);
+    std::string object(cache_modes[i]+"_obj");
+    ObjectWriteOperation wr;
+    librados::AioCompletion *completion = cluster.aio_create_completion();
+
+    //writeback: create a new object
+    bufferlist bl;
+    bl.append(content);
+    wr.write_full(bl);
+    ioctx.aio_operate(object, completion, &wr, librados::OPERATION_TIER_NOCACHE);
+    completion->wait_for_safe();
+    completion->release();
+
+    // verify the object is NOT present in the cache tier
+    {
+      NObjectIterator it = cache_ioctx.nobjects_begin();
+      ASSERT_TRUE(it == cache_ioctx.nobjects_end());
+    }
+
+    //writeback: read the object content
+    ObjectReadOperation rd;
+    uint64_t len = bl.length();
+    completion = cluster.aio_create_completion();
+    bufferlist bl2;
+    bufferlist bl3;
+    rd.read(0, len+1, &bl3, NULL);
+    ASSERT_EQ(0, ioctx.aio_operate(
+	object, completion, &rd,
+	librados::OPERATION_TIER_NOCACHE, NULL));
+    completion->wait_for_complete();
+
+    ASSERT_EQ(0, completion->get_return_value());
+    uint64_t n = 0;
+    for (n = 0; n < len; n++) {
+	bl2.append(bl3[n]);
+    }
+    ASSERT_EQ(content, bl2.c_str());
+    completion->release();
+
+    // verify the object is NOT present in the cache tier
+    {
+      NObjectIterator it = cache_ioctx.nobjects_begin();
+      ASSERT_TRUE(it == cache_ioctx.nobjects_end());
+    }
+  }
+
+  // tear down tiers
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
+    "\"}",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
+    "\", \"tierpool\": \"" + cache_pool_name + "\"}",
+    inbl, NULL, NULL));
+
+  // wait for maps to settle before next test
+  cluster.wait_for_latest_osdmap();
+}
+
 class LibRadosTwoPoolsECPP : public RadosTestECPP
 {
 public:
@@ -4439,6 +4527,94 @@ TEST_F(LibRadosTwoPoolsECPP, ProxyRead) {
   cluster.wait_for_latest_osdmap();
 }
 
+TEST_F(LibRadosTwoPoolsECPP, TierNocache) {
+  // configure cache
+  bufferlist inbl;
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
+    "\", \"tierpool\": \"" + cache_pool_name +
+    "\", \"force_nonempty\": \"--force-nonempty\" }",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
+    "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
+    inbl, NULL, NULL));
+
+  std::string cache_modes[] = {"writeback", "forward", "readonly", "readforward", "readproxy"};
+  int count = (int) sizeof(cache_modes)/sizeof(cache_modes[0]);
+  int i;
+  // test write/read with TierNocache in each cache-mode
+  for (i = 0; i < count; i++)
+  {
+    std::cout << "set cache-mode:" + cache_modes[i] << std::endl;
+    ASSERT_EQ(0, cluster.mon_command(
+      "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
+      "\", \"mode\": \"" + cache_modes[i] + "\"}",
+      inbl, NULL, NULL));
+
+    // wait for maps to settle
+    cluster.wait_for_latest_osdmap();
+
+    std::string content(cache_modes[i]);
+    std::string object(cache_modes[i]+"_obj");
+    ObjectWriteOperation wr;
+    librados::AioCompletion *completion = cluster.aio_create_completion();
+
+    //writeback: create a new object
+    bufferlist bl;
+    bl.append(content);
+    wr.write_full(bl);
+    ioctx.aio_operate(object, completion, &wr, librados::OPERATION_TIER_NOCACHE);
+    completion->wait_for_safe();
+    completion->release();
+
+    // verify the object is NOT present in the cache tier
+    {
+      NObjectIterator it = cache_ioctx.nobjects_begin();
+      ASSERT_TRUE(it == cache_ioctx.nobjects_end());
+    }
+
+    //writeback: read the object content
+    ObjectReadOperation rd;
+    uint64_t len = bl.length();
+    completion = cluster.aio_create_completion();
+    bufferlist bl2;
+    bufferlist bl3;
+    rd.read(0, len+1, &bl3, NULL);
+    ASSERT_EQ(0, ioctx.aio_operate(
+	object, completion, &rd,
+	librados::OPERATION_TIER_NOCACHE, NULL));
+    completion->wait_for_complete();
+
+    ASSERT_EQ(0, completion->get_return_value());
+    uint64_t n = 0;
+    for (n = 0; n < len; n++) {
+	bl2.append(bl3[n]);
+    }
+    ASSERT_EQ(content, bl2.c_str());
+    completion->release();
+
+    // verify the object is NOT present in the cache tier
+    {
+      NObjectIterator it = cache_ioctx.nobjects_begin();
+      ASSERT_TRUE(it == cache_ioctx.nobjects_end());
+    }
+  }
+
+  // tear down tiers
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
+    "\"}",
+    inbl, NULL, NULL));
+  ASSERT_EQ(0, cluster.mon_command(
+    "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
+    "\", \"tierpool\": \"" + cache_pool_name + "\"}",
+    inbl, NULL, NULL));
+
+  // wait for maps to settle before next test
+  cluster.wait_for_latest_osdmap();
+}
+
 //Make ecpool as cache pool; no-ecpool as data pool
 //Judge promote object which has omap from no-ecpool into ecpool.
 TEST_F(LibRadosTwoPoolsECPP, OmapOperation) {
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-05-25  3:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-25  3:32 [PATCH 0/2] IO hint for no caching object for cache-tiering Li Wang
2015-05-25  3:32 ` [PATCH 1/2] Rados: cache-tiering support CEPH_OP_FLAG_TIER_NOCACHE Li Wang
2015-05-25  3:32 ` [PATCH 2/2] Rados: add test case for CEPH_OP_FLAG_TIER_NOCACHE Li Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.