* [PATCH 1/3] *v2* librbd: copy-on-read for clones, add an option for copy-on-read
2014-05-13 9:12 [PATCH 0/3] librbd copy-on-read v2 Min Chen
@ 2014-05-13 9:12 ` Min Chen
2014-05-13 9:12 ` [PATCH 2/3] *v2* librbd: copy-on-read for clones, read entire object from parent Min Chen
2014-05-13 9:12 ` [PATCH 3/3] *v2* librbd: copy-on-read for clones, write entire object into child asynchronously Min Chen
2 siblings, 0 replies; 4+ messages in thread
From: Min Chen @ 2014-05-13 9:12 UTC (permalink / raw)
To: josh.durgin; +Cc: ceph-devel
Add an option rbd_clone_cor to enable copy-on-read
Signed-off-by: Min Chen <minchen@ubuntukylin.com>
Signed-off-by: Li Wang <liwang@ubuntukylin.com>
Signed-off-by: Yunchuan Wen Chen <yunchuanwen@ubuntukylin.com>
---
src/common/config_opts.h | 1 +
src/librbd/AioRequest.cc | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index a065a77..15438ce 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -711,6 +711,7 @@ OPTION(rbd_balance_snap_reads, OPT_BOOL, false)
OPTION(rbd_localize_snap_reads, OPT_BOOL, false)
OPTION(rbd_balance_parent_reads, OPT_BOOL, false)
OPTION(rbd_localize_parent_reads, OPT_BOOL, true)
+OPTION(rbd_clone_cor, OPT_BOOL, false)//copy-on-read option for rbd clones
/*
* The following options change the behavior for librbd's image creation methods that
diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc
index 5cf9a11..4cc6b4e 100644
--- a/src/librbd/AioRequest.cc
+++ b/src/librbd/AioRequest.cc
@@ -59,6 +59,10 @@ namespace librbd {
ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len
<< " r = " << r << dendl;
+ //get copy-on-read option and check image if read_only
+ bool COR = (m_ictx->cct->_conf->rbd_clone_cor) && (!m_ictx->read_only);
+ ldout(m_ictx->cct, 20) << "should_complete COR = " << COR << " read_only = " << m_ictx->read_only << dendl;
+
if (!m_tried_parent && r == -ENOENT) {
RWLock::RLocker l(m_ictx->snap_lock);
RWLock::RLocker l2(m_ictx->parent_lock);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 2/3] *v2* librbd: copy-on-read for clones, read entire object from parent
2014-05-13 9:12 [PATCH 0/3] librbd copy-on-read v2 Min Chen
2014-05-13 9:12 ` [PATCH 1/3] *v2* librbd: copy-on-read for clones, add an option for copy-on-read Min Chen
@ 2014-05-13 9:12 ` Min Chen
2014-05-13 9:12 ` [PATCH 3/3] *v2* librbd: copy-on-read for clones, write entire object into child asynchronously Min Chen
2 siblings, 0 replies; 4+ messages in thread
From: Min Chen @ 2014-05-13 9:12 UTC (permalink / raw)
To: josh.durgin; +Cc: ceph-devel
rbd copy-on-read is implemented in AioRequest. If an object extent doesn't exist in the clone, then read
the entire range of the object from the parent. Bufferlist m_entire_object is added to AioRequest to hold the whole object,
instead of m_read_data, which keeps only the data requested by the read.
Signed-off-by: Min Chen <minchen@ubuntukylin.com>
Signed-off-by: Li Wang <liwang@ubuntukylin.com>
Signed-off-by: Yunchuan Wen Chen <yunchuanwen@ubuntukylin.com>
---
src/librbd/AioRequest.cc | 51 +++++++++++++++++++++++++++++++++++++++++++---
src/librbd/AioRequest.h | 3 +++
2 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc
index 4cc6b4e..1a372aa 100644
--- a/src/librbd/AioRequest.cc
+++ b/src/librbd/AioRequest.cc
@@ -22,7 +22,7 @@ namespace librbd {
m_ictx(NULL), m_ioctx(NULL),
m_object_no(0), m_object_off(0), m_object_len(0),
m_snap_id(CEPH_NOSNAP), m_completion(NULL), m_parent_completion(NULL),
- m_hide_enoent(false) {}
+ m_hide_enoent(false), m_parent_completion_cor(NULL){}
AioRequest::AioRequest(ImageCtx *ictx, const std::string &oid,
uint64_t objectno, uint64_t off, uint64_t len,
librados::snap_t snap_id,
@@ -31,13 +31,17 @@ namespace librbd {
m_ictx(ictx), m_ioctx(&ictx->data_ctx), m_oid(oid), m_object_no(objectno),
m_object_off(off), m_object_len(len), m_snap_id(snap_id),
m_completion(completion), m_parent_completion(NULL),
- m_hide_enoent(hide_enoent) {}
+ m_hide_enoent(hide_enoent), m_parent_completion_cor(NULL) {}
AioRequest::~AioRequest() {
if (m_parent_completion) {
m_parent_completion->release();
m_parent_completion = NULL;
}
+ if (m_parent_completion_cor) {
+ m_parent_completion_cor->release();
+ m_parent_completion_cor = NULL;
+ }
}
void AioRequest::read_from_parent(vector<pair<uint64_t,uint64_t> >& image_extents)
@@ -52,6 +56,19 @@ namespace librbd {
m_parent_completion);
}
+ //copy-on-read : read the entire object from parent, using bufferlist m_entire_object
+ void AioRequest::read_from_parent_COR(vector<pair<uint64_t,uint64_t> >& image_extents)
+ {
+ assert(!m_parent_completion_cor);
+ m_parent_completion_cor = aio_create_completion_internal(this, rbd_req_cb);
+ ldout(m_ictx->cct, 20) << "read_from_parent_COR this = " << this
+ << " parent completion cor " << m_parent_completion_cor
+ << " extents " << image_extents
+ << dendl;
+ aio_read(m_ictx->parent, image_extents, NULL, &m_entire_object,
+ m_parent_completion_cor);
+ }
+
/** read **/
bool AioRead::should_complete(int r)
@@ -81,11 +98,39 @@ namespace librbd {
uint64_t object_overlap = m_ictx->prune_parent_extents(image_extents, image_overlap);
if (object_overlap) {
m_tried_parent = true;
- read_from_parent(image_extents);
+ if (COR) {//copy-on-read option
+ vector<pair<uint64_t,uint64_t> > extend_image_extents;
+ //extend range to entire object
+ Striper::extent_to_file(m_ictx->cct, &m_ictx->layout,
+ m_object_no, 0, m_ictx->layout.fl_object_size,
+ extend_image_extents);
+ //read entire object from parent , and put it in m_entire_object
+ read_from_parent_COR(extend_image_extents);
+ } else {
+ read_from_parent(image_extents);
+ }
return false;
}
}
+ if (COR) {//copy-on-read option
+ //if read entire object from parent success
+ if (m_tried_parent && r > 0) {
+ vector<pair<uint64_t,uint64_t> > image_extents;
+ Striper::extent_to_file(m_ictx->cct, &m_ictx->layout,
+ m_object_no, m_object_off, m_object_len,
+ image_extents);
+ uint64_t image_overlap = 0;
+ int r = m_ictx->get_parent_overlap(m_snap_id, &image_overlap);
+ if (r < 0) {
+ assert(0 == "FIXME");
+ }
+ m_ictx->prune_parent_extents(image_extents, image_overlap);
+ // copy the read range to m_read_data
+ m_read_data.substr_of(m_entire_object, m_object_off, m_object_len);
+ }
+ }
+
return true;
}
diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h
index d6103f9..00349b2 100644
--- a/src/librbd/AioRequest.h
+++ b/src/librbd/AioRequest.h
@@ -47,6 +47,7 @@ namespace librbd {
protected:
void read_from_parent(vector<pair<uint64_t,uint64_t> >& image_extents);
+ void read_from_parent_COR(vector<pair<uint64_t,uint64_t> >& image_extents);
ImageCtx *m_ictx;
librados::IoCtx *m_ioctx;
@@ -57,6 +58,8 @@ namespace librbd {
AioCompletion *m_parent_completion;
ceph::bufferlist m_read_data;
bool m_hide_enoent;
+ ceph::bufferlist m_entire_object;//copy-on-read : store the entire object
+ AioCompletion *m_parent_completion_cor;//copy-on-read : AioCompletion for read from parent
};
class AioRead : public AioRequest {
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 3/3] *v2* librbd: copy-on-read for clones, write entire object into child asynchronously
2014-05-13 9:12 [PATCH 0/3] librbd copy-on-read v2 Min Chen
2014-05-13 9:12 ` [PATCH 1/3] *v2* librbd: copy-on-read for clones, add an option for copy-on-read Min Chen
2014-05-13 9:12 ` [PATCH 2/3] *v2* librbd: copy-on-read for clones, read entire object from parent Min Chen
@ 2014-05-13 9:12 ` Min Chen
2 siblings, 0 replies; 4+ messages in thread
From: Min Chen @ 2014-05-13 9:12 UTC (permalink / raw)
To: josh.durgin; +Cc: ceph-devel
The object has already been saved in the m_entire_object bufferlist.
Send "copyup" to the OSD with m_entire_object; cls_rbd copyup will then write the object to the child.
Put the AioCompletion in an xlist, and remove it after the write request has finished.
Add a function item *get_cur() const { return cur; } to xlist::iterator in include/xlist.h to support removing an item from the xlist.
Signed-off-by: Min Chen <minchen@ubuntukylin.com>
Signed-off-by: Li Wang <liwang@ubuntukylin.com>
Signed-off-by: Yunchuan Wen Chen <yunchuanwen@ubuntukylin.com>
---
src/include/xlist.h | 1 +
src/librbd/AioRequest.cc | 33 ++++++++++++++++++++++
src/librbd/AioRequest.h | 1 +
src/librbd/ImageCtx.cc | 68 ++++++++++++++++++++++++++++++++++++++++++++++
src/librbd/ImageCtx.h | 6 ++++
src/librbd/internal.cc | 4 +++
6 files changed, 113 insertions(+)
diff --git a/src/include/xlist.h b/src/include/xlist.h
index 5384561..3932c40 100644
--- a/src/include/xlist.h
+++ b/src/include/xlist.h
@@ -157,6 +157,7 @@ public:
return *this;
}
bool end() const { return cur == 0; }
+ item *get_cur() const { return cur; }
};
iterator begin() { return iterator(_front); }
diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc
index 1a372aa..89b7c3b 100644
--- a/src/librbd/AioRequest.cc
+++ b/src/librbd/AioRequest.cc
@@ -71,6 +71,38 @@ namespace librbd {
/** read **/
+ //copy-on-read: after read entire object, just write it into child
+ ssize_t AioRead::write_COR()
+ {
+ ldout(m_ictx->cct, 20) << "write_COR" << dendl;
+ int ret = 0;
+
+ m_ictx->snap_lock.get_read();
+ ::SnapContext snapc = m_ictx->snapc;
+ m_ictx->snap_lock.put_read();
+
+ librados::ObjectWriteOperation copyup_cor;
+ copyup_cor.exec("rbd", "copyup", m_entire_object);
+
+ std::vector<librados::snap_t> m_snaps;
+ for (std::vector<snapid_t>::const_iterator it = snapc.snaps.begin();
+ it != snapc.snaps.end(); ++it) {
+ m_snaps.push_back(it->val);
+ }
+
+ librados::AioCompletion *cor_completion =
+ librados::Rados::aio_create_completion(m_ictx, librbd::cor_completion_callback, NULL);
+
+ xlist<librados::AioCompletion *>::item *comp =
+ new xlist<librados::AioCompletion *>::item(cor_completion);
+
+ m_ictx->add_cor_completion(comp);//add cor_completion to xlist
+ //asynchronously write object
+ ret = m_ictx->md_ctx.aio_operate(m_oid, cor_completion, &copyup_cor, snapc.seq.val, m_snaps);
+
+ return ret;
+ }
+
bool AioRead::should_complete(int r)
{
ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len
@@ -128,6 +160,7 @@ namespace librbd {
m_ictx->prune_parent_extents(image_extents, image_overlap);
// copy the read range to m_read_data
m_read_data.substr_of(m_entire_object, m_object_off, m_object_len);
+ write_COR();
}
}
diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h
index 00349b2..f5daada 100644
--- a/src/librbd/AioRequest.h
+++ b/src/librbd/AioRequest.h
@@ -75,6 +75,7 @@ namespace librbd {
m_tried_parent(false), m_sparse(sparse) {
}
virtual ~AioRead() {}
+ ssize_t write_COR();
virtual bool should_complete(int r);
virtual int send();
diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc
index 6477e8d..f74eafb 100644
--- a/src/librbd/ImageCtx.cc
+++ b/src/librbd/ImageCtx.cc
@@ -45,6 +45,7 @@ namespace librbd {
snap_lock("librbd::ImageCtx::snap_lock"),
parent_lock("librbd::ImageCtx::parent_lock"),
refresh_lock("librbd::ImageCtx::refresh_lock"),
+ cor_lock("librbd::ImageCtx::cor_lock"),
extra_read_flags(0),
old_format(true),
order(0), size(0), features(0),
@@ -96,6 +97,7 @@ namespace librbd {
object_set->return_enoent = true;
object_cacher->start();
}
+ cor_completions = new xlist<librados::AioCompletion*>();
}
ImageCtx::~ImageCtx() {
@@ -112,6 +114,10 @@ namespace librbd {
delete object_set;
object_set = NULL;
}
+ if (cor_completions) {
+ delete cor_completions;
+ cor_completions = NULL;
+ }
delete[] format_string;
}
@@ -648,4 +654,66 @@ namespace librbd {
<< " from image extents " << objectx << dendl;
return len;
}
+
+ void ImageCtx::add_cor_completion(xlist<librados::AioCompletion*>::item *comp)
+ {
+ if(!comp)
+ return;
+
+ cor_lock.Lock();
+ cor_completions->push_back(comp);
+ cor_lock.Unlock();
+
+ ldout(cct, 10) << "add_cor_completion:: size = "<< cor_completions->size() << dendl;
+ }
+
+ void ImageCtx::wait_last_completions()
+ {
+ ldout(cct, 10) << "wait_last_completions:: cor_completions = " << cor_completions << " size = " << cor_completions->size() << dendl;
+ xlist<librados::AioCompletion*>::iterator itr;
+ xlist<librados::AioCompletion*>::item *ptr;
+
+ while (!cor_completions->empty()){
+ cor_lock.Lock();
+ librados::AioCompletion *comp = cor_completions->front();
+ comp->wait_for_complete();
+ itr = cor_completions->begin();
+ ptr = itr.get_cur();
+ cor_completions->pop_front();
+ delete ptr;
+ ptr = NULL;
+ cor_lock.Unlock();
+ }
+ ldout(cct, 10) << "wait_last_completions:: after clear cor_completions = " << cor_completions << " size = " << cor_completions->size() << dendl;
+ }
+
+ void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg)
+ {
+ librbd::ImageCtx * ictx = (librbd::ImageCtx *)arg;
+
+ ictx->cor_lock.Lock();
+ xlist<librados::AioCompletion*> *completions = ictx->cor_completions;
+ ictx->cor_lock.Unlock();
+
+ ldout(ictx->cct, 10) << "cor_completion_callback:: cor_completions = " << completions << " size = "<< completions->size() << dendl;
+ if (!completions)
+ return;
+
+ //find current AioCompletion item in xlist, and remove it
+ for (xlist<librados::AioCompletion*>::iterator itr = completions->begin(); !(itr.end()); ++itr) {
+ if (aio_completion_impl == (*itr)->pc){
+ xlist<librados::AioCompletion*>::item *ptr = itr.get_cur();
+
+ ictx->cor_lock.Lock();
+ completions->remove(ptr);
+ ictx->cor_lock.Unlock();
+
+ delete ptr;//delete xlist<librados::AioCompletion*>::item *
+ ptr = NULL;
+ break;
+ }
+ }
+ ldout(ictx->cct, 10) << "cor_completion_callback:: after remove item, size = " << completions->size() << dendl;
+ }
+
}
diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h
index 026a3e0..e1d08c9 100644
--- a/src/librbd/ImageCtx.h
+++ b/src/librbd/ImageCtx.h
@@ -68,6 +68,7 @@ namespace librbd {
RWLock snap_lock; // protects snapshot-related member variables:
RWLock parent_lock; // protects parent_md and parent
Mutex refresh_lock; // protects refresh_seq and last_refresh
+ Mutex cor_lock; //protects cor_completions for copy-on-read
unsigned extra_read_flags;
@@ -89,6 +90,8 @@ namespace librbd {
LibrbdWriteback *writeback_handler;
ObjectCacher::ObjectSet *object_set;
+ xlist<librados::AioCompletion*> *cor_completions; //copy-on-read AioCompletions
+
/**
* Either image_name or image_id must be set.
* If id is not known, pass the empty std::string,
@@ -148,7 +151,10 @@ namespace librbd {
uint64_t prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx,
uint64_t overlap);
+ void add_cor_completion(xlist<librados::AioCompletion*>::item *comp);
+ void wait_last_completions();//wait for uncompleted asynchronous write which is still in xlist
};
+ void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg);
}
#endif
diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc
index 127be38..defbb46 100644
--- a/src/librbd/internal.cc
+++ b/src/librbd/internal.cc
@@ -2101,6 +2101,10 @@ reprotect_and_return_err:
void close_image(ImageCtx *ictx)
{
ldout(ictx->cct, 20) << "close_image " << ictx << dendl;
+
+ if(ictx->cor_completions)
+ ictx->wait_last_completions();//copy-on-read: wait for unfinished AioCompletion requests
+
if (ictx->object_cacher)
ictx->shutdown_cache(); // implicitly flushes
else
--
1.7.10.4
^ permalink raw reply related [flat|nested] 4+ messages in thread