All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Yan, Zheng" <zheng.z.yan@intel.com>
To: "Yan, Zheng" <zheng.z.yan@intel.com>
Cc: ceph-devel@vger.kernel.org, sage@inktank.com, greg@inktank.com,
	sam.lang@inktank.com
Subject: Re: [PATCH 29/30] mds: open inode by ino
Date: Mon, 27 May 2013 10:23:04 +0800	[thread overview]
Message-ID: <51A2C388.5060404@intel.com> (raw)
In-Reply-To: <1369296418-14871-30-git-send-email-zheng.z.yan@intel.com>

Updated version. The main change is the introduction of a "want_xlocked" mode,
which is used when opening a remote link.

---
From 8bbc796572fbafb308f605432297ede547ba6b60 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 15 May 2013 10:28:58 +0800
Subject: [PATCH 30/33] mds: open inode by ino

This patch adds an "open-by-ino" helper. It uses the backtrace to find the
inode's path and open the inode. The algorithm is as follows:

1. Check MDS peers. If any MDS has the inode in its cache, goto step 6.
2. Fetch the backtrace. If it is empty, return -EIO. If its first ancestor
   matches the one already recorded from an earlier attempt, return -EINVAL.
3. Traverse the path in the backtrace. If the inode is found, goto step 6;
   if a non-auth dirfrag is encountered, goto the next step. If the inode
   cannot be found in its parent dir, goto step 1.
4. Request MDS peers to traverse the path in the backtrace. If the inode
   is found, goto step 6. If an MDS peer encounters a non-auth dirfrag, it
   stops traversing. If any MDS peer fails to find the inode in its
   parent dir, goto step 1.
5. Use the same algorithm to open the inode's parent. Goto step 3 if
   that succeeds; goto step 1 if it fails.
6. Return the inode's auth MDS ID.

The algorithm has two main assumptions:
1. If an inode is in its auth MDS's cache, its on-disk backtrace
   can be out of date.
2. If an inode is not in any MDS's cache, its on-disk backtrace
   must be up to date.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 src/mds/MDCache.cc              | 440 +++++++++++++++++++++++++++++++++++++++-
 src/mds/MDCache.h               |  43 ++++
 src/mds/MDS.cc                  |  15 +-
 src/mds/MDSMap.h                |   7 +
 src/mds/inode_backtrace.h       |   4 +
 src/messages/MMDSOpenIno.h      |  46 +++++
 src/messages/MMDSOpenInoReply.h |  53 +++++
 src/msg/Message.cc              |   9 +
 src/msg/Message.h               |   2 +
 9 files changed, 612 insertions(+), 7 deletions(-)
 create mode 100644 src/messages/MMDSOpenIno.h
 create mode 100644 src/messages/MMDSOpenInoReply.h

diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index cd47786..8965e1b 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -79,6 +79,9 @@
 #include "messages/MMDSFindIno.h"
 #include "messages/MMDSFindInoReply.h"
 
+#include "messages/MMDSOpenIno.h"
+#include "messages/MMDSOpenInoReply.h"
+
 #include "messages/MClientRequest.h"
 #include "messages/MClientCaps.h"
 #include "messages/MClientSnap.h"
@@ -2725,6 +2728,7 @@ void MDCache::handle_mds_failure(int who)
   }
 
   kick_find_ino_peers(who);
+  kick_open_ino_peers(who);
 
   show_subtrees();  
 }
@@ -2784,7 +2788,7 @@ void MDCache::handle_mds_recovery(int who)
   }
 
   kick_discovers(who);
-
+  kick_open_ino_peers(who);
   kick_find_ino_peers(who);
 
   // queue them up.
@@ -7030,6 +7034,13 @@ void MDCache::dispatch(Message *m)
   case MSG_MDS_FINDINOREPLY:
     handle_find_ino_reply(static_cast<MMDSFindInoReply *>(m));
     break;
+
+  case MSG_MDS_OPENINO:
+    handle_open_ino(static_cast<MMDSOpenIno *>(m));
+    break;
+  case MSG_MDS_OPENINOREPLY:
+    handle_open_ino_reply(static_cast<MMDSOpenInoReply *>(m));
+    break;
     
   default:
     dout(7) << "cache unknown message " << m->get_type() << dendl;
@@ -7730,6 +7741,433 @@ void MDCache::make_trace(vector<CDentry*>& trace, CInode *in)
 }
 
 
+// -------------------------------------------------------------------------------
+// Open inode by inode number
+
+class C_MDC_OpenInoBacktraceFetched : public Context {
+  MDCache *cache;
+  inodeno_t ino;
+  public:
+  bufferlist bl;
+  C_MDC_OpenInoBacktraceFetched(MDCache *c, inodeno_t i) :
+    cache(c), ino(i) {}
+  void finish(int r) {
+    cache->_open_ino_backtrace_fetched(ino, bl, r);
+  }
+};
+
+struct C_MDC_OpenInoTraverseDir : public Context {
+  MDCache *cache;
+  inodeno_t ino;
+  public:
+  C_MDC_OpenInoTraverseDir(MDCache *c, inodeno_t i) : cache(c), ino(i) {}
+  void finish(int r) {
+    assert(cache->opening_inodes.count(ino));
+    cache->_open_ino_traverse_dir(ino, cache->opening_inodes[ino], r);
+  }
+};
+
+struct C_MDC_OpenInoParentOpened : public Context {
+  MDCache *cache;
+  inodeno_t ino;
+  public:
+  C_MDC_OpenInoParentOpened(MDCache *c, inodeno_t i) : cache(c), ino(i) {}
+  void finish(int r) {
+    cache->_open_ino_parent_opened(ino, r);
+  }
+};
+
+void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err)
+{
+  dout(10) << "_open_ino_backtrace_fetched ino " << ino << " errno " << err << dendl;
+
+  assert(opening_inodes.count(ino));
+  open_ino_info_t& info = opening_inodes[ino];
+
+  CInode *in = get_inode(ino);
+  if (in) {
+    dout(10) << " found cached " << *in << dendl;
+    open_ino_finish(ino, info, in->authority().first);
+    return;
+  }
+
+  inode_backtrace_t backtrace;
+  if (err == 0) {
+    ::decode(backtrace, bl);
+    if (backtrace.pool != info.pool) {
+      dout(10) << " old object in pool " << info.pool
+	       << ", retrying pool " << backtrace.pool << dendl;
+      info.pool = backtrace.pool;
+      C_MDC_OpenInoBacktraceFetched *fin = new C_MDC_OpenInoBacktraceFetched(this, ino);
+      fetch_backtrace(ino, info.pool, fin->bl, fin);
+      return;
+    }
+  } else if (err == -ENOENT) {
+    int64_t meta_pool = mds->mdsmap->get_metadata_pool();
+    if (info.pool != meta_pool) {
+      dout(10) << " no object in pool " << info.pool
+	       << ", retrying pool " << meta_pool << dendl;
+      info.pool = meta_pool;
+      C_MDC_OpenInoBacktraceFetched *fin = new C_MDC_OpenInoBacktraceFetched(this, ino);
+      fetch_backtrace(ino, info.pool, fin->bl, fin);
+      return;
+    }
+  }
+
+  if (err == 0) {
+    if (backtrace.ancestors.empty()) {
+      dout(10) << " got empty backtrace " << dendl;
+      err = -EIO;
+    } else if (!info.ancestors.empty()) {
+      if (info.ancestors[0] == backtrace.ancestors[0]) {
+	dout(10) << " got same parents " << info.ancestors[0] << " 2 times" << dendl;
+	err = -EINVAL;
+      }
+    }
+  }
+  if (err) {
+    dout(10) << " failed to open ino " << ino << dendl;
+    open_ino_finish(ino, info, err);
+    return;
+  }
+
+  dout(10) << " got backtrace " << backtrace << dendl;
+  info.ancestors = backtrace.ancestors;
+
+  _open_ino_traverse_dir(ino, info, 0);
+}
+
+void MDCache::_open_ino_parent_opened(inodeno_t ino, int ret)
+{
+  dout(10) << "_open_ino_parent_opened ino " << ino << " ret " << ret << dendl;
+
+  assert(opening_inodes.count(ino));
+  open_ino_info_t& info = opening_inodes[ino];
+
+  CInode *in = get_inode(ino);
+  if (in) {
+    dout(10) << " found cached " << *in << dendl;
+    open_ino_finish(ino, info, in->authority().first);
+    return;
+  }
+
+  if (ret == mds->get_nodeid()) {
+    _open_ino_traverse_dir(ino, info, 0);
+  } else {
+    if (ret >= 0) {
+      info.check_peers = true;
+      info.auth_hint = ret;
+      info.checked.erase(ret);
+    }
+    do_open_ino(ino, info, ret);
+  }
+}
+
+Context* MDCache::_open_ino_get_waiter(inodeno_t ino, MMDSOpenIno *m)
+{
+  if (m)
+    return new C_MDS_RetryMessage(mds, m);
+  else
+    return new C_MDC_OpenInoTraverseDir(this, ino);
+}
+
+void MDCache::_open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int ret)
+{
+  dout(10) << "_open_ino_trvserse_dir ino " << ino << " ret " << ret << dendl;
+
+  CInode *in = get_inode(ino);
+  if (in) {
+    dout(10) << " found cached " << *in << dendl;
+    open_ino_finish(ino, info, in->authority().first);
+    return;
+  }
+
+  if (ret) {
+    do_open_ino(ino, info, ret);
+    return;
+  }
+
+  int hint = info.auth_hint;
+  ret = open_ino_traverse_dir(ino, NULL, info.ancestors,
+			      info.discover, info.want_xlocked, &hint);
+  if (ret > 0)
+    return;
+  if (hint != mds->get_nodeid())
+    info.auth_hint = hint;
+  do_open_ino(ino, info, ret);
+}
+
+int MDCache::open_ino_traverse_dir(inodeno_t ino, MMDSOpenIno *m,
+				   vector<inode_backpointer_t>& ancestors,
+				   bool discover, bool want_xlocked, int *hint)
+{
+  dout(10) << "open_ino_traverse_dir ino " << ino << " " << ancestors << dendl;
+  int err = 0;
+  for (unsigned i = 0; i < ancestors.size(); i++) {
+    CInode *diri = get_inode(ancestors[i].dirino);
+
+    if (!diri) {
+      if (discover && MDS_INO_IS_MDSDIR(ancestors[i].dirino)) {
+	open_foreign_mdsdir(ancestors[i].dirino, _open_ino_get_waiter(ino, m));
+	return 1;
+      }
+      continue;
+    }
+
+    if (!diri->is_dir()) {
+      dout(10) << " " << *diri << " is not dir" << dendl;
+      if (i == 0)
+	err = -ENOTDIR;
+      break;
+    }
+
+    string &name = ancestors[i].dname;
+    frag_t fg = diri->pick_dirfrag(name);
+    CDir *dir = diri->get_dirfrag(fg);
+    if (!dir) {
+      if (diri->is_auth()) {
+	if (diri->is_frozen()) {
+	  dout(10) << " " << *diri << " is frozen, waiting " << dendl;
+	  diri->add_waiter(CDir::WAIT_UNFREEZE, _open_ino_get_waiter(ino, m));
+	  return 1;
+	}
+	dir = diri->get_or_open_dirfrag(this, fg);
+      } else if (discover) {
+	open_remote_dirfrag(diri, fg, _open_ino_get_waiter(ino, m));
+	return 1;
+      }
+    }
+    if (dir) {
+      inodeno_t next_ino = i > 0 ? ancestors[i - 1].dirino : ino;
+      if (dir->is_auth()) {
+	CDentry *dn = dir->lookup(name);
+	CDentry::linkage_t *dnl = dn ? dn->get_linkage() : NULL;
+
+	if (!dnl && !dir->is_complete() &&
+	    (!dir->has_bloom() || dir->is_in_bloom(name))) {
+	  dout(10) << " fetching incomplete " << *dir << dendl;
+	  dir->fetch(_open_ino_get_waiter(ino, m));
+	  return 1;
+	}
+
+	dout(10) << " no ino " << next_ino << " in " << *dir << dendl;
+	if (i == 0)
+	  err = -ENOENT;
+      } else if (discover) {
+	discover_ino(dir, next_ino, _open_ino_get_waiter(ino, m),
+		     (i == 0 && want_xlocked));
+	return 1;
+      }
+    }
+    if (hint && i == 0)
+      *hint = dir ? dir->authority().first : diri->authority().first;
+    break;
+  }
+  return err;
+}
+
+void MDCache::open_ino_finish(inodeno_t ino, open_ino_info_t& info, int ret)
+{
+  dout(10) << "open_ino_finish ino " << ino << " ret " << ret << dendl;
+
+  finish_contexts(g_ceph_context, info.waiters, ret);
+  opening_inodes.erase(ino);
+}
+
+void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err)
+{
+  if (err < 0) {
+    info.checked.clear();
+    info.checked.insert(mds->get_nodeid());
+    info.checking = -1;
+    info.check_peers = true;
+    info.fetch_backtrace = true;
+    if (info.discover) {
+      info.discover = false;
+      info.ancestors.clear();
+    }
+  }
+
+  if (info.check_peers) {
+    info.check_peers = false;
+    info.checking = -1;
+    do_open_ino_peer(ino, info);
+  } else if (info.fetch_backtrace) {
+    info.check_peers = true;
+    info.fetch_backtrace = false;
+    info.checking = mds->get_nodeid();
+    info.checked.clear();
+    info.checked.insert(mds->get_nodeid());
+    C_MDC_OpenInoBacktraceFetched *fin = new C_MDC_OpenInoBacktraceFetched(this, ino);
+    fetch_backtrace(ino, info.pool, fin->bl, fin);
+  } else {
+    assert(!info.ancestors.empty());
+    info.checking = mds->get_nodeid();
+    open_ino(info.ancestors[0].dirino, mds->mdsmap->get_metadata_pool(),
+	     new C_MDC_OpenInoParentOpened(this, ino), info.want_replica);
+  }
+}
+
+void MDCache::do_open_ino_peer(inodeno_t ino, open_ino_info_t& info)
+{
+  set<int> all, active;
+  mds->mdsmap->get_mds_set(all);
+  mds->mdsmap->get_clientreplay_or_active_or_stopping_mds_set(active);
+  if (mds->get_state() == MDSMap::STATE_REJOIN)
+    mds->mdsmap->get_mds_set(active, MDSMap::STATE_REJOIN);
+
+  dout(10) << "do_open_ino_peer " << ino << " active " << active
+	   << " all " << all << " checked " << info.checked << dendl;
+
+  int peer = -1;
+  if (info.auth_hint >= 0) {
+    if (active.count(info.auth_hint)) {
+      peer = info.auth_hint;
+      info.auth_hint = -1;
+    }
+  } else {
+    for (set<int>::iterator p = active.begin(); p != active.end(); ++p)
+      if (*p != mds->get_nodeid() && info.checked.count(*p) == 0) {
+	peer = *p;
+	break;
+      }
+  }
+  if (peer < 0) {
+    if (all.size() > active.size() && all != info.checked) {
+      dout(10) << " waiting for more peers to be active" << dendl;
+    } else {
+      dout(10) << " all MDS peers have been checked " << dendl;
+      do_open_ino(ino, info, 0);
+    }
+  } else {
+    info.checking = peer;
+    mds->send_message_mds(new MMDSOpenIno(info.tid, ino, info.ancestors), peer);
+  }
+}
+
+void MDCache::handle_open_ino(MMDSOpenIno *m)
+{
+  dout(10) << "handle_open_ino " << *m << dendl;
+
+  inodeno_t ino = m->ino;
+  MMDSOpenInoReply *reply;
+  CInode *in = get_inode(ino);
+  if (in) {
+    dout(10) << " have " << *in << dendl;
+    reply = new MMDSOpenInoReply(m->get_tid(), ino, 0);
+    if (in->is_auth()) {
+      touch_inode(in);
+      while (1) {
+	CDentry *pdn = in->get_parent_dn();
+	if (!pdn)
+	  break;
+	CInode *diri = pdn->get_dir()->get_inode();
+	reply->ancestors.push_back(inode_backpointer_t(diri->ino(), pdn->name,
+						       in->inode.version));
+	in = diri;
+      }
+    } else {
+      reply->hint = in->authority().first;
+    }
+  } else {
+    int hint = -1;
+    int ret = open_ino_traverse_dir(ino, m, m->ancestors, false, false, &hint);
+    if (ret > 0)
+      return;
+    reply = new MMDSOpenInoReply(m->get_tid(), ino, hint, ret);
+  }
+  mds->messenger->send_message(reply, m->get_connection());
+  m->put();
+}
+
+void MDCache::handle_open_ino_reply(MMDSOpenInoReply *m)
+{
+  dout(10) << "handle_open_ino_reply " << *m << dendl;
+
+  inodeno_t ino = m->ino;
+  int from = m->get_source().num();
+  if (opening_inodes.count(ino)) {
+    open_ino_info_t& info = opening_inodes[ino];
+
+    if (info.checking == from)
+	info.checking = -1;
+    info.checked.insert(from);
+
+    CInode *in = get_inode(ino);
+    if (in) {
+      dout(10) << " found cached " << *in << dendl;
+      open_ino_finish(ino, info, in->authority().first);
+    } else if (!m->ancestors.empty()) {
+      dout(10) << " found ino " << ino << " on mds." << from << dendl;
+      if (!info.want_replica) {
+	open_ino_finish(ino, info, from);
+	return;
+      }
+
+      info.ancestors = m->ancestors;
+      info.auth_hint = from;
+      info.checking = mds->get_nodeid();
+      info.discover = true;
+      _open_ino_traverse_dir(ino, info, 0);
+    } else if (m->error) {
+      dout(10) << " error " << m->error << " from mds." << from << dendl;
+      do_open_ino(ino, info, m->error);
+    } else {
+      if (m->hint >= 0 && m->hint != mds->get_nodeid()) {
+	info.auth_hint = m->hint;
+	info.checked.erase(m->hint);
+      }
+      do_open_ino_peer(ino, info);
+    }
+  }
+  m->put();
+}
+
+void MDCache::kick_open_ino_peers(int who)
+{
+  dout(10) << "kick_open_ino_peers mds." << who << dendl;
+
+  for (map<inodeno_t, open_ino_info_t>::iterator p = opening_inodes.begin();
+       p != opening_inodes.end();
+       ++p) {
+    open_ino_info_t& info = p->second;
+    if (info.checking == who) {
+      dout(10) << "  kicking ino " << p->first << " who was checking mds." << who << dendl;
+      info.checking = -1;
+      do_open_ino_peer(p->first, info);
+    } else if (info.checking == -1) {
+      dout(10) << "  kicking ino " << p->first << " who was waiting" << dendl;
+      do_open_ino_peer(p->first, info);
+    }
+  }
+}
+
+void MDCache::open_ino(inodeno_t ino, int64_t pool, Context* fin,
+		       bool want_replica, bool want_xlocked)
+{
+  dout(10) << "open_ino " << ino << " pool " << pool << " want_replica "
+	   << want_replica << dendl;
+
+  if (opening_inodes.count(ino)) {
+    open_ino_info_t& info = opening_inodes[ino];
+    if (want_replica) {
+      info.want_replica = true;
+      if (want_xlocked)
+	info.want_xlocked = true;
+    }
+    info.waiters.push_back(fin);
+  } else {
+    open_ino_info_t& info = opening_inodes[ino];
+    info.checked.insert(mds->get_nodeid());
+    info.want_replica = want_replica;
+    info.want_xlocked = want_xlocked;
+    info.tid = ++open_ino_last_tid;
+    info.pool = pool >= 0 ? pool : mds->mdsmap->get_first_data_pool();
+    info.waiters.push_back(fin);
+    do_open_ino(ino, info, 0);
+  }
+}
+
 /* ---------------------------- */
 
 /*
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index b9a1ead..2aa1c69 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -53,6 +53,8 @@ class MDentryUnlink;
 class MLock;
 class MMDSFindIno;
 class MMDSFindInoReply;
+class MMDSOpenIno;
+class MMDSOpenInoReply;
 
 class Message;
 class MClientRequest;
@@ -756,6 +758,47 @@ public:
 				   C_GatherBuilder &gather_bld);
 
   void make_trace(vector<CDentry*>& trace, CInode *in);
+
+protected:
+  struct open_ino_info_t {
+    vector<inode_backpointer_t> ancestors;
+    set<int> checked;
+    int checking;
+    int auth_hint;
+    bool check_peers;
+    bool fetch_backtrace;
+    bool discover;
+    bool want_replica;
+    bool want_xlocked;
+    version_t tid;
+    int64_t pool;
+    list<Context*> waiters;
+    open_ino_info_t() : checking(-1), auth_hint(-1),
+      check_peers(true), fetch_backtrace(true), discover(false) {}
+  };
+  tid_t open_ino_last_tid;
+  map<inodeno_t,open_ino_info_t> opening_inodes;
+
+  void _open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err);
+  void _open_ino_parent_opened(inodeno_t ino, int ret);
+  void _open_ino_traverse_dir(inodeno_t ino, open_ino_info_t& info, int err);
+  Context* _open_ino_get_waiter(inodeno_t ino, MMDSOpenIno *m);
+  int open_ino_traverse_dir(inodeno_t ino, MMDSOpenIno *m,
+			    vector<inode_backpointer_t>& ancestors,
+			    bool discover, bool want_xlocked, int *hint);
+  void open_ino_finish(inodeno_t ino, open_ino_info_t& info, int err);
+  void do_open_ino(inodeno_t ino, open_ino_info_t& info, int err);
+  void do_open_ino_peer(inodeno_t ino, open_ino_info_t& info);
+  void handle_open_ino(MMDSOpenIno *m);
+  void handle_open_ino_reply(MMDSOpenInoReply *m);
+  friend class C_MDC_OpenInoBacktraceFetched;
+  friend class C_MDC_OpenInoTraverseDir;
+  friend class C_MDC_OpenInoParentOpened;
+
+public:
+  void kick_open_ino_peers(int who);
+  void open_ino(inodeno_t ino, int64_t pool, Context *fin,
+		bool want_replica=true, bool want_xlocked=false);
   
   // -- find_ino_peer --
   struct find_ino_peer_info_t {
diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc
index 16b857e..a7140c5 100644
--- a/src/mds/MDS.cc
+++ b/src/mds/MDS.cc
@@ -1011,12 +1011,7 @@ void MDS::handle_mds_map(MMDSMap *m)
     if (g_conf->mds_dump_cache_after_rejoin &&
 	oldmap->is_rejoining() && !mdsmap->is_rejoining()) 
       mdcache->dump_cache();      // for DEBUG only
-  }
-  if (oldmap->is_degraded() && !mdsmap->is_degraded() && state >= MDSMap::STATE_ACTIVE)
-    dout(1) << "cluster recovered." << dendl;
   
-  // did someone go active?
-  if (is_clientreplay() || is_active() || is_stopping()) {
     // ACTIVE|CLIENTREPLAY|REJOIN => we can discover from them.
     set<int> olddis, dis;
     oldmap->get_mds_set(olddis, MDSMap::STATE_ACTIVE);
@@ -1027,9 +1022,17 @@ void MDS::handle_mds_map(MMDSMap *m)
     mdsmap->get_mds_set(dis, MDSMap::STATE_REJOIN);
     for (set<int>::iterator p = dis.begin(); p != dis.end(); ++p) 
       if (*p != whoami &&            // not me
-	  olddis.count(*p) == 0)  // newly so?
+	  olddis.count(*p) == 0) {  // newly so?
 	mdcache->kick_discovers(*p);
+	mdcache->kick_open_ino_peers(*p);
+      }
+  }
+
+  if (oldmap->is_degraded() && !mdsmap->is_degraded() && state >= MDSMap::STATE_ACTIVE)
+    dout(1) << "cluster recovered." << dendl;
 
+  // did someone go active?
+  if (is_clientreplay() || is_active() || is_stopping()) {
     set<int> oldactive, active;
     oldmap->get_mds_set(oldactive, MDSMap::STATE_ACTIVE);
     oldmap->get_mds_set(oldactive, MDSMap::STATE_CLIENTREPLAY);
diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h
index c5bc1c3..3e2f67e 100644
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -308,6 +308,13 @@ public:
       if (p->second.state >= STATE_REPLAY && p->second.state <= STATE_STOPPING)
 	s.insert(p->second.rank);
   }
+  void get_clientreplay_or_active_or_stopping_mds_set(set<int>& s) {
+    for (map<uint64_t,mds_info_t>::const_iterator p = mds_info.begin();
+	 p != mds_info.end();
+	 ++p)
+      if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING)
+	s.insert(p->second.rank);
+  }
   void get_mds_set(set<int>& s, int state) {
     for (map<uint64_t,mds_info_t>::const_iterator p = mds_info.begin();
 	 p != mds_info.end();
diff --git a/src/mds/inode_backtrace.h b/src/mds/inode_backtrace.h
index d223f72..2d80ae3 100644
--- a/src/mds/inode_backtrace.h
+++ b/src/mds/inode_backtrace.h
@@ -35,6 +35,10 @@ struct inode_backpointer_t {
 };
 WRITE_CLASS_ENCODER(inode_backpointer_t)
 
+inline bool operator==(const inode_backpointer_t& l, const inode_backpointer_t& r) {
+	return l.dirino == r.dirino && l.version == r.version && l.dname == r.dname;
+}
+
 inline ostream& operator<<(ostream& out, const inode_backpointer_t& ib) {
   return out << "<" << ib.dirino << "/" << ib.dname << " v" << ib.version << ">";
 }
diff --git a/src/messages/MMDSOpenIno.h b/src/messages/MMDSOpenIno.h
new file mode 100644
index 0000000..0918e87
--- /dev/null
+++ b/src/messages/MMDSOpenIno.h
@@ -0,0 +1,46 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDSOPENINO_H
+#define CEPH_MDSOPENINO_H
+
+#include "msg/Message.h"
+
+struct MMDSOpenIno : public Message {
+  inodeno_t ino;
+  vector<inode_backpointer_t> ancestors;
+
+  MMDSOpenIno() : Message(MSG_MDS_OPENINO) {}
+  MMDSOpenIno(tid_t t, inodeno_t i, vector<inode_backpointer_t>& a) :
+    Message(MSG_MDS_OPENINO), ino(i), ancestors(a) {
+    header.tid = t;
+  }
+
+  const char *get_type_name() const { return "openino"; }
+  void print(ostream &out) const {
+    out << "openino(" << header.tid << " " << ino << " " << ancestors << ")";
+  }
+
+  void encode_payload(uint64_t features) {
+    ::encode(ino, payload);
+    ::encode(ancestors, payload);
+  }
+  void decode_payload() {
+    bufferlist::iterator p = payload.begin();
+    ::decode(ino, p);
+    ::decode(ancestors, p);
+  }
+};
+
+#endif
diff --git a/src/messages/MMDSOpenInoReply.h b/src/messages/MMDSOpenInoReply.h
new file mode 100644
index 0000000..245027f
--- /dev/null
+++ b/src/messages/MMDSOpenInoReply.h
@@ -0,0 +1,53 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#ifndef CEPH_MDSOPENINOREPLY_H
+#define CEPH_MDSOPENINOREPLY_H
+
+#include "msg/Message.h"
+
+struct MMDSOpenInoReply : public Message {
+  inodeno_t ino;
+  vector<inode_backpointer_t> ancestors;
+  int32_t hint;
+  int32_t error;
+
+  MMDSOpenInoReply() : Message(MSG_MDS_OPENINOREPLY) {}
+  MMDSOpenInoReply(tid_t t, inodeno_t i, int h=-1, int e=0) :
+    Message(MSG_MDS_OPENINOREPLY), ino(i), hint(h), error(e) {
+    header.tid = t;
+  }
+
+  const char *get_type_name() const { return "openinoreply"; }
+  void print(ostream &out) const {
+    out << "openinoreply(" << header.tid << " "
+	<< ino << " " << hint << " " << ancestors << ")";
+  }
+
+  void encode_payload(uint64_t features) {
+    ::encode(ino, payload);
+    ::encode(ancestors, payload);
+    ::encode(hint, payload);
+    ::encode(error, payload);
+  }
+  void decode_payload() {
+    bufferlist::iterator p = payload.begin();
+    ::decode(ino, p);
+    ::decode(ancestors, p);
+    ::decode(hint, p);
+    ::decode(error, p);
+  }
+};
+
+#endif
diff --git a/src/msg/Message.cc b/src/msg/Message.cc
index 77be03a..a6889d3 100644
--- a/src/msg/Message.cc
+++ b/src/msg/Message.cc
@@ -112,6 +112,8 @@ using namespace std;
 #include "messages/MMDSCacheRejoin.h"
 #include "messages/MMDSFindIno.h"
 #include "messages/MMDSFindInoReply.h"
+#include "messages/MMDSOpenIno.h"
+#include "messages/MMDSOpenInoReply.h"
 
 #include "messages/MDirUpdate.h"
 #include "messages/MDiscover.h"
@@ -533,6 +535,13 @@ Message *decode_message(CephContext *cct, ceph_msg_header& header, ceph_msg_foot
     m = new MMDSFindInoReply;
     break;
 
+  case MSG_MDS_OPENINO:
+    m = new MMDSOpenIno;
+    break;
+  case MSG_MDS_OPENINOREPLY:
+    m = new MMDSOpenInoReply;
+    break;
+
   case MSG_MDS_FRAGMENTNOTIFY:
     m = new MMDSFragmentNotify;
     break;
diff --git a/src/msg/Message.h b/src/msg/Message.h
index 33d26b2..5efb608 100644
--- a/src/msg/Message.h
+++ b/src/msg/Message.h
@@ -124,6 +124,8 @@
 #define MSG_MDS_DENTRYLINK         0x20c
 #define MSG_MDS_FINDINO            0x20d
 #define MSG_MDS_FINDINOREPLY       0x20e
+#define MSG_MDS_OPENINO            0x20f
+#define MSG_MDS_OPENINOREPLY       0x210
 
 #define MSG_MDS_LOCK               0x300
 #define MSG_MDS_INODEFILECAPS      0x301
-- 
1.8.1.4



  reply	other threads:[~2013-05-27  2:23 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-23  8:06 [PATCH 0/30] mds: lookup-by-ino & fixes Yan, Zheng
2013-05-23  8:06 ` [PATCH 01/30] mds: journal new subtrees created by rename Yan, Zheng
2013-05-23  8:06 ` [PATCH 02/30] mds: fix underwater dentry cleanup Yan, Zheng
2013-05-23  8:06 ` [PATCH 03/30] mds: don't stop at export bounds when journaling dir context Yan, Zheng
2013-05-23  8:06 ` [PATCH 04/30] mds: adjust subtree auth if import aborts in PREPPED state Yan, Zheng
2013-05-23  8:06 ` [PATCH 05/30] mds: fix uncommitted master wait Yan, Zheng
2013-05-23  8:06 ` [PATCH 06/30] mds: fix slave commit tracking Yan, Zheng
2013-05-23  8:06 ` [PATCH 07/30] mds: fix straydn race Yan, Zheng
2013-05-23 16:44   ` Sage Weil
2013-05-27  2:09     ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 08/30] mds: fix import cancel race Yan, Zheng
2013-05-23  8:06 ` [PATCH 09/30] mds: fix typo in Server::do_rename_rollback Yan, Zheng
2013-05-23  8:06 ` [PATCH 10/30] mds: remove buggy cache rejoin code Yan, Zheng
2013-05-23  8:06 ` [PATCH 11/30] mds: unfreeze inode when after rename rollback finishes Yan, Zheng
2013-05-23  8:06 ` [PATCH 12/30] mds: send slave request after target MDS is active Yan, Zheng
2013-05-23  8:06 ` [PATCH 13/30] mds: export CInode::STATE_NEEDSRECOVER Yan, Zheng
2013-05-23 17:59   ` Sage Weil
2013-05-27  2:11     ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 14/30] mds: export CInode:mds_caps_wanted Yan, Zheng
2013-05-23 18:04   ` Sage Weil
2013-05-27  2:12     ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 15/30] mds: notify auth MDS when cap_wanted changes Yan, Zheng
2013-05-23  8:06 ` [PATCH 16/30] mds: fix Locker::request_inode_file_caps() Yan, Zheng
2013-05-23  8:06 ` [PATCH 17/30] mds: defer releasing cap if necessary Yan, Zheng
2013-05-23  8:06 ` [PATCH 18/30] mds: don't issue Fc cap from replica Yan, Zheng
2013-05-23 18:11   ` Sage Weil
2013-05-27  2:13     ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 19/30] mds: fix check for base inode discovery Yan, Zheng
2013-05-23  8:06 ` [PATCH 20/30] mds: slient MDCache::trim_non_auth() Yan, Zheng
2013-05-23  8:06 ` [PATCH 21/30] mds: warn on unconnected snap realms Yan, Zheng
2013-05-23  8:06 ` [PATCH 22/30] mds: reorder EMetaBlob::add_primary_dentry's parameters Yan, Zheng
2013-05-23  8:06 ` [PATCH 23/30] mds: journal backtrace update in EMetaBlob::fullbit Yan, Zheng
2013-05-28  3:08   ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 24/30] mds: rename last_renamed_version to backtrace_version Yan, Zheng
2013-05-23  8:06 ` [PATCH 25/30] mds: bring back old style backtrace handling Yan, Zheng
2013-05-23 22:58   ` Sage Weil
2013-05-24  0:57     ` Yan, Zheng
2013-05-24  1:01       ` Sage Weil
2013-05-27  2:17     ` Yan, Zheng
2013-05-27 20:08       ` Sage Weil
2013-05-28  6:04         ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 26/30] mds: update backtraces when unlinking inodes Yan, Zheng
2013-05-23  8:06 ` [PATCH 27/30] mds: remove old backtrace handling Yan, Zheng
2013-05-23 22:46   ` Sage Weil
2013-05-27  2:15     ` Yan, Zheng
2013-05-23  8:06 ` [PATCH 28/30] mds: move fetch_backtrace() to class MDCache Yan, Zheng
2013-05-23  8:06 ` [PATCH 29/30] mds: open inode by ino Yan, Zheng
2013-05-27  2:23   ` Yan, Zheng [this message]
2013-05-23  8:06 ` [PATCH 30/30] mds: open missing cap inodes Yan, Zheng
2013-05-23 18:22 ` [PATCH 0/30] mds: lookup-by-ino & fixes Sage Weil
2013-05-24  8:44   ` Yan, Zheng
2013-05-27 19:21     ` Sage Weil
2013-05-28  3:03       ` Yan, Zheng
2013-05-30  0:10         ` Sage Weil
2013-05-27  2:56   ` Yan, Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51A2C388.5060404@intel.com \
    --to=zheng.z.yan@intel.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=greg@inktank.com \
    --cc=sage@inktank.com \
    --cc=sam.lang@inktank.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.