All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luis Henriques <lhenriques@suse.com>
To: ceph-devel@vger.kernel.org
Cc: "Yan, Zheng" <zyan@redhat.com>, Jeff Layton <jlayton@redhat.com>,
	Jan Fajerski <jfajerski@suse.com>,
	Luis Henriques <lhenriques@suse.com>
Subject: [RFC v2 PATCH 1/4] ceph: add seqlock for snaprealm hierarchy change detection
Date: Mon, 18 Dec 2017 15:38:59 +0000	[thread overview]
Message-ID: <20171218153902.7455-2-lhenriques@suse.com> (raw)
In-Reply-To: <20171218153902.7455-1-lhenriques@suse.com>

It is possible to receive an update to the snaprealms hierarchy from an
MDS while walking through this hierarchy.  This patch adds a mechanism
similar to the one used in dcache to detect renames in lookups.  A new
seqlock is used to allow a retry in case a change has occurred while
walking through the snaprealms.

Link: http://tracker.ceph.com/issues/22372
Signed-off-by: Luis Henriques <lhenriques@suse.com>
---
 fs/ceph/snap.c  | 45 +++++++++++++++++++++++++++++++++++++++------
 fs/ceph/super.h |  2 ++
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 8a2ca41e4b97..8b9d6c7c0df4 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -54,6 +54,25 @@
  * console).
  */
 
+/*
+ * While walking through the snaprealm hierarchy it is possible that
+ * this hierarchy is updated (for ex, when a different client moves
+ * directories around).  snaprealm_lock isn't supposed to prevent this
+ * but, just like the rename_lock in dcache, to detect that this has
+ * happen so that a lookup can be retried.
+ *
+ * Here's a typical usage pattern for this lock:
+ *
+ * retry:
+ * 	seq = read_seqbegin(&snaprealm_lock);
+ *	realm = ci->i_snap_realm;
+ *	ceph_get_snap_realm(mdsc, realm);
+ *	... do stuff ...
+ *	ceph_put_snap_realm(mdsc, realm);
+ *	if (read_seqretry(&snaprealm_lock, seq))
+ *		goto retry;
+ */
+DEFINE_SEQLOCK(snaprealm_lock);
 
 /*
  * increase ref count for the realm
@@ -81,10 +100,12 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
 static void __insert_snap_realm(struct rb_root *root,
 				struct ceph_snap_realm *new)
 {
-	struct rb_node **p = &root->rb_node;
+	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct ceph_snap_realm *r = NULL;
 
+	write_seqlock(&snaprealm_lock);
+	p  = &root->rb_node;
 	while (*p) {
 		parent = *p;
 		r = rb_entry(parent, struct ceph_snap_realm, node);
@@ -98,6 +119,7 @@ static void __insert_snap_realm(struct rb_root *root,
 
 	rb_link_node(&new->node, parent, p);
 	rb_insert_color(&new->node, root);
+	write_sequnlock(&snaprealm_lock);
 }
 
 /*
@@ -136,9 +158,14 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
 static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc,
 						   u64 ino)
 {
-	struct rb_node *n = mdsc->snap_realms.rb_node;
-	struct ceph_snap_realm *r;
-
+	struct rb_node *n;
+	struct ceph_snap_realm *realm, *r;
+	unsigned seq;
+
+retry:
+	realm = NULL;
+	seq = read_seqbegin(&snaprealm_lock);
+	n = mdsc->snap_realms.rb_node;
 	while (n) {
 		r = rb_entry(n, struct ceph_snap_realm, node);
 		if (ino < r->ino)
@@ -147,10 +174,14 @@ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc,
 			n = n->rb_right;
 		else {
 			dout("lookup_snap_realm %llx %p\n", r->ino, r);
-			return r;
+			realm = r;
+			break;
 		}
 	}
-	return NULL;
+
+	if (read_seqretry(&snaprealm_lock, seq))
+		goto retry;
+	return realm;
 }
 
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
@@ -174,7 +205,9 @@ static void __destroy_snap_realm(struct ceph_mds_client *mdsc,
 {
 	dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);
 
+	write_seqlock(&snaprealm_lock);
 	rb_erase(&realm->node, &mdsc->snap_realms);
+	write_sequnlock(&snaprealm_lock);
 
 	if (realm->parent) {
 		list_del_init(&realm->child_item);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 2beeec07fa76..6474e8d875b7 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -760,6 +760,8 @@ static inline int default_congestion_kb(void)
 
 
 /* snap.c */
+extern seqlock_t snaprealm_lock;
+
 struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
 					       u64 ino);
 extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,

  reply	other threads:[~2017-12-18 15:39 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-18 15:38 [RFC v2 PATCH 0/4] ceph: kernel client cephfs quota support Luis Henriques
2017-12-18 15:38 ` Luis Henriques [this message]
2017-12-19  9:22   ` [RFC v2 PATCH 1/4] ceph: add seqlock for snaprealm hierarchy change detection Yan, Zheng
2017-12-19 10:57     ` Luis Henriques
2017-12-18 15:39 ` [RFC v2 PATCH 2/4] ceph: quota: add initial infrastructure to support cephfs quotas Luis Henriques
2017-12-19  9:24   ` Yan, Zheng
2017-12-19 10:59     ` Luis Henriques
2017-12-18 15:39 ` [RFC v2 PATCH 3/4] ceph: quotas: support for ceph.quota.max_files Luis Henriques
2017-12-18 15:39 ` [RFC v2 PATCH 4/4] ceph: quota: don't allow cross-quota renames Luis Henriques

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171218153902.7455-2-lhenriques@suse.com \
    --to=lhenriques@suse.com \
    --cc=ceph-devel@vger.kernel.org \
    --cc=jfajerski@suse.com \
    --cc=jlayton@redhat.com \
    --cc=zyan@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.