* [PATCH v3 1/5] ceph: clean up unsafe d_parent access in __choose_mds
2016-12-14 14:54 [PATCH v3 0/5] ceph: clean up some unsafe dentry->d_parent accesses Jeff Layton
@ 2016-12-14 14:54 ` Jeff Layton
2016-12-15 9:08 ` Yan, Zheng
2016-12-14 14:54 ` [PATCH v3 2/5] ceph: clean up unsafe d_parent accesses in build_dentry_path Jeff Layton
` (3 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2016-12-14 14:54 UTC (permalink / raw)
To: ceph-devel; +Cc: zyan, sage, idryomov, linux-fsdevel, dhowells, viro
__choose_mds exists to pick an MDS to use when issuing a call. Doing
that typically involves picking an inode and using the authoritative
MDS for it. In most cases, that's pretty straightforward, as we are
using an inode to which we hold a reference (usually represented by
r_dentry or r_inode in the request).
In the case of a snapshotted directory however, we need to fetch
the non-snapped parent, which involves walking back up the parents
in the tree. The dentries in the snapshot dir are effectively frozen
but the overall parent is _not_, and could vanish if a concurrent
rename were to occur.
Clean this code up and take special care to ensure the validity of
the entries we're working with. First, try to use the inode in
r_locked_dir if one exists. If not and all we have is r_dentry,
then we have to walk back up the tree. Use the rcu_read_lock for
this so we can ensure that any d_parent we find won't go away, and
take extra care to deal with the possibility that the dentries could
go negative.
Change get_nonsnap_parent to return an inode, and take a reference to
that inode before returning (if any). Change all of the other places
where we set "inode" in __choose_mds to also take a reference, and then
call iput on that inode before exiting the function.
Link: http://tracker.ceph.com/issues/18148
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
fs/ceph/mds_client.c | 67 +++++++++++++++++++++++++++++++++++-----------------
1 file changed, 45 insertions(+), 22 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 815acd1a56d4..e62b566d3817 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -667,6 +667,27 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
}
/*
+ * Walk back up the dentry tree until we hit a dentry representing a
+ * non-snapshot inode. We do this using the rcu_read_lock (which must be held
+ * when calling this) to ensure that the objects won't disappear while we're
+ * working with them. Once we hit a candidate dentry, we attempt to take a
+ * reference to it, and return that as the result.
+ */
+static struct inode *get_nonsnap_parent(struct dentry *dentry) { struct inode
+ *inode = NULL;
+
+ while (dentry && !IS_ROOT(dentry)) {
+ inode = d_inode_rcu(dentry);
+ if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
+ break;
+ dentry = dentry->d_parent;
+ }
+ if (inode)
+ inode = igrab(inode);
+ return inode;
+}
+
+/*
* Choose mds to send request to next. If there is a hint set in the
* request (e.g., due to a prior forward hint from the mds), use that.
* Otherwise, consult frag tree and/or caps to identify the
@@ -674,19 +695,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
-static struct dentry *get_nonsnap_parent(struct dentry *dentry)
-{
- /*
- * we don't need to worry about protecting the d_parent access
- * here because we never renaming inside the snapped namespace
- * except to resplice to another snapdir, and either the old or new
- * result is a valid result.
- */
- while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
- dentry = dentry->d_parent;
- return dentry;
-}
-
static int __choose_mds(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
@@ -716,30 +724,42 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = NULL;
if (req->r_inode) {
inode = req->r_inode;
+ ihold(inode);
+ } else if (req->r_locked_dir) {
+ inode = req->r_locked_dir;
+ ihold(inode);
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
- struct dentry *parent = req->r_dentry->d_parent;
- struct inode *dir = d_inode(parent);
+ struct dentry *parent;
+ struct inode *dir;
+
+ rcu_read_lock();
+ parent = req->r_dentry->d_parent;
+ dir = d_inode_rcu(parent);
- if (dir->i_sb != mdsc->fsc->sb) {
- /* not this fs! */
+ if (!dir || dir->i_sb != mdsc->fsc->sb) {
+ /* not this fs or parent went negative */
inode = d_inode(req->r_dentry);
+ if (inode)
+ ihold(inode);
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
- struct dentry *dn = get_nonsnap_parent(parent);
- inode = d_inode(dn);
+ inode = get_nonsnap_parent(parent);
dout("__choose_mds using nonsnap parent %p\n", inode);
} else {
/* dentry target */
inode = d_inode(req->r_dentry);
if (!inode || mode == USE_AUTH_MDS) {
/* dir + name */
- inode = dir;
+ inode = igrab(dir);
hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
+ } else {
+ ihold(inode);
}
}
+ rcu_read_unlock();
}
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
@@ -768,7 +788,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
(int)r, frag.ndist);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
/* since this file/dir wasn't known to be
@@ -783,7 +803,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), frag.frag, mds);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
}
}
@@ -796,6 +816,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
+ iput(inode);
goto random;
}
mds = cap->session->s_mds;
@@ -803,6 +824,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), mds,
cap == ci->i_auth_cap ? "auth " : "", cap);
spin_unlock(&ci->i_ceph_lock);
+out:
+ iput(inode);
return mds;
random:
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v3 1/5] ceph: clean up unsafe d_parent access in __choose_mds
2016-12-14 14:54 ` [PATCH v3 1/5] ceph: clean up unsafe d_parent access in __choose_mds Jeff Layton
@ 2016-12-15 9:08 ` Yan, Zheng
2016-12-15 11:30 ` Jeff Layton
0 siblings, 1 reply; 9+ messages in thread
From: Yan, Zheng @ 2016-12-15 9:08 UTC (permalink / raw)
To: Jeff Layton
Cc: ceph-devel, Sage Weil, Ilya Dryomov, linux-fsdevel, David Howells,
viro
> On 14 Dec 2016, at 22:54, Jeff Layton <jlayton@redhat.com> wrote:
>
> __choose_mds exists to pick an MDS to use when issuing a call. Doing
> that typically involves picking an inode and using the authoritative
> MDS for it. In most cases, that's pretty straightforward, as we are
> using an inode to which we hold a reference (usually represented by
> r_dentry or r_inode in the request).
>
> In the case of a snapshotted directory however, we need to fetch
> the non-snapped parent, which involves walking back up the parents
> in the tree. The dentries in the snapshot dir are effectively frozen
> but the overall parent is _not_, and could vanish if a concurrent
> rename were to occur.
>
> Clean this code up and take special care to ensure the validity of
> the entries we're working with. First, try to use the inode in
> r_locked_dir if one exists. If not and all we have is r_dentry,
> then we have to walk back up the tree. Use the rcu_read_lock for
> this so we can ensure that any d_parent we find won't go away, and
> take extra care to deal with the possibility that the dentries could
> go negative.
>
> Change get_nonsnap_parent to return an inode, and take a reference to
> that inode before returning (if any). Change all of the other places
> where we set "inode" in __choose_mds to also take a reference, and then
> call iput on that inode before exiting the function.
>
> Link: http://tracker.ceph.com/issues/18148
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
> fs/ceph/mds_client.c | 67 +++++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 45 insertions(+), 22 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 815acd1a56d4..e62b566d3817 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -667,6 +667,27 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
> }
>
> /*
> + * Walk back up the dentry tree until we hit a dentry representing a
> + * non-snapshot inode. We do this using the rcu_read_lock (which must be held
> + * when calling this) to ensure that the objects won't disappear while we're
> + * working with them. Once we hit a candidate dentry, we attempt to take a
> + * reference to it, and return that as the result.
> + */
> +static struct inode *get_nonsnap_parent(struct dentry *dentry) { struct inode
> + *inode = NULL;
> +
> + while (dentry && !IS_ROOT(dentry)) {
> + inode = d_inode_rcu(dentry);
> + if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
> + break;
> + dentry = dentry->d_parent;
> + }
> + if (inode)
> + inode = igrab(inode);
> + return inode;
> +}
> +
> +/*
> * Choose mds to send request to next. If there is a hint set in the
> * request (e.g., due to a prior forward hint from the mds), use that.
> * Otherwise, consult frag tree and/or caps to identify the
> @@ -674,19 +695,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
> *
> * Called under mdsc->mutex.
> */
> -static struct dentry *get_nonsnap_parent(struct dentry *dentry)
> -{
> - /*
> - * we don't need to worry about protecting the d_parent access
> - * here because we never renaming inside the snapped namespace
> - * except to resplice to another snapdir, and either the old or new
> - * result is a valid result.
> - */
> - while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
> - dentry = dentry->d_parent;
> - return dentry;
> -}
> -
> static int __choose_mds(struct ceph_mds_client *mdsc,
> struct ceph_mds_request *req)
> {
> @@ -716,30 +724,42 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> inode = NULL;
> if (req->r_inode) {
> inode = req->r_inode;
> + ihold(inode);
> + } else if (req->r_locked_dir) {
> + inode = req->r_locked_dir;
> + ihold(inode);
this seems incorrect. how about following incremental patch
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c834d7d..4abc85f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -725,9 +725,6 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (req->r_inode) {
inode = req->r_inode;
ihold(inode);
- } else if (req->r_locked_dir) {
- inode = req->r_locked_dir;
- ihold(inode);
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
struct dentry *parent;
@@ -735,7 +732,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
rcu_read_lock();
parent = req->r_dentry->d_parent;
- dir = d_inode_rcu(parent);
+ dir = req->r_locked_dir ? : d_inode_rcu(parent);
if (!dir || dir->i_sb != mdsc->fsc->sb) {
/* not this fs or parent went negative */
Regards
Yan, Zheng
> } else if (req->r_dentry) {
> /* ignore race with rename; old or new d_parent is okay */
> - struct dentry *parent = req->r_dentry->d_parent;
> - struct inode *dir = d_inode(parent);
> + struct dentry *parent;
> + struct inode *dir;
> +
> + rcu_read_lock();
> + parent = req->r_dentry->d_parent;
> + dir = d_inode_rcu(parent);
>
> - if (dir->i_sb != mdsc->fsc->sb) {
> - /* not this fs! */
> + if (!dir || dir->i_sb != mdsc->fsc->sb) {
> + /* not this fs or parent went negative */
> inode = d_inode(req->r_dentry);
> + if (inode)
> + ihold(inode);
> } else if (ceph_snap(dir) != CEPH_NOSNAP) {
> /* direct snapped/virtual snapdir requests
> * based on parent dir inode */
> - struct dentry *dn = get_nonsnap_parent(parent);
> - inode = d_inode(dn);
> + inode = get_nonsnap_parent(parent);
> dout("__choose_mds using nonsnap parent %p\n", inode);
> } else {
> /* dentry target */
> inode = d_inode(req->r_dentry);
> if (!inode || mode == USE_AUTH_MDS) {
> /* dir + name */
> - inode = dir;
> + inode = igrab(dir);
> hash = ceph_dentry_hash(dir, req->r_dentry);
> is_hash = true;
> + } else {
> + ihold(inode);
> }
> }
> + rcu_read_unlock();
> }
>
> dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
> @@ -768,7 +788,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> (int)r, frag.ndist);
> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
> CEPH_MDS_STATE_ACTIVE)
> - return mds;
> + goto out;
> }
>
> /* since this file/dir wasn't known to be
> @@ -783,7 +803,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> inode, ceph_vinop(inode), frag.frag, mds);
> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
> CEPH_MDS_STATE_ACTIVE)
> - return mds;
> + goto out;
> }
> }
> }
> @@ -796,6 +816,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
> if (!cap) {
> spin_unlock(&ci->i_ceph_lock);
> + iput(inode);
> goto random;
> }
> mds = cap->session->s_mds;
> @@ -803,6 +824,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> inode, ceph_vinop(inode), mds,
> cap == ci->i_auth_cap ? "auth " : "", cap);
> spin_unlock(&ci->i_ceph_lock);
> +out:
> + iput(inode);
> return mds;
>
> random:
> --
> 2.7.4
>
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v3 1/5] ceph: clean up unsafe d_parent access in __choose_mds
2016-12-15 9:08 ` Yan, Zheng
@ 2016-12-15 11:30 ` Jeff Layton
0 siblings, 0 replies; 9+ messages in thread
From: Jeff Layton @ 2016-12-15 11:30 UTC (permalink / raw)
To: Yan, Zheng
Cc: ceph-devel, Sage Weil, Ilya Dryomov, linux-fsdevel, David Howells,
viro
On Thu, 2016-12-15 at 17:08 +0800, Yan, Zheng wrote:
> >
> > On 14 Dec 2016, at 22:54, Jeff Layton <jlayton@redhat.com> wrote:
> >
> > __choose_mds exists to pick an MDS to use when issuing a call. Doing
> > that typically involves picking an inode and using the authoritative
> > MDS for it. In most cases, that's pretty straightforward, as we are
> > using an inode to which we hold a reference (usually represented by
> > r_dentry or r_inode in the request).
> >
> > In the case of a snapshotted directory however, we need to fetch
> > the non-snapped parent, which involves walking back up the parents
> > in the tree. The dentries in the snapshot dir are effectively frozen
> > but the overall parent is _not_, and could vanish if a concurrent
> > rename were to occur.
> >
> > Clean this code up and take special care to ensure the validity of
> > the entries we're working with. First, try to use the inode in
> > r_locked_dir if one exists. If not and all we have is r_dentry,
> > then we have to walk back up the tree. Use the rcu_read_lock for
> > this so we can ensure that any d_parent we find won't go away, and
> > take extra care to deal with the possibility that the dentries could
> > go negative.
> >
> > Change get_nonsnap_parent to return an inode, and take a reference to
> > that inode before returning (if any). Change all of the other places
> > where we set "inode" in __choose_mds to also take a reference, and then
> > call iput on that inode before exiting the function.
> >
> > Link: http://tracker.ceph.com/issues/18148
> > Signed-off-by: Jeff Layton <jlayton@redhat.com>
> > ---
> > fs/ceph/mds_client.c | 67 +++++++++++++++++++++++++++++++++++-----------------
> > 1 file changed, 45 insertions(+), 22 deletions(-)
> >
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index 815acd1a56d4..e62b566d3817 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -667,6 +667,27 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
> > }
> >
> > /*
> > + * Walk back up the dentry tree until we hit a dentry representing a
> > + * non-snapshot inode. We do this using the rcu_read_lock (which must be held
> > + * when calling this) to ensure that the objects won't disappear while we're
> > + * working with them. Once we hit a candidate dentry, we attempt to take a
> > + * reference to it, and return that as the result.
> > + */
> > +static struct inode *get_nonsnap_parent(struct dentry *dentry) { struct inode
> > + *inode = NULL;
> > +
> > + while (dentry && !IS_ROOT(dentry)) {
> > + inode = d_inode_rcu(dentry);
> > + if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
> > + break;
> > + dentry = dentry->d_parent;
> > + }
> > + if (inode)
> > + inode = igrab(inode);
> > + return inode;
> > +}
> > +
> > +/*
> > * Choose mds to send request to next. If there is a hint set in the
> > * request (e.g., due to a prior forward hint from the mds), use that.
> > * Otherwise, consult frag tree and/or caps to identify the
> > @@ -674,19 +695,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
> > *
> > * Called under mdsc->mutex.
> > */
> > -static struct dentry *get_nonsnap_parent(struct dentry *dentry)
> > -{
> > - /*
> > - * we don't need to worry about protecting the d_parent access
> > - * here because we never renaming inside the snapped namespace
> > - * except to resplice to another snapdir, and either the old or new
> > - * result is a valid result.
> > - */
> > - while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
> > - dentry = dentry->d_parent;
> > - return dentry;
> > -}
> > -
> > static int __choose_mds(struct ceph_mds_client *mdsc,
> > struct ceph_mds_request *req)
> > {
> > @@ -716,30 +724,42 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> > inode = NULL;
> > if (req->r_inode) {
> > inode = req->r_inode;
> > + ihold(inode);
> > + } else if (req->r_locked_dir) {
> > + inode = req->r_locked_dir;
> > + ihold(inode);
>
>
> this seems incorrect. how about following incremental patch
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index c834d7d..4abc85f 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -725,9 +725,6 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> if (req->r_inode) {
> inode = req->r_inode;
> ihold(inode);
> - } else if (req->r_locked_dir) {
> - inode = req->r_locked_dir;
> - ihold(inode);
> } else if (req->r_dentry) {
> /* ignore race with rename; old or new d_parent is okay */
> struct dentry *parent;
> @@ -735,7 +732,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
>
> rcu_read_lock();
> parent = req->r_dentry->d_parent;
> - dir = d_inode_rcu(parent);
> + dir = req->r_locked_dir ? : d_inode_rcu(parent);
>
> if (!dir || dir->i_sb != mdsc->fsc->sb) {
> /* not this fs or parent went negative */
>
>
> Regards
> Yan, Zheng
Ahh ok...
My original concern there was that we wouldn't have any guarantee that
d_inode(parent) == req->r_locked_dir, and then the follow on checks
might be wrong.
But...if we can rely on the fact that we hold the parent's mutex there
(which I think is what r_locked_dir is supposed to indicate), then I
think we can be certain of it.
I'll fold your patch in with the original.
Thanks!
> >
> > } else if (req->r_dentry) {
> > /* ignore race with rename; old or new d_parent is okay */
> > - struct dentry *parent = req->r_dentry->d_parent;
> > - struct inode *dir = d_inode(parent);
> > + struct dentry *parent;
> > + struct inode *dir;
> > +
> > + rcu_read_lock();
> > + parent = req->r_dentry->d_parent;
> > + dir = d_inode_rcu(parent);
> >
> > - if (dir->i_sb != mdsc->fsc->sb) {
> > - /* not this fs! */
> > + if (!dir || dir->i_sb != mdsc->fsc->sb) {
> > + /* not this fs or parent went negative */
> > inode = d_inode(req->r_dentry);
> > + if (inode)
> > + ihold(inode);
> > } else if (ceph_snap(dir) != CEPH_NOSNAP) {
> > /* direct snapped/virtual snapdir requests
> > * based on parent dir inode */
> > - struct dentry *dn = get_nonsnap_parent(parent);
> > - inode = d_inode(dn);
> > + inode = get_nonsnap_parent(parent);
> > dout("__choose_mds using nonsnap parent %p\n", inode);
> > } else {
> > /* dentry target */
> > inode = d_inode(req->r_dentry);
> > if (!inode || mode == USE_AUTH_MDS) {
> > /* dir + name */
> > - inode = dir;
> > + inode = igrab(dir);
> > hash = ceph_dentry_hash(dir, req->r_dentry);
> > is_hash = true;
> > + } else {
> > + ihold(inode);
> > }
> > }
> > + rcu_read_unlock();
> > }
> >
> > dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
> > @@ -768,7 +788,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> > (int)r, frag.ndist);
> > if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
> > CEPH_MDS_STATE_ACTIVE)
> > - return mds;
> > + goto out;
> > }
> >
> > /* since this file/dir wasn't known to be
> > @@ -783,7 +803,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> > inode, ceph_vinop(inode), frag.frag, mds);
> > if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
> > CEPH_MDS_STATE_ACTIVE)
> > - return mds;
> > + goto out;
> > }
> > }
> > }
> > @@ -796,6 +816,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> > cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
> > if (!cap) {
> > spin_unlock(&ci->i_ceph_lock);
> > + iput(inode);
> > goto random;
> > }
> > mds = cap->session->s_mds;
> > @@ -803,6 +824,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
> > inode, ceph_vinop(inode), mds,
> > cap == ci->i_auth_cap ? "auth " : "", cap);
> > spin_unlock(&ci->i_ceph_lock);
> > +out:
> > + iput(inode);
> > return mds;
> >
> > random:
> > --
> > 2.7.4
> >
>
--
Jeff Layton <jlayton@redhat.com>
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 2/5] ceph: clean up unsafe d_parent accesses in build_dentry_path
2016-12-14 14:54 [PATCH v3 0/5] ceph: clean up some unsafe dentry->d_parent accesses Jeff Layton
2016-12-14 14:54 ` [PATCH v3 1/5] ceph: clean up unsafe d_parent access in __choose_mds Jeff Layton
@ 2016-12-14 14:54 ` Jeff Layton
2016-12-14 14:54 ` [PATCH v3 3/5] ceph: pass parent dir ino info to build_dentry_path Jeff Layton
` (2 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Jeff Layton @ 2016-12-14 14:54 UTC (permalink / raw)
To: ceph-devel; +Cc: zyan, sage, idryomov, linux-fsdevel, dhowells, viro
While we hold a reference to the dentry when build_dentry_path is
called, we could end up racing with a rename that changes d_parent.
Handle that situation correctly, by using the rcu_read_lock to
ensure that the parent dentry and inode stick around long enough
to safely check ceph_snap and ceph_ino.
Link: http://tracker.ceph.com/issues/18148
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
fs/ceph/mds_client.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e62b566d3817..2cd9e6b04e29 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1802,13 +1802,18 @@ static int build_dentry_path(struct dentry *dentry,
int *pfreepath)
{
char *path;
+ struct inode *dir;
- if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
- *pino = ceph_ino(d_inode(dentry->d_parent));
+ rcu_read_lock();
+ dir = d_inode_rcu(dentry->d_parent);
+ if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+ *pino = ceph_ino(dir);
+ rcu_read_unlock();
*ppath = dentry->d_name.name;
*ppathlen = dentry->d_name.len;
return 0;
}
+ rcu_read_unlock();
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
if (IS_ERR(path))
return PTR_ERR(path);
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 3/5] ceph: pass parent dir ino info to build_dentry_path
2016-12-14 14:54 [PATCH v3 0/5] ceph: clean up some unsafe dentry->d_parent accesses Jeff Layton
2016-12-14 14:54 ` [PATCH v3 1/5] ceph: clean up unsafe d_parent access in __choose_mds Jeff Layton
2016-12-14 14:54 ` [PATCH v3 2/5] ceph: clean up unsafe d_parent accesses in build_dentry_path Jeff Layton
@ 2016-12-14 14:54 ` Jeff Layton
2016-12-15 9:11 ` Yan, Zheng
2016-12-14 14:54 ` [PATCH v3 4/5] ceph: fix unsafe dcache access in ceph_encode_dentry_release Jeff Layton
2016-12-14 14:54 ` [PATCH v3 5/5] ceph: pass parent inode info to ceph_encode_dentry_release if we have it Jeff Layton
4 siblings, 1 reply; 9+ messages in thread
From: Jeff Layton @ 2016-12-14 14:54 UTC (permalink / raw)
To: ceph-devel; +Cc: zyan, sage, idryomov, linux-fsdevel, dhowells, viro
In the event that we have a parent inode reference in the request, we
can use that instead of mucking about in the dcache. Pass any parent
inode info we have down to build_dentry_path so it can make use of it.
Link: http://tracker.ceph.com/issues/18148
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
fs/ceph/mds_client.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2cd9e6b04e29..26cc17ee6d65 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1797,15 +1797,15 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
return path;
}
-static int build_dentry_path(struct dentry *dentry,
+static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
int *pfreepath)
{
char *path;
- struct inode *dir;
rcu_read_lock();
- dir = d_inode_rcu(dentry->d_parent);
+ if (!dir)
+ dir = d_inode_rcu(dentry->d_parent);
if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir);
rcu_read_unlock();
@@ -1849,8 +1849,8 @@ static int build_inode_path(struct inode *inode,
* an explicit ino+path.
*/
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
- const char *rpath, u64 rino,
- const char **ppath, int *pathlen,
+ struct inode *rdirino, const char *rpath,
+ u64 rino, const char **ppath, int *pathlen,
u64 *ino, int *freepath)
{
int r = 0;
@@ -1860,7 +1860,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
ceph_snap(rinode));
} else if (rdentry) {
- r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
+ r = build_dentry_path(rdentry, rdirino, ppath, pathlen, ino, freepath);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -1893,7 +1893,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
int ret;
ret = set_request_path_attr(req->r_inode, req->r_dentry,
- req->r_path1, req->r_ino1.ino,
+ req->r_locked_dir, req->r_path1, req->r_ino1.ino,
&path1, &pathlen1, &ino1, &freepath1);
if (ret < 0) {
msg = ERR_PTR(ret);
@@ -1901,6 +1901,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
}
ret = set_request_path_attr(NULL, req->r_old_dentry,
+ req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path2, &pathlen2, &ino2, &freepath2);
if (ret < 0) {
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v3 3/5] ceph: pass parent dir ino info to build_dentry_path
2016-12-14 14:54 ` [PATCH v3 3/5] ceph: pass parent dir ino info to build_dentry_path Jeff Layton
@ 2016-12-15 9:11 ` Yan, Zheng
0 siblings, 0 replies; 9+ messages in thread
From: Yan, Zheng @ 2016-12-15 9:11 UTC (permalink / raw)
To: Jeff Layton
Cc: ceph-devel, Sage Weil, Ilya Dryomov, linux-fsdevel, David Howells,
viro
> On 14 Dec 2016, at 22:54, Jeff Layton <jlayton@redhat.com> wrote:
>
> In the event that we have a parent inode reference in the request, we
> can use that instead of mucking about in the dcache. Pass any parent
> inode info we have down to build_dentry_path so it can make use of it.
>
> Link: http://tracker.ceph.com/issues/18148
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
> fs/ceph/mds_client.c | 15 ++++++++-------
> 1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 2cd9e6b04e29..26cc17ee6d65 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1797,15 +1797,15 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
> return path;
> }
>
> -static int build_dentry_path(struct dentry *dentry,
> +static int build_dentry_path(struct dentry *dentry, struct inode *dir,
> const char **ppath, int *ppathlen, u64 *pino,
> int *pfreepath)
> {
> char *path;
> - struct inode *dir;
>
> rcu_read_lock();
> - dir = d_inode_rcu(dentry->d_parent);
> + if (!dir)
> + dir = d_inode_rcu(dentry->d_parent);
> if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
> *pino = ceph_ino(dir);
> rcu_read_unlock();
> @@ -1849,8 +1849,8 @@ static int build_inode_path(struct inode *inode,
> * an explicit ino+path.
> */
> static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
> - const char *rpath, u64 rino,
> - const char **ppath, int *pathlen,
> + struct inode *rdirino, const char *rpath,
‘rdirino' looks strange (it’s not inode number), how about changing it to rdiri?
Regards
Yan, Zheng
> + u64 rino, const char **ppath, int *pathlen,
> u64 *ino, int *freepath)
> {
> int r = 0;
> @@ -1860,7 +1860,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
> dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
> ceph_snap(rinode));
> } else if (rdentry) {
> - r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
> + r = build_dentry_path(rdentry, rdirino, ppath, pathlen, ino, freepath);
> dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
> *ppath);
> } else if (rpath || rino) {
> @@ -1893,7 +1893,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
> int ret;
>
> ret = set_request_path_attr(req->r_inode, req->r_dentry,
> - req->r_path1, req->r_ino1.ino,
> + req->r_locked_dir, req->r_path1, req->r_ino1.ino,
> &path1, &pathlen1, &ino1, &freepath1);
> if (ret < 0) {
> msg = ERR_PTR(ret);
> @@ -1901,6 +1901,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
> }
>
> ret = set_request_path_attr(NULL, req->r_old_dentry,
> + req->r_old_dentry_dir,
> req->r_path2, req->r_ino2.ino,
> &path2, &pathlen2, &ino2, &freepath2);
> if (ret < 0) {
> --
> 2.7.4
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v3 4/5] ceph: fix unsafe dcache access in ceph_encode_dentry_release
2016-12-14 14:54 [PATCH v3 0/5] ceph: clean up some unsafe dentry->d_parent accesses Jeff Layton
` (2 preceding siblings ...)
2016-12-14 14:54 ` [PATCH v3 3/5] ceph: pass parent dir ino info to build_dentry_path Jeff Layton
@ 2016-12-14 14:54 ` Jeff Layton
2016-12-14 14:54 ` [PATCH v3 5/5] ceph: pass parent inode info to ceph_encode_dentry_release if we have it Jeff Layton
4 siblings, 0 replies; 9+ messages in thread
From: Jeff Layton @ 2016-12-14 14:54 UTC (permalink / raw)
To: ceph-devel; +Cc: zyan, sage, idryomov, linux-fsdevel, dhowells, viro
Accessing d_parent requires some sort of locking or it could vanish
out from under us. Since we take the d_lock anyway, use that to fetch
d_parent and take a reference to it, and then use that reference to
call ceph_encode_inode_release.
Link: http://tracker.ceph.com/issues/18148
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
fs/ceph/caps.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 16e6ded0b7f2..6bc5c1efbb26 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3834,7 +3834,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
int mds, int drop, int unless)
{
- struct inode *dir = d_inode(dentry->d_parent);
+ struct dentry *parent;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
@@ -3849,9 +3849,12 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
+ parent = dget(dentry->d_parent);
spin_unlock(&dentry->d_lock);
- ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
+ ret = ceph_encode_inode_release(p, d_inode(parent), mds, drop,
+ unless, force);
+ dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v3 5/5] ceph: pass parent inode info to ceph_encode_dentry_release if we have it
2016-12-14 14:54 [PATCH v3 0/5] ceph: clean up some unsafe dentry->d_parent accesses Jeff Layton
` (3 preceding siblings ...)
2016-12-14 14:54 ` [PATCH v3 4/5] ceph: fix unsafe dcache access in ceph_encode_dentry_release Jeff Layton
@ 2016-12-14 14:54 ` Jeff Layton
4 siblings, 0 replies; 9+ messages in thread
From: Jeff Layton @ 2016-12-14 14:54 UTC (permalink / raw)
To: ceph-devel; +Cc: zyan, sage, idryomov, linux-fsdevel, dhowells, viro
If we have a parent inode reference already, then we don't need to
go back up the directory tree to find one.
Link: http://tracker.ceph.com/issues/18148
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
fs/ceph/caps.c | 11 +++++++----
fs/ceph/mds_client.c | 7 +++++--
fs/ceph/super.h | 1 +
3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6bc5c1efbb26..e960b51dcae1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3832,9 +3832,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
}
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
+ struct inode *dir,
int mds, int drop, int unless)
{
- struct dentry *parent;
+ struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
@@ -3849,11 +3850,13 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
- parent = dget(dentry->d_parent);
+ if (!dir) {
+ parent = dget(dentry->d_parent);
+ dir = d_inode(parent);
+ }
spin_unlock(&dentry->d_lock);
- ret = ceph_encode_inode_release(p, d_inode(parent), mds, drop,
- unless, force);
+ ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
dput(parent);
spin_lock(&dentry->d_lock);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 26cc17ee6d65..593b22c74e82 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1955,10 +1955,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
mds, req->r_inode_drop, req->r_inode_unless, 0);
if (req->r_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_dentry,
- mds, req->r_dentry_drop, req->r_dentry_unless);
+ req->r_locked_dir, mds, req->r_dentry_drop,
+ req->r_dentry_unless);
if (req->r_old_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
- mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+ req->r_old_dentry_dir, mds,
+ req->r_old_dentry_drop,
+ req->r_old_dentry_unless);
if (req->r_old_inode_drop)
releases += ceph_encode_inode_release(&p,
d_inode(req->r_old_dentry),
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3e3fa9163059..e4ca0b718b6f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -901,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+ struct inode *dir,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread