All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Luís Henriques" <lhenriques@suse.de>
To: xiubli@redhat.com
Cc: idryomov@gmail.com, ceph-devel@vger.kernel.org,
	jlayton@kernel.org, vshankar@redhat.com, mchangir@redhat.com,
	stable@vger.kernel.org
Subject: Re: [PATCH v3] ceph: fix potential use-after-free bug when trimming caps
Date: Tue, 18 Apr 2023 15:20:57 +0100	[thread overview]
Message-ID: <877cu9w9ti.fsf@brahms.olymp> (raw)
In-Reply-To: <20230418014506.95428-1-xiubli@redhat.com> (xiubli@redhat.com's message of "Tue, 18 Apr 2023 09:45:06 +0800")

xiubli@redhat.com writes:

> From: Xiubo Li <xiubli@redhat.com>
>
> When trimming the caps and just after the 'session->s_cap_lock' is
> released in ceph_iterate_session_caps() the cap maybe removed by
> another thread, and when using the stale cap memory in the callbacks
> it will trigger use-after-free crash.
>
> We need to check the existence of the cap just after the 'ci->i_ceph_lock'
> being acquired. And do nothing if it's already removed.

Your patch seems to be OK, but I'll be honest: the locking is *so* complex
that I can say for sure it really solves any problem :-(

ceph_put_cap() uses mdsc->caps_list_lock to protect the list, but I can't
be sure that holding ci->i_ceph_lock will protect a race in the case
you're trying to solve.

Is the issue in that bugzilla reproducible, or was that a one-time thing?

Cheers,
-- 
Luís


> Cc: stable@vger.kernel.org
> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2186264
> URL: https://tracker.ceph.com/issues/43272
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V3:
> - Fixed 3 issues, which reported by Luis and kernel test robot <lkp@intel.com>
>
>  fs/ceph/debugfs.c    |  7 ++++-
>  fs/ceph/mds_client.c | 68 +++++++++++++++++++++++++++++---------------
>  fs/ceph/mds_client.h |  2 +-
>  3 files changed, 52 insertions(+), 25 deletions(-)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index bec3c4549c07..5c0f07df5b02 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -248,14 +248,19 @@ static int metrics_caps_show(struct seq_file *s, void *p)
>  	return 0;
>  }
>  
> -static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
> +static int caps_show_cb(struct inode *inode, struct rb_node *ci_node, void *p)
>  {
> +	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct seq_file *s = p;
> +	struct ceph_cap *cap;
>  
> +	spin_lock(&ci->i_ceph_lock);
> +	cap = rb_entry(ci_node, struct ceph_cap, ci_node);
>  	seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
>  		   cap->session->s_mds,
>  		   ceph_cap_string(cap->issued),
>  		   ceph_cap_string(cap->implemented));
> +	spin_unlock(&ci->i_ceph_lock);
>  	return 0;
>  }
>  
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 294af79c25c9..fb777add2257 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1786,7 +1786,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
>   * Caller must hold session s_mutex.
>   */
>  int ceph_iterate_session_caps(struct ceph_mds_session *session,
> -			      int (*cb)(struct inode *, struct ceph_cap *,
> +			      int (*cb)(struct inode *, struct rb_node *ci_node,
>  					void *), void *arg)
>  {
>  	struct list_head *p;
> @@ -1799,6 +1799,8 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  	spin_lock(&session->s_cap_lock);
>  	p = session->s_caps.next;
>  	while (p != &session->s_caps) {
> +		struct rb_node *ci_node;
> +
>  		cap = list_entry(p, struct ceph_cap, session_caps);
>  		inode = igrab(&cap->ci->netfs.inode);
>  		if (!inode) {
> @@ -1806,6 +1808,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  			continue;
>  		}
>  		session->s_cap_iterator = cap;
> +		ci_node = &cap->ci_node;
>  		spin_unlock(&session->s_cap_lock);
>  
>  		if (last_inode) {
> @@ -1817,7 +1820,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  			old_cap = NULL;
>  		}
>  
> -		ret = cb(inode, cap, arg);
> +		ret = cb(inode, ci_node, arg);
>  		last_inode = inode;
>  
>  		spin_lock(&session->s_cap_lock);
> @@ -1850,20 +1853,26 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  	return ret;
>  }
>  
> -static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> +static int remove_session_caps_cb(struct inode *inode, struct rb_node *ci_node,
>  				  void *arg)
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	bool invalidate = false;
> -	int iputs;
> +	struct ceph_cap *cap;
> +	int iputs = 0;
>  
> -	dout("removing cap %p, ci is %p, inode is %p\n",
> -	     cap, ci, &ci->netfs.inode);
>  	spin_lock(&ci->i_ceph_lock);
> -	iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
> +	cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> +	if (cap) {
> +		dout(" removing cap %p, ci is %p, inode is %p\n",
> +		     cap, ci, &ci->netfs.inode);
> +
> +		iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
> +	}
>  	spin_unlock(&ci->i_ceph_lock);
>  
> -	wake_up_all(&ci->i_cap_wq);
> +	if (cap)
> +		wake_up_all(&ci->i_cap_wq);
>  	if (invalidate)
>  		ceph_queue_invalidate(inode);
>  	while (iputs--)
> @@ -1934,8 +1943,7 @@ enum {
>   *
>   * caller must hold s_mutex.
>   */
> -static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
> -			      void *arg)
> +static int wake_up_session_cb(struct inode *inode, struct rb_node *ci_node, void *arg)
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	unsigned long ev = (unsigned long)arg;
> @@ -1946,12 +1954,14 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
>  		ci->i_requested_max_size = 0;
>  		spin_unlock(&ci->i_ceph_lock);
>  	} else if (ev == RENEWCAPS) {
> -		if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) {
> -			/* mds did not re-issue stale cap */
> -			spin_lock(&ci->i_ceph_lock);
> +		struct ceph_cap *cap;
> +
> +		spin_lock(&ci->i_ceph_lock);
> +		cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> +		/* mds did not re-issue stale cap */
> +		if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen))
>  			cap->issued = cap->implemented = CEPH_CAP_PIN;
> -			spin_unlock(&ci->i_ceph_lock);
> -		}
> +		spin_unlock(&ci->i_ceph_lock);
>  	} else if (ev == FORCE_RO) {
>  	}
>  	wake_up_all(&ci->i_cap_wq);
> @@ -2113,16 +2123,22 @@ static bool drop_negative_children(struct dentry *dentry)
>   * Yes, this is a bit sloppy.  Our only real goal here is to respond to
>   * memory pressure from the MDS, though, so it needn't be perfect.
>   */
> -static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
> +static int trim_caps_cb(struct inode *inode, struct rb_node *ci_node, void *arg)
>  {
>  	int *remaining = arg;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	int used, wanted, oissued, mine;
> +	struct ceph_cap *cap;
>  
>  	if (*remaining <= 0)
>  		return -1;
>  
>  	spin_lock(&ci->i_ceph_lock);
> +	cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> +	if (!cap) {
> +		spin_unlock(&ci->i_ceph_lock);
> +		return 0;
> +	}
>  	mine = cap->issued | cap->implemented;
>  	used = __ceph_caps_used(ci);
>  	wanted = __ceph_caps_file_wanted(ci);
> @@ -4265,26 +4281,23 @@ static struct dentry* d_find_primary(struct inode *inode)
>  /*
>   * Encode information about a cap for a reconnect with the MDS.
>   */
> -static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
> +static int reconnect_caps_cb(struct inode *inode, struct rb_node *ci_node,
>  			  void *arg)
>  {
>  	union {
>  		struct ceph_mds_cap_reconnect v2;
>  		struct ceph_mds_cap_reconnect_v1 v1;
>  	} rec;
> -	struct ceph_inode_info *ci = cap->ci;
> +	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_reconnect_state *recon_state = arg;
>  	struct ceph_pagelist *pagelist = recon_state->pagelist;
>  	struct dentry *dentry;
> +	struct ceph_cap *cap;
>  	char *path;
> -	int pathlen = 0, err;
> +	int pathlen = 0, err = 0;
>  	u64 pathbase;
>  	u64 snap_follows;
>  
> -	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
> -	     inode, ceph_vinop(inode), cap, cap->cap_id,
> -	     ceph_cap_string(cap->issued));
> -
>  	dentry = d_find_primary(inode);
>  	if (dentry) {
>  		/* set pathbase to parent dir when msg_version >= 2 */
> @@ -4301,6 +4314,15 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	}
>  
>  	spin_lock(&ci->i_ceph_lock);
> +	cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> +	if (!cap) {
> +		spin_unlock(&ci->i_ceph_lock);
> +		goto out_err;
> +	}
> +	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
> +	     inode, ceph_vinop(inode), cap, cap->cap_id,
> +	     ceph_cap_string(cap->issued));
> +
>  	cap->seq = 0;        /* reset cap seq */
>  	cap->issue_seq = 0;  /* and issue_seq */
>  	cap->mseq = 0;       /* and migrate_seq */
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 0f70ca3cdb21..001b69d04307 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -569,7 +569,7 @@ extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
>  extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
>  extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
>  				     int (*cb)(struct inode *,
> -					       struct ceph_cap *, void *),
> +					       struct rb_node *ci_node, void *),
>  				     void *arg);
>  extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
>  
> -- 
>
> 2.39.2
>

  reply	other threads:[~2023-04-18 14:21 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-18  1:45 [PATCH v3] ceph: fix potential use-after-free bug when trimming caps xiubli
2023-04-18 14:20 ` Luís Henriques [this message]
2023-04-19  2:28   ` Xiubo Li
2023-04-19 13:22     ` Luís Henriques
2023-04-20  1:14       ` Xiubo Li
2023-04-30  8:39       ` Ilya Dryomov
2023-05-01 10:37         ` Luís Henriques

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=877cu9w9ti.fsf@brahms.olymp \
    --to=lhenriques@suse.de \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=mchangir@redhat.com \
    --cc=stable@vger.kernel.org \
    --cc=vshankar@redhat.com \
    --cc=xiubli@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.