From: "Luís Henriques" <lhenriques@suse.de>
To: xiubli@redhat.com
Cc: idryomov@gmail.com, ceph-devel@vger.kernel.org,
jlayton@kernel.org, vshankar@redhat.com, mchangir@redhat.com,
stable@vger.kernel.org
Subject: Re: [PATCH v2] ceph: fix potential use-after-free bug when trimming caps
Date: Mon, 17 Apr 2023 16:55:52 +0100 [thread overview]
Message-ID: <87354yec53.fsf@brahms.olymp> (raw)
In-Reply-To: <20230417120850.60880-1-xiubli@redhat.com> (xiubli@redhat.com's message of "Mon, 17 Apr 2023 20:08:50 +0800")
xiubli@redhat.com writes:
> From: Xiubo Li <xiubli@redhat.com>
>
> When trimming the caps and just after the 'session->s_cap_lock' is
> released in ceph_iterate_session_caps() the cap maybe removed by
> another thread, and when using the stale cap memory in the callbacks
> it will trigger use-after-free crash.
>
> We need to check the existence of the cap just after the 'ci->i_ceph_lock'
> being acquired. And do nothing if it's already removed.
>
> Cc: stable@vger.kernel.org
> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2186264
I didn't had time to look closer at what this patch is fixing but the
above URL requires a account to access it. So I guess it should be
dropped or replaced by another one from the tracker...?
Also, just skimming through the patch, there are at least 2 obvious issues
with it. See below.
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V2:
> - Fix this in ceph_iterate_session_caps instead.
>
>
> fs/ceph/debugfs.c | 7 +++++-
> fs/ceph/mds_client.c | 56 ++++++++++++++++++++++++++++++--------------
> fs/ceph/mds_client.h | 2 +-
> 3 files changed, 46 insertions(+), 19 deletions(-)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index bec3c4549c07..5c0f07df5b02 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -248,14 +248,19 @@ static int metrics_caps_show(struct seq_file *s, void *p)
> return 0;
> }
>
> -static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
> +static int caps_show_cb(struct inode *inode, struct rb_node *ci_node, void *p)
> {
> + struct ceph_inode_info *ci = ceph_inode(inode);
> struct seq_file *s = p;
> + struct ceph_cap *cap;
>
> + spin_lock(&ci->i_ceph_lock);
> + cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
> cap->session->s_mds,
> ceph_cap_string(cap->issued),
> ceph_cap_string(cap->implemented));
> + spin_unlock(&ci->i_ceph_lock);
> return 0;
> }
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 294af79c25c9..7fcfbddd534d 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1786,7 +1786,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
> * Caller must hold session s_mutex.
> */
> int ceph_iterate_session_caps(struct ceph_mds_session *session,
> - int (*cb)(struct inode *, struct ceph_cap *,
> + int (*cb)(struct inode *, struct rb_node *ci_node,
> void *), void *arg)
> {
> struct list_head *p;
> @@ -1799,6 +1799,8 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
> spin_lock(&session->s_cap_lock);
> p = session->s_caps.next;
> while (p != &session->s_caps) {
> + struct rb_node *ci_node;
> +
> cap = list_entry(p, struct ceph_cap, session_caps);
> inode = igrab(&cap->ci->netfs.inode);
> if (!inode) {
> @@ -1806,6 +1808,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
> continue;
> }
> session->s_cap_iterator = cap;
> + ci_node = &cap->ci_node;
> spin_unlock(&session->s_cap_lock);
>
> if (last_inode) {
> @@ -1817,7 +1820,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
> old_cap = NULL;
> }
>
> - ret = cb(inode, cap, arg);
> + ret = cb(inode, ci_node, arg);
> last_inode = inode;
>
> spin_lock(&session->s_cap_lock);
> @@ -1850,17 +1853,22 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
> return ret;
> }
>
> -static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> +static int remove_session_caps_cb(struct inode *inode, struct rb_node *ci_node,
> void *arg)
> {
> struct ceph_inode_info *ci = ceph_inode(inode);
> bool invalidate = false;
> + struct ceph_cap *cap;
> int iputs;
>
> - dout("removing cap %p, ci is %p, inode is %p\n",
> - cap, ci, &ci->netfs.inode);
> spin_lock(&ci->i_ceph_lock);
> - iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
This will leave iputs uninitialized if the statement below returns NULL.
Which will cause issues later in the function.
> + cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> + if (cap) {
> + dout(" removing cap %p, ci is %p, inode is %p\n",
> + cap, ci, &ci->netfs.inode);
> +
> + iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
> + }
> spin_unlock(&ci->i_ceph_lock);
>
> wake_up_all(&ci->i_cap_wq);
> @@ -1934,11 +1942,11 @@ enum {
> *
> * caller must hold s_mutex.
> */
> -static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
> - void *arg)
> +static int wake_up_session_cb(struct inode *inode, struct rb_node *ci_node, void *arg)
> {
> struct ceph_inode_info *ci = ceph_inode(inode);
> unsigned long ev = (unsigned long)arg;
> + struct ceph_cap *cap;
>
> if (ev == RECONNECT) {
> spin_lock(&ci->i_ceph_lock);
> @@ -1949,7 +1957,9 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
> if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) {
Since we're replacing the 'cap' argument by the 'ci_node', the
above statement will have garbage in 'cap'.
Cheers,
--
Luís
> /* mds did not re-issue stale cap */
> spin_lock(&ci->i_ceph_lock);
> - cap->issued = cap->implemented = CEPH_CAP_PIN;
> + cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> + if (cap)
> + cap->issued = cap->implemented = CEPH_CAP_PIN;
> spin_unlock(&ci->i_ceph_lock);
> }
> } else if (ev == FORCE_RO) {
> @@ -2113,16 +2123,22 @@ static bool drop_negative_children(struct dentry *dentry)
> * Yes, this is a bit sloppy. Our only real goal here is to respond to
> * memory pressure from the MDS, though, so it needn't be perfect.
> */
> -static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
> +static int trim_caps_cb(struct inode *inode, struct rb_node *ci_node, void *arg)
> {
> int *remaining = arg;
> struct ceph_inode_info *ci = ceph_inode(inode);
> int used, wanted, oissued, mine;
> + struct ceph_cap *cap;
>
> if (*remaining <= 0)
> return -1;
>
> spin_lock(&ci->i_ceph_lock);
> + cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> + if (!cap) {
> + spin_unlock(&ci->i_ceph_lock);
> + return 0;
> + }
> mine = cap->issued | cap->implemented;
> used = __ceph_caps_used(ci);
> wanted = __ceph_caps_file_wanted(ci);
> @@ -4265,26 +4281,23 @@ static struct dentry* d_find_primary(struct inode *inode)
> /*
> * Encode information about a cap for a reconnect with the MDS.
> */
> -static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
> +static int reconnect_caps_cb(struct inode *inode, struct rb_node *ci_node,
> void *arg)
> {
> union {
> struct ceph_mds_cap_reconnect v2;
> struct ceph_mds_cap_reconnect_v1 v1;
> } rec;
> - struct ceph_inode_info *ci = cap->ci;
> + struct ceph_inode_info *ci = ceph_inode(inode);
> struct ceph_reconnect_state *recon_state = arg;
> struct ceph_pagelist *pagelist = recon_state->pagelist;
> struct dentry *dentry;
> + struct ceph_cap *cap;
> char *path;
> - int pathlen = 0, err;
> + int pathlen = 0, err = 0;
> u64 pathbase;
> u64 snap_follows;
>
> - dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
> - inode, ceph_vinop(inode), cap, cap->cap_id,
> - ceph_cap_string(cap->issued));
> -
> dentry = d_find_primary(inode);
> if (dentry) {
> /* set pathbase to parent dir when msg_version >= 2 */
> @@ -4301,6 +4314,15 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
> }
>
> spin_lock(&ci->i_ceph_lock);
> + cap = rb_entry(ci_node, struct ceph_cap, ci_node);
> + if (!cap) {
> + spin_lock(&ci->i_ceph_lock);
> + goto out_err;
> + }
> + dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
> + inode, ceph_vinop(inode), cap, cap->cap_id,
> + ceph_cap_string(cap->issued));
> +
> cap->seq = 0; /* reset cap seq */
> cap->issue_seq = 0; /* and issue_seq */
> cap->mseq = 0; /* and migrate_seq */
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index 0f70ca3cdb21..001b69d04307 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -569,7 +569,7 @@ extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
> extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
> extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
> int (*cb)(struct inode *,
> - struct ceph_cap *, void *),
> + struct rb_node *ci_node, void *),
> void *arg);
> extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
>
> --
>
> 2.39.2
>
next prev parent reply other threads:[~2023-04-17 15:56 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-17 12:08 [PATCH v2] ceph: fix potential use-after-free bug when trimming caps xiubli
2023-04-17 15:55 ` Luís Henriques [this message]
2023-04-18 0:46 ` Xiubo Li
2023-04-18 0:48 ` Xiubo Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87354yec53.fsf@brahms.olymp \
--to=lhenriques@suse.de \
--cc=ceph-devel@vger.kernel.org \
--cc=idryomov@gmail.com \
--cc=jlayton@kernel.org \
--cc=mchangir@redhat.com \
--cc=stable@vger.kernel.org \
--cc=vshankar@redhat.com \
--cc=xiubli@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.