All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Luís Henriques" <lhenriques@suse.de>
To: Xiubo Li <xiubli@redhat.com>
Cc: jlayton@kernel.org, idryomov@gmail.com, vshankar@redhat.com,
	ceph-devel@vger.kernel.org
Subject: Re: [PATCH 2/2] ceph: update the auth cap when the async create req is forwarded
Date: Mon, 13 Jun 2022 17:07:34 +0100	[thread overview]
Message-ID: <87r13seed5.fsf@brahms.olymp> (raw)
In-Reply-To: <20220610043140.642501-3-xiubli@redhat.com> (Xiubo Li's message of "Fri, 10 Jun 2022 12:31:40 +0800")

Xiubo Li <xiubli@redhat.com> writes:

> For async create we will always try to choose the auth MDS of frag
> the dentry belonged to of the parent directory to send the request
> and ususally this works fine, but if the MDS migrated the directory
> to another MDS before it could be handled the request will be
> forwarded. And then the auth cap will be changed.
>
> We need to update the auth cap in this case before the request is
> forwarded.
>
> URL: https://tracker.ceph.com/issues/55857
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/file.c       | 12 +++++++++
>  fs/ceph/mds_client.c | 58 ++++++++++++++++++++++++++++++++++++++++++++
>  fs/ceph/super.h      |  2 ++
>  3 files changed, 72 insertions(+)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 0e82a1c383ca..54acf76c5e9b 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -613,6 +613,7 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
>  	struct ceph_mds_reply_inode in = { };
>  	struct ceph_mds_reply_info_in iinfo = { .in = &in };
>  	struct ceph_inode_info *ci = ceph_inode(dir);
> +	struct ceph_dentry_info *di = ceph_dentry(dentry);
>  	struct timespec64 now;
>  	struct ceph_string *pool_ns;
>  	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
> @@ -709,6 +710,12 @@ static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
>  		file->f_mode |= FMODE_CREATED;
>  		ret = finish_open(file, dentry, ceph_open);
>  	}
> +
> +	spin_lock(&dentry->d_lock);
> +	di->flags &= ~CEPH_DENTRY_ASYNC_CREATE;
> +	wake_up_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT);
> +	spin_unlock(&dentry->d_lock);

Question: shouldn't we initialise 'di' *after* grabbing ->d_lock?  Ceph
code doesn't seem to be consistent with this regard, but my understanding
is that doing it this way is racy.  And if so, some other places may need
fixing.

Cheers,
-- 
Luís


> +
>  	return ret;
>  }
>  
> @@ -786,6 +793,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>  				  try_prep_async_create(dir, dentry, &lo, &req->r_deleg_ino))) {
>  			struct ceph_vino vino = { .ino = req->r_deleg_ino,
>  						  .snap = CEPH_NOSNAP };
> +			struct ceph_dentry_info *di = ceph_dentry(dentry);
>  
>  			set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
>  			req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
> @@ -800,6 +808,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>  			}
>  			WARN_ON_ONCE(!(new_inode->i_state & I_NEW));
>  
> +			spin_lock(&dentry->d_lock);
> +			di->flags |= CEPH_DENTRY_ASYNC_CREATE;
> +			spin_unlock(&dentry->d_lock);
> +
>  			err = ceph_mdsc_submit_request(mdsc, dir, req);
>  			if (!err) {
>  				err = ceph_finish_async_create(dir, new_inode, dentry,
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index fa7f719807d9..a413b389a535 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2993,6 +2993,64 @@ static void __do_request(struct ceph_mds_client *mdsc,
>  	if (req->r_request_started == 0)   /* note request start time */
>  		req->r_request_started = jiffies;
>  
> +	/*
> +	 * For async create we will choose the auth MDS of frag in parent
> +	 * directory to send the request and ususally this works fine, but
> +	 * if the migrated the dirtory to another MDS before it could handle
> +	 * it the request will be forwarded.
> +	 *
> +	 * And then the auth cap will be changed.
> +	 */
> +	if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) && req->r_num_fwd) {
> +		struct ceph_dentry_info *di = ceph_dentry(req->r_dentry);
> +		struct ceph_inode_info *ci;
> +		struct ceph_cap *cap;
> +
> +		/*
> +		 * The request maybe handled very fast and the new inode
> +		 * hasn't been linked to the dentry yet. We need to wait
> +		 * for the ceph_finish_async_create(), which shouldn't be
> +		 * stuck too long or fail in thoery, to finish when forwarding
> +		 * the request.
> +		 */
> +		if (!d_inode(req->r_dentry)) {
> +			err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT,
> +					  TASK_KILLABLE);
> +			if (err) {
> +				mutex_lock(&req->r_fill_mutex);
> +				set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
> +				mutex_unlock(&req->r_fill_mutex);
> +				goto out_session;
> +			}
> +		}
> +
> +		ci = ceph_inode(d_inode(req->r_dentry));
> +
> +		spin_lock(&ci->i_ceph_lock);
> +		cap = ci->i_auth_cap;
> +		if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) {
> +			dout("do_request session changed for auth cap %d -> %d\n",
> +			     cap->session->s_mds, session->s_mds);
> +
> +			/* Remove the auth cap from old session */
> +			spin_lock(&cap->session->s_cap_lock);
> +			cap->session->s_nr_caps--;
> +			list_del_init(&cap->session_caps);
> +			spin_unlock(&cap->session->s_cap_lock);
> +
> +			/* Add the auth cap to the new session */
> +			cap->mds = mds;
> +			cap->session = session;
> +			spin_lock(&session->s_cap_lock);
> +			session->s_nr_caps++;
> +			list_add_tail(&cap->session_caps, &session->s_caps);
> +			spin_unlock(&session->s_cap_lock);
> +
> +			change_auth_cap_ses(ci, session);
> +		}
> +		spin_unlock(&ci->i_ceph_lock);
> +	}
> +
>  	err = __send_request(session, req, false);
>  
>  out_session:
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 3bdd60a3e680..5ccafab21bbb 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -304,6 +304,8 @@ struct ceph_dentry_info {
>  #define CEPH_DENTRY_PRIMARY_LINK	(1 << 3)
>  #define CEPH_DENTRY_ASYNC_UNLINK_BIT	(4)
>  #define CEPH_DENTRY_ASYNC_UNLINK	(1 << CEPH_DENTRY_ASYNC_UNLINK_BIT)
> +#define CEPH_DENTRY_ASYNC_CREATE_BIT	(5)
> +#define CEPH_DENTRY_ASYNC_CREATE	(1 << CEPH_DENTRY_ASYNC_CREATE_BIT)
>  
>  struct ceph_inode_xattrs_info {
>  	/*
> -- 
>
> 2.36.0.rc1
>

  reply	other threads:[~2022-06-13 18:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-10  4:31 [PATCH 0/2] ceph: update the auth cap when the async create req is forwarded Xiubo Li
2022-06-10  4:31 ` [PATCH 1/2] ceph: make change_auth_cap_ses a global symbol Xiubo Li
2022-06-10  4:31 ` [PATCH 2/2] ceph: update the auth cap when the async create req is forwarded Xiubo Li
2022-06-13 16:07   ` Luís Henriques [this message]
2022-06-14  0:55     ` Xiubo Li
2022-06-14  8:36       ` Luís Henriques
2022-06-14  9:12         ` Xiubo Li
2022-06-14  1:02     ` Xiubo Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87r13seed5.fsf@brahms.olymp \
    --to=lhenriques@suse.de \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=vshankar@redhat.com \
    --cc=xiubli@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.