From: dai.ngo@oracle.com
To: "J. Bruce Fields" <bfields@fieldses.org>
Cc: chuck.lever@oracle.com, linux-nfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v3 2/3] nfsd: Initial implementation of NFSv4 Courteous Server
Date: Thu, 23 Sep 2021 10:09:35 -0700	[thread overview]
Message-ID: <9e33d9b7-5947-488d-343f-80c86a27fd84@oracle.com> (raw)
In-Reply-To: <20210923013458.GE22937@fieldses.org>

On 9/22/21 6:34 PM, J. Bruce Fields wrote:
> On Thu, Sep 16, 2021 at 02:22:11PM -0400, Dai Ngo wrote:
>> +/*
>> + * If the conflict happens due to a NFSv4 request then check for
>> + * courtesy client and set rq_conflict_client so that upper layer
>> + * can destroy the conflict client and retry the call.
>> + */
> I think we need a different approach.

I think nfsd_check_courtesy_client is used to handle conflicts with
delegations. So instead of using rq_conflict_client to let the caller
know to destroy the courtesy client, as the current patch does, we
can ask the laundromat thread to do the destroy. In that case,
nfs4_get_vfs_file in nfsd4_process_open2 will either return no error,
because the laundromat has already destroyed the courtesy client, or
it gets nfserr_jukebox, which causes the NFS client to retry. By the
time the retry arrives, the courtesy client should already be
destroyed.
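
Roughly, the conflict check in the delegation-break path would then
collapse to something like this (a minimal sketch of the idea only,
untested; it reuses the NFSD4_DESTROY_COURTESY_CLIENT flag and the
laundromat kick from the current patch, and drops the i_am_nfsd()
special case):

static bool
nfsd_check_courtesy_client(struct nfs4_delegation *dp)
{
	struct nfs4_client *clp = dp->dl_recall.cb_clp;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (!test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags))
		return false;

	/* Hand the client to the laundromat; the conflicting open
	 * fails with nfserr_jukebox and the NFS client retries.
	 */
	set_bit(NFSD4_DESTROY_COURTESY_CLIENT, &clp->cl_flags);
	mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
	return true;
}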

>   Wouldn't we need to take a
> reference on clp when we assign to rq_conflict_client?

We won't need rq_conflict_client with the new approach.

>
> What happens if there are multiple leases found in the loop in
> __break_lease?

This should no longer be a problem either.

>
> It doesn't seem right that we'd need to treat the case where nfsd is the
> breaker differently from the case where it's another process.
>
> I'm not sure what to suggest instead, though....  Maybe as with locks we
> need two separate callbacks, one that tests whether there's a courtesy
> client that needs removing, one that does it after we've dropped the
> lock.

I will try the new approach if you don't see any obvious problems
with it.
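
For the lock case, the split you describe could look roughly like
this (just a sketch of your suggestion; the callback names and
signatures are made up, and it reuses nfs4_destroy_courtesy_client
from the current patch):

static bool
nfsd4_fl_lock_expirable(struct file_lock *fl)
{
	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
	struct nfs4_client *clp = lo->lo_owner.so_client;

	/* called with the lock held: only test, don't expire */
	return test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);
}

static void
nfsd4_fl_expire_lock(struct file_lock *fl)
{
	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;

	/* called after the lock has been dropped: safe to expire */
	nfs4_destroy_courtesy_client(lo->lo_owner.so_client);
}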

-Dai

>
> --b.
>
>> +static bool
>> +nfsd_check_courtesy_client(struct nfs4_delegation *dp)
>> +{
>> +	struct svc_rqst *rqst;
>> +	struct nfs4_client *clp = dp->dl_recall.cb_clp;
>> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>> +	bool ret = false;
>> +
>> +	if (!i_am_nfsd()) {
>> +		if (test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags)) {
>> +			set_bit(NFSD4_DESTROY_COURTESY_CLIENT, &clp->cl_flags);
>> +			mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
>> +			return true;
>> +		}
>> +		return false;
>> +	}
>> +	rqst = kthread_data(current);
>> +	if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
>> +		return false;
>> +	rqst->rq_conflict_client = NULL;
>> +
>> +	spin_lock(&nn->client_lock);
>> +	if (test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags) &&
>> +				!mark_client_expired_locked(clp)) {
>> +		rqst->rq_conflict_client = clp;
>> +		ret = true;
>> +	}
>> +	spin_unlock(&nn->client_lock);
>> +	return ret;
>> +}
>> +
>>   /* Called from break_lease() with i_lock held. */
>>   static bool
>>   nfsd_break_deleg_cb(struct file_lock *fl)
>> @@ -4660,6 +4706,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
>>   	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
>>   	struct nfs4_file *fp = dp->dl_stid.sc_file;
>>   
>> +	if (nfsd_check_courtesy_client(dp))
>> +		return false;
>>   	trace_nfsd_cb_recall(&dp->dl_stid);
>>   
>>   	/*
>> @@ -5279,6 +5327,22 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
>>   	 */
>>   }
>>   
>> +static bool
>> +nfs4_destroy_courtesy_client(struct nfs4_client *clp)
>> +{
>> +	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
>> +
>> +	spin_lock(&nn->client_lock);
>> +	if (!test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags) ||
>> +			mark_client_expired_locked(clp)) {
>> +		spin_unlock(&nn->client_lock);
>> +		return false;
>> +	}
>> +	spin_unlock(&nn->client_lock);
>> +	expire_client(clp);
>> +	return true;
>> +}
>> +
>>   __be32
>>   nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
>>   {
>> @@ -5328,7 +5392,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>>   			goto out;
>>   		}
>>   	} else {
>> +		rqstp->rq_conflict_client = NULL;
>>   		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
>> +		if (status == nfserr_jukebox && rqstp->rq_conflict_client) {
>> +			if (nfs4_destroy_courtesy_client(rqstp->rq_conflict_client))
>> +				status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
>> +		}
>> +
>>   		if (status) {
>>   			stp->st_stid.sc_type = NFS4_CLOSED_STID;
>>   			release_open_stateid(stp);
>> @@ -5562,6 +5632,47 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
>>   }
>>   #endif
>>   
>> +static
>> +bool nfs4_anylock_conflict(struct nfs4_client *clp)
>> +{
>> +	int i;
>> +	struct nfs4_stateowner *so, *tmp;
>> +	struct nfs4_lockowner *lo;
>> +	struct nfs4_ol_stateid *stp;
>> +	struct nfs4_file *nf;
>> +	struct inode *ino;
>> +	struct file_lock_context *ctx;
>> +	struct file_lock *fl;
>> +
>> +	for (i = 0; i < OWNER_HASH_SIZE; i++) {
>> +		/* scan each lock owner */
>> +		list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i],
>> +				so_strhash) {
>> +			if (so->so_is_open_owner)
>> +				continue;
>> +
>> +			/* scan lock states of this lock owner */
>> +			lo = lockowner(so);
>> +			list_for_each_entry(stp, &lo->lo_owner.so_stateids,
>> +					st_perstateowner) {
>> +				nf = stp->st_stid.sc_file;
>> +				ino = nf->fi_inode;
>> +				ctx = ino->i_flctx;
>> +				if (!ctx)
>> +					continue;
>> +				/* check each lock belongs to this lock state */
>> +				list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
>> +					if (fl->fl_owner != lo)
>> +						continue;
>> +					if (!list_empty(&fl->fl_blocked_requests))
>> +						return true;
>> +				}
>> +			}
>> +		}
>> +	}
>> +	return false;
>> +}
>> +
>>   static time64_t
>>   nfs4_laundromat(struct nfsd_net *nn)
>>   {
>> @@ -5577,7 +5688,9 @@ nfs4_laundromat(struct nfsd_net *nn)
>>   	};
>>   	struct nfs4_cpntf_state *cps;
>>   	copy_stateid_t *cps_t;
>> +	struct nfs4_stid *stid;
>>   	int i;
>> +	int id = 0;
>>   
>>   	if (clients_still_reclaiming(nn)) {
>>   		lt.new_timeo = 0;
>> @@ -5598,8 +5711,33 @@ nfs4_laundromat(struct nfsd_net *nn)
>>   	spin_lock(&nn->client_lock);
>>   	list_for_each_safe(pos, next, &nn->client_lru) {
>>   		clp = list_entry(pos, struct nfs4_client, cl_lru);
>> +		if (test_bit(NFSD4_DESTROY_COURTESY_CLIENT, &clp->cl_flags)) {
>> +			clear_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);
>> +			goto exp_client;
>> +		}
>> +		if (test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags)) {
>> +			if (ktime_get_boottime_seconds() >= clp->courtesy_client_expiry)
>> +				goto exp_client;
>> +			/*
>> +			 * after umount, v4.0 client is still
>> +			 * around waiting to be expired
>> +			 */
>> +			if (clp->cl_minorversion)
>> +				continue;
>> +		}
>>   		if (!state_expired(&lt, clp->cl_time))
>>   			break;
>> +		spin_lock(&clp->cl_lock);
>> +		stid = idr_get_next(&clp->cl_stateids, &id);
>> +		spin_unlock(&clp->cl_lock);
>> +		if (stid && !nfs4_anylock_conflict(clp)) {
>> +			/* client still has states */
>> +			clp->courtesy_client_expiry =
>> +				ktime_get_boottime_seconds() + courtesy_client_expiry;
>> +			set_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags);
>> +			continue;
>> +		}
>> +exp_client:
>>   		if (mark_client_expired_locked(clp))
>>   			continue;
>>   		list_add(&clp->cl_lru, &reaplist);
>> @@ -5679,9 +5817,6 @@ nfs4_laundromat(struct nfsd_net *nn)
>>   	return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
>>   }
>>   
>> -static struct workqueue_struct *laundry_wq;
>> -static void laundromat_main(struct work_struct *);
>> -
>>   static void
>>   laundromat_main(struct work_struct *laundry)
>>   {
>> @@ -6486,6 +6621,19 @@ nfs4_transform_lock_offset(struct file_lock *lock)
>>   		lock->fl_end = OFFSET_MAX;
>>   }
>>   
>> +/* return true if lock was expired else return false */
>> +static bool
>> +nfsd4_fl_expire_lock(struct file_lock *fl, bool testonly)
>> +{
>> +	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
>> +	struct nfs4_client *clp = lo->lo_owner.so_client;
>> +
>> +	if (testonly)
>> +		return test_bit(NFSD4_COURTESY_CLIENT, &clp->cl_flags) ?
>> +			true : false;
>> +	return nfs4_destroy_courtesy_client(clp);
>> +}
>> +
>>   static fl_owner_t
>>   nfsd4_fl_get_owner(fl_owner_t owner)
>>   {
>> @@ -6533,6 +6681,7 @@ static const struct lock_manager_operations nfsd_posix_mng_ops  = {
>>   	.lm_notify = nfsd4_lm_notify,
>>   	.lm_get_owner = nfsd4_fl_get_owner,
>>   	.lm_put_owner = nfsd4_fl_put_owner,
>> +	.lm_expire_lock = nfsd4_fl_expire_lock,
>>   };
>>   
>>   static inline void
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index e73bdbb1634a..93e30b101578 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -345,6 +345,8 @@ struct nfs4_client {
>>   #define NFSD4_CLIENT_UPCALL_LOCK	(5)	/* upcall serialization */
>>   #define NFSD4_CLIENT_CB_FLAG_MASK	(1 << NFSD4_CLIENT_CB_UPDATE | \
>>   					 1 << NFSD4_CLIENT_CB_KILL)
>> +#define NFSD4_COURTESY_CLIENT		(6)	/* be nice to expired client */
>> +#define NFSD4_DESTROY_COURTESY_CLIENT	(7)
>>   	unsigned long		cl_flags;
>>   	const struct cred	*cl_cb_cred;
>>   	struct rpc_clnt		*cl_cb_client;
>> @@ -385,6 +387,7 @@ struct nfs4_client {
>>   	struct list_head	async_copies;	/* list of async copies */
>>   	spinlock_t		async_lock;	/* lock for async copies */
>>   	atomic_t		cl_cb_inflight;	/* Outstanding callbacks */
>> +	int			courtesy_client_expiry;
>>   };
>>   
>>   /* struct nfs4_client_reset
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index 064c96157d1f..349bf7bf20d2 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -306,6 +306,7 @@ struct svc_rqst {
>>   						 * net namespace
>>   						 */
>>   	void **			rq_lease_breaker; /* The v4 client breaking a lease */
>> +	void			*rq_conflict_client;
>>   };
>>   
>>   #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
>> -- 
>> 2.9.5

Thread overview: 19+ messages
2021-09-16 18:22 [PATCH RFC v3 0/2] nfsd: Initial implementation of NFSv4 Courteous Server Dai Ngo
2021-09-16 18:22 ` [PATCH v3 1/3] fs/lock: add new callback, lm_expire_lock, to lock_manager_operations Dai Ngo
2021-09-16 18:22 ` [PATCH v3 2/3] nfsd: Initial implementation of NFSv4 Courteous Server Dai Ngo
2021-09-22 21:14   ` J. Bruce Fields
2021-09-22 22:16     ` dai.ngo
2021-09-23  1:18       ` J. Bruce Fields
2021-09-23 17:09         ` dai.ngo
2021-09-23  1:34   ` J. Bruce Fields
2021-09-23 17:09     ` dai.ngo [this message]
2021-09-23 19:32       ` J. Bruce Fields
2021-09-24 20:53         ` dai.ngo
2021-09-16 18:22 ` [PATCH v3 3/3] nfsd: back channel stuck in SEQ4_STATUS_CB_PATH_DOWN Dai Ngo
2021-09-16 19:00   ` Chuck Lever III
2021-09-16 19:55     ` Bruce Fields
2021-09-16 20:15       ` dai.ngo
2021-09-17 18:23         ` dai.ngo
2021-09-23  1:47 ` [PATCH RFC v3 0/2] nfsd: Initial implementation of NFSv4 Courteous Server J. Bruce Fields
2021-09-23 17:15   ` dai.ngo
2021-09-23 19:37     ` dai.ngo
