linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
@ 2025-06-23 12:22 Su Hui
  2025-06-23 15:47 ` Dan Carpenter
  2025-06-24  0:19 ` NeilBrown
  0 siblings, 2 replies; 14+ messages in thread
From: Su Hui @ 2025-06-23 12:22 UTC (permalink / raw)
  To: chuck.lever, jlayton, neil, okorniev, Dai.Ngo, tom
  Cc: Su Hui, linux-nfs, linux-kernel, kernel-janitors

Using guard() to replace the *unlock* labels. guard() makes the lock/unlock
code clearer. Change the order of the code so that all code run under the
lock is in the same scope. No functional changes.

Signed-off-by: Su Hui <suhui@nfschina.com>
---
 fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
 1 file changed, 48 insertions(+), 51 deletions(-)

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ba9d326b3de6..2d92adf3e6b0 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
 
 	if (type == RC_NOCACHE) {
 		nfsd_stats_rc_nocache_inc(nn);
-		goto out;
+		return rtn;
 	}
 
 	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
@@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
 	 */
 	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
 	if (!rp)
-		goto out;
+		return rtn;
 
 	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
-	spin_lock(&b->cache_lock);
-	found = nfsd_cache_insert(b, rp, nn);
-	if (found != rp)
-		goto found_entry;
-	*cacherep = rp;
-	rp->c_state = RC_INPROG;
-	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
-	spin_unlock(&b->cache_lock);
+	scoped_guard(spinlock, &b->cache_lock) {
+		found = nfsd_cache_insert(b, rp, nn);
+		if (found == rp) {
+			*cacherep = rp;
+			rp->c_state = RC_INPROG;
+			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
+			goto out;
+		}
+		/* We found a matching entry which is either in progress or done. */
+		nfsd_reply_cache_free_locked(NULL, rp, nn);
+		nfsd_stats_rc_hits_inc(nn);
+		rtn = RC_DROPIT;
+		rp = found;
+
+		/* Request being processed */
+		if (rp->c_state == RC_INPROG)
+			goto out_trace;
+
+		/* From the hall of fame of impractical attacks:
+		 * Is this a user who tries to snoop on the cache?
+		 */
+		rtn = RC_DOIT;
+		if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
+			goto out_trace;
 
+		/* Compose RPC reply header */
+		switch (rp->c_type) {
+		case RC_NOCACHE:
+			break;
+		case RC_REPLSTAT:
+			xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
+			rtn = RC_REPLY;
+			break;
+		case RC_REPLBUFF:
+			if (!nfsd_cache_append(rqstp, &rp->c_replvec))
+				return rtn; /* should not happen */
+			rtn = RC_REPLY;
+			break;
+		default:
+			WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
+		}
+
+out_trace:
+		trace_nfsd_drc_found(nn, rqstp, rtn);
+		return rtn;
+	}
+out:
 	nfsd_cacherep_dispose(&dispose);
 
 	nfsd_stats_rc_misses_inc(nn);
 	atomic_inc(&nn->num_drc_entries);
 	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
-	goto out;
-
-found_entry:
-	/* We found a matching entry which is either in progress or done. */
-	nfsd_reply_cache_free_locked(NULL, rp, nn);
-	nfsd_stats_rc_hits_inc(nn);
-	rtn = RC_DROPIT;
-	rp = found;
-
-	/* Request being processed */
-	if (rp->c_state == RC_INPROG)
-		goto out_trace;
-
-	/* From the hall of fame of impractical attacks:
-	 * Is this a user who tries to snoop on the cache? */
-	rtn = RC_DOIT;
-	if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
-		goto out_trace;
-
-	/* Compose RPC reply header */
-	switch (rp->c_type) {
-	case RC_NOCACHE:
-		break;
-	case RC_REPLSTAT:
-		xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
-		rtn = RC_REPLY;
-		break;
-	case RC_REPLBUFF:
-		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
-			goto out_unlock; /* should not happen */
-		rtn = RC_REPLY;
-		break;
-	default:
-		WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
-	}
-
-out_trace:
-	trace_nfsd_drc_found(nn, rqstp, rtn);
-out_unlock:
-	spin_unlock(&b->cache_lock);
-out:
 	return rtn;
 }
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-23 12:22 [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup() Su Hui
@ 2025-06-23 15:47 ` Dan Carpenter
  2025-06-24  1:45   ` Su Hui
  2025-06-24  0:19 ` NeilBrown
  1 sibling, 1 reply; 14+ messages in thread
From: Dan Carpenter @ 2025-06-23 15:47 UTC (permalink / raw)
  To: Su Hui
  Cc: chuck.lever, jlayton, neil, okorniev, Dai.Ngo, tom, linux-nfs,
	linux-kernel, kernel-janitors

On Mon, Jun 23, 2025 at 08:22:27PM +0800, Su Hui wrote:
> Using guard() to replace *unlock* label. guard() makes lock/unlock code
> more clear. Change the order of the code to let all lock code in the
> same scope. No functional changes.
> 
> Signed-off-by: Su Hui <suhui@nfschina.com>
> ---
>  fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
>  1 file changed, 48 insertions(+), 51 deletions(-)
> 
> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
> index ba9d326b3de6..2d92adf3e6b0 100644
> --- a/fs/nfsd/nfscache.c
> +++ b/fs/nfsd/nfscache.c
> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>  
>  	if (type == RC_NOCACHE) {
>  		nfsd_stats_rc_nocache_inc(nn);
> -		goto out;
> +		return rtn;
>  	}
>  
>  	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>  	 */
>  	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
>  	if (!rp)
> -		goto out;
> +		return rtn;
>  
>  	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
> -	spin_lock(&b->cache_lock);
> -	found = nfsd_cache_insert(b, rp, nn);
> -	if (found != rp)
> -		goto found_entry;
> -	*cacherep = rp;
> -	rp->c_state = RC_INPROG;
> -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> -	spin_unlock(&b->cache_lock);
> +	scoped_guard(spinlock, &b->cache_lock) {
> +		found = nfsd_cache_insert(b, rp, nn);
> +		if (found == rp) {
> +			*cacherep = rp;
> +			rp->c_state = RC_INPROG;
> +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> +			goto out;

It took me a while to figure out why we've added a goto here.  In the
original code this "goto out;" was a "spin_unlock(&b->cache_lock);".
The spin_unlock() is more readable because you can immediately see that
it's trying to drop the lock where a "goto out;" is less obvious about
the intention.

I think this patch works fine, but I'm not sure it's an improvement.

regards,
dan carpenter

> +		}
> +		/* We found a matching entry which is either in progress or done. */
> +		nfsd_reply_cache_free_locked(NULL, rp, nn);
> +		nfsd_stats_rc_hits_inc(nn);
> +		rtn = RC_DROPIT;
> +		rp = found;
> +
> +		/* Request being processed */
> +		if (rp->c_state == RC_INPROG)
> +			goto out_trace;
> +
> +		/* From the hall of fame of impractical attacks:
> +		 * Is this a user who tries to snoop on the cache?
> +		 */
> +		rtn = RC_DOIT;
> +		if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
> +			goto out_trace;
>  
> +		/* Compose RPC reply header */
> +		switch (rp->c_type) {
> +		case RC_NOCACHE:
> +			break;
> +		case RC_REPLSTAT:
> +			xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
> +			rtn = RC_REPLY;
> +			break;
> +		case RC_REPLBUFF:
> +			if (!nfsd_cache_append(rqstp, &rp->c_replvec))
> +				return rtn; /* should not happen */
> +			rtn = RC_REPLY;
> +			break;
> +		default:
> +			WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
> +		}
> +
> +out_trace:
> +		trace_nfsd_drc_found(nn, rqstp, rtn);
> +		return rtn;
> +	}
> +out:
>  	nfsd_cacherep_dispose(&dispose);
>  
>  	nfsd_stats_rc_misses_inc(nn);
>  	atomic_inc(&nn->num_drc_entries);
>  	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
> -	goto out;
> -
> -found_entry:
> -	/* We found a matching entry which is either in progress or done. */
> -	nfsd_reply_cache_free_locked(NULL, rp, nn);
> -	nfsd_stats_rc_hits_inc(nn);
> -	rtn = RC_DROPIT;
> -	rp = found;
> -
> -	/* Request being processed */
> -	if (rp->c_state == RC_INPROG)
> -		goto out_trace;
> -
> -	/* From the hall of fame of impractical attacks:
> -	 * Is this a user who tries to snoop on the cache? */
> -	rtn = RC_DOIT;
> -	if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
> -		goto out_trace;
> -
> -	/* Compose RPC reply header */
> -	switch (rp->c_type) {
> -	case RC_NOCACHE:
> -		break;
> -	case RC_REPLSTAT:
> -		xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
> -		rtn = RC_REPLY;
> -		break;
> -	case RC_REPLBUFF:
> -		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
> -			goto out_unlock; /* should not happen */
> -		rtn = RC_REPLY;
> -		break;
> -	default:
> -		WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
> -	}
> -
> -out_trace:
> -	trace_nfsd_drc_found(nn, rqstp, rtn);
> -out_unlock:
> -	spin_unlock(&b->cache_lock);
> -out:
>  	return rtn;
>  }
>  
> -- 
> 2.30.2
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-23 12:22 [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup() Su Hui
  2025-06-23 15:47 ` Dan Carpenter
@ 2025-06-24  0:19 ` NeilBrown
  2025-06-24  1:31   ` Su Hui
  2025-06-24 15:07   ` Chuck Lever
  1 sibling, 2 replies; 14+ messages in thread
From: NeilBrown @ 2025-06-24  0:19 UTC (permalink / raw)
  To: Su Hui
  Cc: chuck.lever, jlayton, okorniev, Dai.Ngo, tom, Su Hui, linux-nfs,
	linux-kernel, kernel-janitors

On Mon, 23 Jun 2025, Su Hui wrote:
> Using guard() to replace *unlock* label. guard() makes lock/unlock code
> more clear. Change the order of the code to let all lock code in the
> same scope. No functional changes.

While I agree that this code could usefully be cleaned up and that you
have made some improvements, I think the use of guard() is a nearly
insignificant part of the change.  You could easily do exactly the same
patch without using guard() but having and explicit spin_unlock() before
the new return.  That doesn't mean you shouldn't use guard(), but it
does mean that the comment explaining the change could be more usefully
focused on the "Change the order ..." part, and maybe explain what that
is important.

I actually think there is room for other changes which would make the
code even better:
- Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
  take the lock when needed, then drop it, then call
  nfsd_cacherep_dispose() - and return the count.
- change nfsd_cache_insert to also skip updating the chain length stats
  when it finds a match - in that case the "entries" isn't a chain
  length. So just  lru_put_end(), return.  Have it return NULL if
  no match was found
- after the found_entry label don't use nfsd_reply_cache_free_locked(),
  just free rp.  It has never been included in any rbtree or list, so it
  doesn't need to be removed.
- I'd be tempted to have nfsd_cache_insert() take the spinlock itself
  and call it under rcu_read_lock() - and use RCU to free the cached
  items. 
- put the chunk of code after the found_entry label into a separate
  function and instead just return RC_REPLY (and maybe rename that
  RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
  that function that has the found_entry code.

I think that would make the code a lot easier to follow.  Would you like
to have a go at that - I suspect it would be several patches - or shall
I do it?

Thanks,
NeilBrown



> 
> Signed-off-by: Su Hui <suhui@nfschina.com>
> ---
>  fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
>  1 file changed, 48 insertions(+), 51 deletions(-)
> 
> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
> index ba9d326b3de6..2d92adf3e6b0 100644
> --- a/fs/nfsd/nfscache.c
> +++ b/fs/nfsd/nfscache.c
> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>  
>  	if (type == RC_NOCACHE) {
>  		nfsd_stats_rc_nocache_inc(nn);
> -		goto out;
> +		return rtn;
>  	}
>  
>  	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>  	 */
>  	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
>  	if (!rp)
> -		goto out;
> +		return rtn;
>  
>  	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
> -	spin_lock(&b->cache_lock);
> -	found = nfsd_cache_insert(b, rp, nn);
> -	if (found != rp)
> -		goto found_entry;
> -	*cacherep = rp;
> -	rp->c_state = RC_INPROG;
> -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> -	spin_unlock(&b->cache_lock);
> +	scoped_guard(spinlock, &b->cache_lock) {
> +		found = nfsd_cache_insert(b, rp, nn);
> +		if (found == rp) {
> +			*cacherep = rp;
> +			rp->c_state = RC_INPROG;
> +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> +			goto out;
> +		}
> +		/* We found a matching entry which is either in progress or done. */
> +		nfsd_reply_cache_free_locked(NULL, rp, nn);
> +		nfsd_stats_rc_hits_inc(nn);
> +		rtn = RC_DROPIT;
> +		rp = found;
> +
> +		/* Request being processed */
> +		if (rp->c_state == RC_INPROG)
> +			goto out_trace;
> +
> +		/* From the hall of fame of impractical attacks:
> +		 * Is this a user who tries to snoop on the cache?
> +		 */
> +		rtn = RC_DOIT;
> +		if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
> +			goto out_trace;
>  
> +		/* Compose RPC reply header */
> +		switch (rp->c_type) {
> +		case RC_NOCACHE:
> +			break;
> +		case RC_REPLSTAT:
> +			xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
> +			rtn = RC_REPLY;
> +			break;
> +		case RC_REPLBUFF:
> +			if (!nfsd_cache_append(rqstp, &rp->c_replvec))
> +				return rtn; /* should not happen */
> +			rtn = RC_REPLY;
> +			break;
> +		default:
> +			WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
> +		}
> +
> +out_trace:
> +		trace_nfsd_drc_found(nn, rqstp, rtn);
> +		return rtn;
> +	}
> +out:
>  	nfsd_cacherep_dispose(&dispose);
>  
>  	nfsd_stats_rc_misses_inc(nn);
>  	atomic_inc(&nn->num_drc_entries);
>  	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
> -	goto out;
> -
> -found_entry:
> -	/* We found a matching entry which is either in progress or done. */
> -	nfsd_reply_cache_free_locked(NULL, rp, nn);
> -	nfsd_stats_rc_hits_inc(nn);
> -	rtn = RC_DROPIT;
> -	rp = found;
> -
> -	/* Request being processed */
> -	if (rp->c_state == RC_INPROG)
> -		goto out_trace;
> -
> -	/* From the hall of fame of impractical attacks:
> -	 * Is this a user who tries to snoop on the cache? */
> -	rtn = RC_DOIT;
> -	if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
> -		goto out_trace;
> -
> -	/* Compose RPC reply header */
> -	switch (rp->c_type) {
> -	case RC_NOCACHE:
> -		break;
> -	case RC_REPLSTAT:
> -		xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
> -		rtn = RC_REPLY;
> -		break;
> -	case RC_REPLBUFF:
> -		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
> -			goto out_unlock; /* should not happen */
> -		rtn = RC_REPLY;
> -		break;
> -	default:
> -		WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
> -	}
> -
> -out_trace:
> -	trace_nfsd_drc_found(nn, rqstp, rtn);
> -out_unlock:
> -	spin_unlock(&b->cache_lock);
> -out:
>  	return rtn;
>  }
>  
> -- 
> 2.30.2
> 
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24  0:19 ` NeilBrown
@ 2025-06-24  1:31   ` Su Hui
  2025-06-24 13:41     ` Chuck Lever
  2025-06-24 15:07   ` Chuck Lever
  1 sibling, 1 reply; 14+ messages in thread
From: Su Hui @ 2025-06-24  1:31 UTC (permalink / raw)
  To: NeilBrown
  Cc: chuck.lever, jlayton, okorniev, Dai.Ngo, tom, linux-nfs,
	linux-kernel, kernel-janitors, Su Hui

On 2025/6/24 08:19, NeilBrown wrote:
> On Mon, 23 Jun 2025, Su Hui wrote:
>> Using guard() to replace *unlock* label. guard() makes lock/unlock code
>> more clear. Change the order of the code to let all lock code in the
>> same scope. No functional changes.
> While I agree that this code could usefully be cleaned up and that you
> have made some improvements, I think the use of guard() is a nearly
> insignificant part of the change.  You could easily do exactly the same
> patch without using guard() but having and explicit spin_unlock() before
> the new return.  That doesn't mean you shouldn't use guard(), but it
> does mean that the comment explaining the change could be more usefully
> focused on the "Change the order ..." part, and maybe explain what that
> is important.
Got it. I will focus on "Change the order ..." part in the next v2 patch.
> I actually think there is room for other changes which would make the
> code even better:
> - Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
>    take the lock when needed, then drop it, then call
>    nfsd_cacherep_dispose() - and return the count.
> - change nfsd_cache_insert to also skip updating the chain length stats
>    when it finds a match - in that case the "entries" isn't a chain
>    length. So just  lru_put_end(), return.  Have it return NULL if
>    no match was found
> - after the found_entry label don't use nfsd_reply_cache_free_locked(),
>    just free rp.  It has never been included in any rbtree or list, so it
>    doesn't need to be removed.
> - I'd be tempted to have nfsd_cache_insert() take the spinlock itself
>    and call it under rcu_read_lock() - and use RCU to free the cached
>    items.
> - put the chunk of code after the found_entry label into a separate
>    function and instead just return RC_REPLY (and maybe rename that
>    RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
>    that function that has the found_entry code.
>
> I think that would make the code a lot easier to follow.  Would you like
> to have a go at that - I suspect it would be several patches - or shall
> I do it?
>
> Thanks,
> NeilBrown
>
Really thanks for your suggestions!
Yes, I'd like to do it in the next v2 patchset as soon as possible.
I'm always looking for things I can contribute to in the Linux kernel
community, so I'm happy to take this on.

regards,
Su Hui

>
>> Signed-off-by: Su Hui <suhui@nfschina.com>
>> ---
>>   fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
>>   1 file changed, 48 insertions(+), 51 deletions(-)
>>
>> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
>> index ba9d326b3de6..2d92adf3e6b0 100644
>> --- a/fs/nfsd/nfscache.c
>> +++ b/fs/nfsd/nfscache.c
>> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>>   
>>   	if (type == RC_NOCACHE) {
>>   		nfsd_stats_rc_nocache_inc(nn);
>> -		goto out;
>> +		return rtn;
>>   	}
>>   
>>   	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
>> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>>   	 */
>>   	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
>>   	if (!rp)
>> -		goto out;
>> +		return rtn;
>>   
>>   	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
>> -	spin_lock(&b->cache_lock);
>> -	found = nfsd_cache_insert(b, rp, nn);
>> -	if (found != rp)
>> -		goto found_entry;
>> -	*cacherep = rp;
>> -	rp->c_state = RC_INPROG;
>> -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>> -	spin_unlock(&b->cache_lock);
>> +	scoped_guard(spinlock, &b->cache_lock) {
>> +		found = nfsd_cache_insert(b, rp, nn);
>> +		if (found == rp) {
>> +			*cacherep = rp;
>> +			rp->c_state = RC_INPROG;
>> +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>> +			goto out;
>> +		}
>> +		/* We found a matching entry which is either in progress or done. */
>> +		nfsd_reply_cache_free_locked(NULL, rp, nn);
>> +		nfsd_stats_rc_hits_inc(nn);
>> +		rtn = RC_DROPIT;
>> +		rp = found;
>> +
>> +		/* Request being processed */
>> +		if (rp->c_state == RC_INPROG)
>> +			goto out_trace;
>> +
>> +		/* From the hall of fame of impractical attacks:
>> +		 * Is this a user who tries to snoop on the cache?
>> +		 */
>> +		rtn = RC_DOIT;
>> +		if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
>> +			goto out_trace;
>>   
>> +		/* Compose RPC reply header */
>> +		switch (rp->c_type) {
>> +		case RC_NOCACHE:
>> +			break;
>> +		case RC_REPLSTAT:
>> +			xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
>> +			rtn = RC_REPLY;
>> +			break;
>> +		case RC_REPLBUFF:
>> +			if (!nfsd_cache_append(rqstp, &rp->c_replvec))
>> +				return rtn; /* should not happen */
>> +			rtn = RC_REPLY;
>> +			break;
>> +		default:
>> +			WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
>> +		}
>> +
>> +out_trace:
>> +		trace_nfsd_drc_found(nn, rqstp, rtn);
>> +		return rtn;
>> +	}
>> +out:
>>   	nfsd_cacherep_dispose(&dispose);
>>   
>>   	nfsd_stats_rc_misses_inc(nn);
>>   	atomic_inc(&nn->num_drc_entries);
>>   	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
>> -	goto out;
>> -
>> -found_entry:
>> -	/* We found a matching entry which is either in progress or done. */
>> -	nfsd_reply_cache_free_locked(NULL, rp, nn);
>> -	nfsd_stats_rc_hits_inc(nn);
>> -	rtn = RC_DROPIT;
>> -	rp = found;
>> -
>> -	/* Request being processed */
>> -	if (rp->c_state == RC_INPROG)
>> -		goto out_trace;
>> -
>> -	/* From the hall of fame of impractical attacks:
>> -	 * Is this a user who tries to snoop on the cache? */
>> -	rtn = RC_DOIT;
>> -	if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
>> -		goto out_trace;
>> -
>> -	/* Compose RPC reply header */
>> -	switch (rp->c_type) {
>> -	case RC_NOCACHE:
>> -		break;
>> -	case RC_REPLSTAT:
>> -		xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
>> -		rtn = RC_REPLY;
>> -		break;
>> -	case RC_REPLBUFF:
>> -		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
>> -			goto out_unlock; /* should not happen */
>> -		rtn = RC_REPLY;
>> -		break;
>> -	default:
>> -		WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
>> -	}
>> -
>> -out_trace:
>> -	trace_nfsd_drc_found(nn, rqstp, rtn);
>> -out_unlock:
>> -	spin_unlock(&b->cache_lock);
>> -out:
>>   	return rtn;
>>   }
>>   
>> -- 
>> 2.30.2
>>
>>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-23 15:47 ` Dan Carpenter
@ 2025-06-24  1:45   ` Su Hui
  2025-06-24 14:49     ` Dan Carpenter
  0 siblings, 1 reply; 14+ messages in thread
From: Su Hui @ 2025-06-24  1:45 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: chuck.lever, jlayton, neil, okorniev, Dai.Ngo, tom, linux-nfs,
	linux-kernel, kernel-janitors, Su Hui

On 2025/6/23 23:47, Dan Carpenter wrote:
> On Mon, Jun 23, 2025 at 08:22:27PM +0800, Su Hui wrote:
>> Using guard() to replace *unlock* label. guard() makes lock/unlock code
>> more clear. Change the order of the code to let all lock code in the
>> same scope. No functional changes.
>>
>> Signed-off-by: Su Hui <suhui@nfschina.com>
>> ---
>>   fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
>>   1 file changed, 48 insertions(+), 51 deletions(-)
>>
>> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
>> index ba9d326b3de6..2d92adf3e6b0 100644
>> --- a/fs/nfsd/nfscache.c
>> +++ b/fs/nfsd/nfscache.c
>> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>>   
>>   	if (type == RC_NOCACHE) {
>>   		nfsd_stats_rc_nocache_inc(nn);
>> -		goto out;
>> +		return rtn;
>>   	}
>>   
>>   	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
>> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>>   	 */
>>   	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
>>   	if (!rp)
>> -		goto out;
>> +		return rtn;
>>   
>>   	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
>> -	spin_lock(&b->cache_lock);
>> -	found = nfsd_cache_insert(b, rp, nn);
>> -	if (found != rp)
>> -		goto found_entry;
>> -	*cacherep = rp;
>> -	rp->c_state = RC_INPROG;
>> -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>> -	spin_unlock(&b->cache_lock);
>> +	scoped_guard(spinlock, &b->cache_lock) {
>> +		found = nfsd_cache_insert(b, rp, nn);
>> +		if (found == rp) {
>> +			*cacherep = rp;
>> +			rp->c_state = RC_INPROG;
>> +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>> +			goto out;
> It took me a while to figure out why we've added a goto here.  In the
> original code this "goto out;" was a "spin_unlock(&b->cache_lock);".
> The spin_unlock() is more readable because you can immediately see that
> it's trying to drop the lock where a "goto out;" is less obvious about
> the intention.

Would "break;" be better in this place?  Meaning: break out of this lock guard scope.

But as NeilBrown suggested[1], this patch will be replaced by several 
patches.

No matter what, this "goto out;" will be removed in the next v2 patchset.

> I think this patch works fine, but I'm not sure it's an improvement.

Got it, thanks for your suggestions!

[1] 
https://lore.kernel.org/all/175072435698.2280845.12079422273351211469@noble.neil.brown.name/

regards,
Su Hui

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24  1:31   ` Su Hui
@ 2025-06-24 13:41     ` Chuck Lever
  2025-06-24 14:21       ` Su Hui
  0 siblings, 1 reply; 14+ messages in thread
From: Chuck Lever @ 2025-06-24 13:41 UTC (permalink / raw)
  To: Su Hui, NeilBrown
  Cc: jlayton, okorniev, Dai.Ngo, tom, linux-nfs, linux-kernel,
	kernel-janitors

On 6/23/25 9:31 PM, Su Hui wrote:
> On 2025/6/24 08:19, NeilBrown wrote:
>> On Mon, 23 Jun 2025, Su Hui wrote:
>>> Using guard() to replace *unlock* label. guard() makes lock/unlock code
>>> more clear. Change the order of the code to let all lock code in the
>>> same scope. No functional changes.
>> While I agree that this code could usefully be cleaned up and that you
>> have made some improvements, I think the use of guard() is a nearly
>> insignificant part of the change.  You could easily do exactly the same
>> patch without using guard() but having and explicit spin_unlock() before
>> the new return.  That doesn't mean you shouldn't use guard(), but it
>> does mean that the comment explaining the change could be more usefully
>> focused on the "Change the order ..." part, and maybe explain what that
>> is important.
> Got it. I will focus on "Change the order ..." part in the next v2 patch.
>> I actually think there is room for other changes which would make the
>> code even better:
>> - Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
>>    take the lock when needed, then drop it, then call
>>    nfsd_cacherep_dispose() - and return the count.
>> - change nfsd_cache_insert to also skip updating the chain length stats
>>    when it finds a match - in that case the "entries" isn't a chain
>>    length. So just  lru_put_end(), return.  Have it return NULL if
>>    no match was found
>> - after the found_entry label don't use nfsd_reply_cache_free_locked(),
>>    just free rp.  It has never been included in any rbtree or list, so it
>>    doesn't need to be removed.
>> - I'd be tempted to have nfsd_cache_insert() take the spinlock itself
>>    and call it under rcu_read_lock() - and use RCU to free the cached
>>    items.
>> - put the chunk of code after the found_entry label into a separate
>>    function and instead just return RC_REPLY (and maybe rename that
>>    RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
>>    that function that has the found_entry code.
>>
>> I think that would make the code a lot easier to follow.  Would you like
>> to have a go at that - I suspect it would be several patches - or shall
>> I do it?
>>
>> Thanks,
>> NeilBrown
>>
> Really thanks for your suggestions!
> Yes, I'd like to do it in the next v2 patchset as soon as possible.
> I'm always searching some things I can participate in about linux kernel
> community, so it's happy for me to do this thing.

Hi Su -

Split the individual changes Neil suggested into separate patches. That
makes the changes easier to review.


>>> Signed-off-by: Su Hui <suhui@nfschina.com>
>>> ---
>>>   fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
>>>   1 file changed, 48 insertions(+), 51 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
>>> index ba9d326b3de6..2d92adf3e6b0 100644
>>> --- a/fs/nfsd/nfscache.c
>>> +++ b/fs/nfsd/nfscache.c
>>> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp,
>>> unsigned int start,
>>>         if (type == RC_NOCACHE) {
>>>           nfsd_stats_rc_nocache_inc(nn);
>>> -        goto out;
>>> +        return rtn;
>>>       }
>>>         csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
>>> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp,
>>> unsigned int start,
>>>        */
>>>       rp = nfsd_cacherep_alloc(rqstp, csum, nn);
>>>       if (!rp)
>>> -        goto out;
>>> +        return rtn;
>>>         b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
>>> -    spin_lock(&b->cache_lock);
>>> -    found = nfsd_cache_insert(b, rp, nn);
>>> -    if (found != rp)
>>> -        goto found_entry;
>>> -    *cacherep = rp;
>>> -    rp->c_state = RC_INPROG;
>>> -    nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>>> -    spin_unlock(&b->cache_lock);
>>> +    scoped_guard(spinlock, &b->cache_lock) {
>>> +        found = nfsd_cache_insert(b, rp, nn);
>>> +        if (found == rp) {
>>> +            *cacherep = rp;
>>> +            rp->c_state = RC_INPROG;
>>> +            nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>>> +            goto out;
>>> +        }
>>> +        /* We found a matching entry which is either in progress or
>>> done. */
>>> +        nfsd_reply_cache_free_locked(NULL, rp, nn);
>>> +        nfsd_stats_rc_hits_inc(nn);
>>> +        rtn = RC_DROPIT;
>>> +        rp = found;
>>> +
>>> +        /* Request being processed */
>>> +        if (rp->c_state == RC_INPROG)
>>> +            goto out_trace;
>>> +
>>> +        /* From the hall of fame of impractical attacks:
>>> +         * Is this a user who tries to snoop on the cache?
>>> +         */
>>> +        rtn = RC_DOIT;
>>> +        if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
>>> +            goto out_trace;
>>>   +        /* Compose RPC reply header */
>>> +        switch (rp->c_type) {
>>> +        case RC_NOCACHE:
>>> +            break;
>>> +        case RC_REPLSTAT:
>>> +            xdr_stream_encode_be32(&rqstp->rq_res_stream, rp-
>>> >c_replstat);
>>> +            rtn = RC_REPLY;
>>> +            break;
>>> +        case RC_REPLBUFF:
>>> +            if (!nfsd_cache_append(rqstp, &rp->c_replvec))
>>> +                return rtn; /* should not happen */
>>> +            rtn = RC_REPLY;
>>> +            break;
>>> +        default:
>>> +            WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
>>> +        }
>>> +
>>> +out_trace:
>>> +        trace_nfsd_drc_found(nn, rqstp, rtn);
>>> +        return rtn;
>>> +    }
>>> +out:
>>>       nfsd_cacherep_dispose(&dispose);
>>>         nfsd_stats_rc_misses_inc(nn);
>>>       atomic_inc(&nn->num_drc_entries);
>>>       nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
>>> -    goto out;
>>> -
>>> -found_entry:
>>> -    /* We found a matching entry which is either in progress or
>>> done. */
>>> -    nfsd_reply_cache_free_locked(NULL, rp, nn);
>>> -    nfsd_stats_rc_hits_inc(nn);
>>> -    rtn = RC_DROPIT;
>>> -    rp = found;
>>> -
>>> -    /* Request being processed */
>>> -    if (rp->c_state == RC_INPROG)
>>> -        goto out_trace;
>>> -
>>> -    /* From the hall of fame of impractical attacks:
>>> -     * Is this a user who tries to snoop on the cache? */
>>> -    rtn = RC_DOIT;
>>> -    if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
>>> -        goto out_trace;
>>> -
>>> -    /* Compose RPC reply header */
>>> -    switch (rp->c_type) {
>>> -    case RC_NOCACHE:
>>> -        break;
>>> -    case RC_REPLSTAT:
>>> -        xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
>>> -        rtn = RC_REPLY;
>>> -        break;
>>> -    case RC_REPLBUFF:
>>> -        if (!nfsd_cache_append(rqstp, &rp->c_replvec))
>>> -            goto out_unlock; /* should not happen */
>>> -        rtn = RC_REPLY;
>>> -        break;
>>> -    default:
>>> -        WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
>>> -    }
>>> -
>>> -out_trace:
>>> -    trace_nfsd_drc_found(nn, rqstp, rtn);
>>> -out_unlock:
>>> -    spin_unlock(&b->cache_lock);
>>> -out:
>>>       return rtn;
>>>   }
>>>   -- 
>>> 2.30.2
>>>
>>>


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24 13:41     ` Chuck Lever
@ 2025-06-24 14:21       ` Su Hui
  0 siblings, 0 replies; 14+ messages in thread
From: Su Hui @ 2025-06-24 14:21 UTC (permalink / raw)
  To: Chuck Lever, NeilBrown
  Cc: jlayton, okorniev, Dai.Ngo, tom, linux-nfs, linux-kernel,
	kernel-janitors, Su Hui

On 2025/6/24 21:41, Chuck Lever wrote:
> On 6/23/25 9:31 PM, Su Hui wrote:
>> On 2025/6/24 08:19, NeilBrown wrote:
>>> On Mon, 23 Jun 2025, Su Hui wrote:
>>>> Using guard() to replace *unlock* label. guard() makes lock/unlock code
>>>> more clear. Change the order of the code to let all lock code in the
>>>> same scope. No functional changes.
>>> While I agree that this code could usefully be cleaned up and that you
>>> have made some improvements, I think the use of guard() is a nearly
>>> insignificant part of the change.  You could easily do exactly the same
>>> patch without using guard() but having and explicit spin_unlock() before
>>> the new return.  That doesn't mean you shouldn't use guard(), but it
>>> does mean that the comment explaining the change could be more usefully
>>> focused on the "Change the order ..." part, and maybe explain what that
>>> is important.
>> Got it. I will focus on "Change the order ..." part in the next v2 patch.
>>> I actually think there is room for other changes which would make the
>>> code even better:
>>> - Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
>>>     take the lock when needed, then drop it, then call
>>>     nfsd_cacherep_dispose() - and return the count.
>>> - change nfsd_cache_insert to also skip updating the chain length stats
>>>     when it finds a match - in that case the "entries" isn't a chain
>>>     length. So just  lru_put_end(), return.  Have it return NULL if
>>>     no match was found
>>> - after the found_entry label don't use nfsd_reply_cache_free_locked(),
>>>     just free rp.  It has never been included in any rbtree or list, so it
>>>     doesn't need to be removed.
>>> - I'd be tempted to have nfsd_cache_insert() take the spinlock itself
>>>     and call it under rcu_read_lock() - and use RCU to free the cached
>>>     items.
>>> - put the chunk of code after the found_entry label into a separate
>>>     function and instead just return RC_REPLY (and maybe rename that
>>>     RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
>>>     that function that has the found_entry code.
>>>
>>> I think that would make the code a lot easier to follow.  Would you like
>>> to have a go at that - I suspect it would be several patches - or shall
>>> I do it?
>>>
>>> Thanks,
>>> NeilBrown
>>>
>> Really thanks for your suggestions!
>> Yes, I'd like to do it in the next v2 patchset as soon as possible.
>> I'm always searching some things I can participate in about linux kernel
>> community, so it's happy for me to do this thing.
> Hi Su -
>
> Split the individual changes Neil suggested into separate patches. That
> makes the changes easier to review.

Hi,

Thanks for your reminder. I will split these individual changes.
It won't take long; I will submit this patchset this week at the latest.

Regards,
Su Hui



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24  1:45   ` Su Hui
@ 2025-06-24 14:49     ` Dan Carpenter
  0 siblings, 0 replies; 14+ messages in thread
From: Dan Carpenter @ 2025-06-24 14:49 UTC (permalink / raw)
  To: Su Hui
  Cc: chuck.lever, jlayton, neil, okorniev, Dai.Ngo, tom, linux-nfs,
	linux-kernel, kernel-janitors

On Tue, Jun 24, 2025 at 09:45:27AM +0800, Su Hui wrote:
> On 2025/6/23 23:47, Dan Carpenter wrote:
> > On Mon, Jun 23, 2025 at 08:22:27PM +0800, Su Hui wrote:
> > > Using guard() to replace *unlock* label. guard() makes lock/unlock code
> > > more clear. Change the order of the code to let all lock code in the
> > > same scope. No functional changes.
> > > 
> > > Signed-off-by: Su Hui <suhui@nfschina.com>
> > > ---
> > >   fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
> > >   1 file changed, 48 insertions(+), 51 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
> > > index ba9d326b3de6..2d92adf3e6b0 100644
> > > --- a/fs/nfsd/nfscache.c
> > > +++ b/fs/nfsd/nfscache.c
> > > @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
> > >   	if (type == RC_NOCACHE) {
> > >   		nfsd_stats_rc_nocache_inc(nn);
> > > -		goto out;
> > > +		return rtn;
> > >   	}
> > >   	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
> > > @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
> > >   	 */
> > >   	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
> > >   	if (!rp)
> > > -		goto out;
> > > +		return rtn;
> > >   	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
> > > -	spin_lock(&b->cache_lock);
> > > -	found = nfsd_cache_insert(b, rp, nn);
> > > -	if (found != rp)
> > > -		goto found_entry;
> > > -	*cacherep = rp;
> > > -	rp->c_state = RC_INPROG;
> > > -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> > > -	spin_unlock(&b->cache_lock);
> > > +	scoped_guard(spinlock, &b->cache_lock) {
> > > +		found = nfsd_cache_insert(b, rp, nn);
> > > +		if (found == rp) {
> > > +			*cacherep = rp;
> > > +			rp->c_state = RC_INPROG;
> > > +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> > > +			goto out;
> > It took me a while to figure out why we've added a goto here.  In the
> > original code this "goto out;" was a "spin_unlock(&b->cache_lock);".
> > The spin_unlock() is more readable because you can immediately see that
> > it's trying to drop the lock where a "goto out;" is less obvious about
> > the intention.
> 
> Does "break;" be better in this place?  Meaning Break this lock guard scope.
> 

Yeah, probably break is better.

regards,
dan carpenter


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24  0:19 ` NeilBrown
  2025-06-24  1:31   ` Su Hui
@ 2025-06-24 15:07   ` Chuck Lever
  2025-06-24 22:15     ` NeilBrown
  1 sibling, 1 reply; 14+ messages in thread
From: Chuck Lever @ 2025-06-24 15:07 UTC (permalink / raw)
  To: NeilBrown, Su Hui
  Cc: jlayton, okorniev, Dai.Ngo, tom, linux-nfs, linux-kernel,
	kernel-janitors

On 6/23/25 8:19 PM, NeilBrown wrote:
> On Mon, 23 Jun 2025, Su Hui wrote:
>> Using guard() to replace *unlock* label. guard() makes lock/unlock code
>> more clear. Change the order of the code to let all lock code in the
>> same scope. No functional changes.
> 
> While I agree that this code could usefully be cleaned up and that you
> have made some improvements, I think the use of guard() is a nearly
> insignificant part of the change.  You could easily do exactly the same
> patch without using guard() but having and explicit spin_unlock() before
> the new return.  That doesn't mean you shouldn't use guard(), but it
> does mean that the comment explaining the change could be more usefully
> focused on the "Change the order ..." part, and maybe explain what that
> is important.
> 
> I actually think there is room for other changes which would make the
> code even better:
> - Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
>   take the lock when needed, then drop it, then call
>   nfsd_cacherep_dispose() - and return the count.
> - change nfsd_cache_insert to also skip updating the chain length stats
>   when it finds a match - in that case the "entries" isn't a chain
>   length. So just  lru_put_end(), return.  Have it return NULL if
>   no match was found
> - after the found_entry label don't use nfsd_reply_cache_free_locked(),
>   just free rp.  It has never been included in any rbtree or list, so it
>   doesn't need to be removed.
> - I'd be tempted to have nfsd_cache_insert() take the spinlock itself
>   and call it under rcu_read_lock() - and use RCU to free the cached
>   items. 
> - put the chunk of code after the found_entry label into a separate
>   function and instead just return RC_REPLY (and maybe rename that
>   RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
>   that function that has the found_entry code.
> 
> I think that would make the code a lot easier to follow.  Would you like
> to have a go at that - I suspect it would be several patches - or shall
> I do it?

I'm going to counsel some caution.

nfsd_cache_lookup() is a hot path. Source code readability, though
important, is not the priority in this area.

I'm happy to consider changes to this function, but the bottom line is
patches need to be accompanied by data that show that proposed code
modifications do not negatively impact performance. (Plus the usual
test results that show no impact to correctness).

That data might include:
- flame graphs that show a decrease in CPU utilization
- objdump output showing a smaller instruction cache footprint
  and/or short instruction path lengths
- perf results showing better memory bandwidth
- perf results showing better branch prediction
- lockstat results showing less contention and/or shorter hold
  time on locks held in this path

Macro benchmark results are also welcome: equal or lower latency for
NFSv3 operations, and equal or higher I/O throughput.

The benefit for the scoped_guard construct is that it might make it more
difficult to add code that returns from this function with a lock held.
However, so far that hasn't been an issue.

Thus I'm not sure there's a lot of strong technical justification for
modification of this code path. But, you might know of one -- if so,
please make sure that appears in the patch descriptions.

What is more interesting to me is trying out more sophisticated abstract
data types for the DRC hashtable. rhashtable is one alternative; so is
Maple tree, which is supposed to handle lookups with more memory
bandwidth efficiency than walking a linked list.

Anyway, have fun, get creative, and let's see what comes up.


>> Signed-off-by: Su Hui <suhui@nfschina.com>
>> ---
>>  fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
>>  1 file changed, 48 insertions(+), 51 deletions(-)
>>
>> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
>> index ba9d326b3de6..2d92adf3e6b0 100644
>> --- a/fs/nfsd/nfscache.c
>> +++ b/fs/nfsd/nfscache.c
>> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>>  
>>  	if (type == RC_NOCACHE) {
>>  		nfsd_stats_rc_nocache_inc(nn);
>> -		goto out;
>> +		return rtn;
>>  	}
>>  
>>  	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
>> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
>>  	 */
>>  	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
>>  	if (!rp)
>> -		goto out;
>> +		return rtn;
>>  
>>  	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
>> -	spin_lock(&b->cache_lock);
>> -	found = nfsd_cache_insert(b, rp, nn);
>> -	if (found != rp)
>> -		goto found_entry;
>> -	*cacherep = rp;
>> -	rp->c_state = RC_INPROG;
>> -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>> -	spin_unlock(&b->cache_lock);
>> +	scoped_guard(spinlock, &b->cache_lock) {
>> +		found = nfsd_cache_insert(b, rp, nn);
>> +		if (found == rp) {
>> +			*cacherep = rp;
>> +			rp->c_state = RC_INPROG;
>> +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
>> +			goto out;
>> +		}
>> +		/* We found a matching entry which is either in progress or done. */
>> +		nfsd_reply_cache_free_locked(NULL, rp, nn);
>> +		nfsd_stats_rc_hits_inc(nn);
>> +		rtn = RC_DROPIT;
>> +		rp = found;
>> +
>> +		/* Request being processed */
>> +		if (rp->c_state == RC_INPROG)
>> +			goto out_trace;
>> +
>> +		/* From the hall of fame of impractical attacks:
>> +		 * Is this a user who tries to snoop on the cache?
>> +		 */
>> +		rtn = RC_DOIT;
>> +		if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
>> +			goto out_trace;
>>  
>> +		/* Compose RPC reply header */
>> +		switch (rp->c_type) {
>> +		case RC_NOCACHE:
>> +			break;
>> +		case RC_REPLSTAT:
>> +			xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
>> +			rtn = RC_REPLY;
>> +			break;
>> +		case RC_REPLBUFF:
>> +			if (!nfsd_cache_append(rqstp, &rp->c_replvec))
>> +				return rtn; /* should not happen */
>> +			rtn = RC_REPLY;
>> +			break;
>> +		default:
>> +			WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
>> +		}
>> +
>> +out_trace:
>> +		trace_nfsd_drc_found(nn, rqstp, rtn);
>> +		return rtn;
>> +	}
>> +out:
>>  	nfsd_cacherep_dispose(&dispose);
>>  
>>  	nfsd_stats_rc_misses_inc(nn);
>>  	atomic_inc(&nn->num_drc_entries);
>>  	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
>> -	goto out;
>> -
>> -found_entry:
>> -	/* We found a matching entry which is either in progress or done. */
>> -	nfsd_reply_cache_free_locked(NULL, rp, nn);
>> -	nfsd_stats_rc_hits_inc(nn);
>> -	rtn = RC_DROPIT;
>> -	rp = found;
>> -
>> -	/* Request being processed */
>> -	if (rp->c_state == RC_INPROG)
>> -		goto out_trace;
>> -
>> -	/* From the hall of fame of impractical attacks:
>> -	 * Is this a user who tries to snoop on the cache? */
>> -	rtn = RC_DOIT;
>> -	if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
>> -		goto out_trace;
>> -
>> -	/* Compose RPC reply header */
>> -	switch (rp->c_type) {
>> -	case RC_NOCACHE:
>> -		break;
>> -	case RC_REPLSTAT:
>> -		xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
>> -		rtn = RC_REPLY;
>> -		break;
>> -	case RC_REPLBUFF:
>> -		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
>> -			goto out_unlock; /* should not happen */
>> -		rtn = RC_REPLY;
>> -		break;
>> -	default:
>> -		WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
>> -	}
>> -
>> -out_trace:
>> -	trace_nfsd_drc_found(nn, rqstp, rtn);
>> -out_unlock:
>> -	spin_unlock(&b->cache_lock);
>> -out:
>>  	return rtn;
>>  }
>>  
>> -- 
>> 2.30.2
>>
>>
> 


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24 15:07   ` Chuck Lever
@ 2025-06-24 22:15     ` NeilBrown
  2025-06-25 11:51       ` Su Hui
  2025-07-11 14:22       ` Chuck Lever
  0 siblings, 2 replies; 14+ messages in thread
From: NeilBrown @ 2025-06-24 22:15 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Su Hui, jlayton, okorniev, Dai.Ngo, tom, linux-nfs, linux-kernel,
	kernel-janitors

On Wed, 25 Jun 2025, Chuck Lever wrote:
> On 6/23/25 8:19 PM, NeilBrown wrote:
> > On Mon, 23 Jun 2025, Su Hui wrote:
> >> Using guard() to replace *unlock* label. guard() makes lock/unlock code
> >> more clear. Change the order of the code to let all lock code in the
> >> same scope. No functional changes.
> > 
> > While I agree that this code could usefully be cleaned up and that you
> > have made some improvements, I think the use of guard() is a nearly
> > insignificant part of the change.  You could easily do exactly the same
> > patch without using guard() but having and explicit spin_unlock() before
> > the new return.  That doesn't mean you shouldn't use guard(), but it
> > does mean that the comment explaining the change could be more usefully
> > focused on the "Change the order ..." part, and maybe explain what that
> > is important.
> > 
> > I actually think there is room for other changes which would make the
> > code even better:
> > - Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
> >   take the lock when needed, then drop it, then call
> >   nfsd_cacherep_dispose() - and return the count.
> > - change nfsd_cache_insert to also skip updating the chain length stats
> >   when it finds a match - in that case the "entries" isn't a chain
> >   length. So just  lru_put_end(), return.  Have it return NULL if
> >   no match was found
> > - after the found_entry label don't use nfsd_reply_cache_free_locked(),
> >   just free rp.  It has never been included in any rbtree or list, so it
> >   doesn't need to be removed.
> > - I'd be tempted to have nfsd_cache_insert() take the spinlock itself
> >   and call it under rcu_read_lock() - and use RCU to free the cached
> >   items. 
> > - put the chunk of code after the found_entry label into a separate
> >   function and instead just return RC_REPLY (and maybe rename that
> >   RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
> >   that function that has the found_entry code.
> > 
> > I think that would make the code a lot easier to follow.  Would you like
> > to have a go at that - I suspect it would be several patches - or shall
> > I do it?
> 
> I'm going to counsel some caution.
> 
> nfsd_cache_lookup() is a hot path. Source code readability, though
> important, is not the priority in this area.
> 
> I'm happy to consider changes to this function, but the bottom line is
> patches need to be accompanied by data that show that proposed code
> modifications do not negatively impact performance. (Plus the usual
> test results that show no impact to correctness).
> 
> That data might include:
> - flame graphs that show a decrease in CPU utilization
> - objdump output showing a smaller instruction cache footprint
>   and/or short instruction path lengths
> - perf results showing better memory bandwidth
> - perf results showing better branch prediction
> - lockstat results showing less contention and/or shorter hold
>   time on locks held in this path
> 
> Macro benchmark results are also welcome: equal or lower latency for
> NFSv3 operations, and equal or higher I/O throughput.
> 
> The benefit for the scoped_guard construct is that it might make it more
> difficult to add code that returns from this function with a lock held.
> However, so far that hasn't been an issue.
> 
> Thus I'm not sure there's a lot of strong technical justification for
> modification of this code path. But, you might know of one -- if so,
> please make sure that appears in the patch descriptions.
> 
> What is more interesting to me is trying out more sophisticated abstract
> data types for the DRC hashtable. rhashtable is one alternative; so is
> Maple tree, which is supposed to handle lookups with more memory
> bandwidth efficiency than walking a linked list.
> 

While I generally like rhashtable there is an awkwardness.  It doesn't
guarantee that an insert will always succeed.  If you get lots of new
records that hash to the same value, it will start failing insert
requests until it has re-hashed the table with a new seed.  This is
intended to defeat collision attacks.  That means we would need to drop
requests sometimes.  Maybe that is OK.  The DRC could be the target of
collision attacks so maybe we really do want to drop requests if
rhashtable refuses to store them.

I think the other area that could use improvement is pruning old entries.
I would not include RC_INPROG entries in the lru at all - they are
always ignored, and will be added when they are switched to RCU_DONE.
I'd generally like to prune less often in larger batches, but removing
each of the batch from the rbtree could hold the lock for longer than we
would like.  I wonder if we could have an 'old' and a 'new' rbtree and
when the 'old' gets too old or the 'new' get too full, we extract 'old',
move 'new' to 'old', and outside the spinlock we free all of the moved
'old'.

But if we switched to rhashtable, we probably wouldn't need an lru -
just walk the entire table occasionally - there would be little conflict
with concurrent lookups.

But as you say, measuring would be useful.  Hopefully the DRC lookup
would be a small contribution to the total request time, so we would need
to measure just what happens in the code to compare different versions.

NeilBrown


> Anyway, have fun, get creative, and let's see what comes up.
> 
> 
> >> Signed-off-by: Su Hui <suhui@nfschina.com>
> >> ---
> >>  fs/nfsd/nfscache.c | 99 ++++++++++++++++++++++------------------------
> >>  1 file changed, 48 insertions(+), 51 deletions(-)
> >>
> >> diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
> >> index ba9d326b3de6..2d92adf3e6b0 100644
> >> --- a/fs/nfsd/nfscache.c
> >> +++ b/fs/nfsd/nfscache.c
> >> @@ -489,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
> >>  
> >>  	if (type == RC_NOCACHE) {
> >>  		nfsd_stats_rc_nocache_inc(nn);
> >> -		goto out;
> >> +		return rtn;
> >>  	}
> >>  
> >>  	csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
> >> @@ -500,64 +500,61 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
> >>  	 */
> >>  	rp = nfsd_cacherep_alloc(rqstp, csum, nn);
> >>  	if (!rp)
> >> -		goto out;
> >> +		return rtn;
> >>  
> >>  	b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
> >> -	spin_lock(&b->cache_lock);
> >> -	found = nfsd_cache_insert(b, rp, nn);
> >> -	if (found != rp)
> >> -		goto found_entry;
> >> -	*cacherep = rp;
> >> -	rp->c_state = RC_INPROG;
> >> -	nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> >> -	spin_unlock(&b->cache_lock);
> >> +	scoped_guard(spinlock, &b->cache_lock) {
> >> +		found = nfsd_cache_insert(b, rp, nn);
> >> +		if (found == rp) {
> >> +			*cacherep = rp;
> >> +			rp->c_state = RC_INPROG;
> >> +			nfsd_prune_bucket_locked(nn, b, 3, &dispose);
> >> +			goto out;
> >> +		}
> >> +		/* We found a matching entry which is either in progress or done. */
> >> +		nfsd_reply_cache_free_locked(NULL, rp, nn);
> >> +		nfsd_stats_rc_hits_inc(nn);
> >> +		rtn = RC_DROPIT;
> >> +		rp = found;
> >> +
> >> +		/* Request being processed */
> >> +		if (rp->c_state == RC_INPROG)
> >> +			goto out_trace;
> >> +
> >> +		/* From the hall of fame of impractical attacks:
> >> +		 * Is this a user who tries to snoop on the cache?
> >> +		 */
> >> +		rtn = RC_DOIT;
> >> +		if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
> >> +			goto out_trace;
> >>  
> >> +		/* Compose RPC reply header */
> >> +		switch (rp->c_type) {
> >> +		case RC_NOCACHE:
> >> +			break;
> >> +		case RC_REPLSTAT:
> >> +			xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
> >> +			rtn = RC_REPLY;
> >> +			break;
> >> +		case RC_REPLBUFF:
> >> +			if (!nfsd_cache_append(rqstp, &rp->c_replvec))
> >> +				return rtn; /* should not happen */
> >> +			rtn = RC_REPLY;
> >> +			break;
> >> +		default:
> >> +			WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
> >> +		}
> >> +
> >> +out_trace:
> >> +		trace_nfsd_drc_found(nn, rqstp, rtn);
> >> +		return rtn;
> >> +	}
> >> +out:
> >>  	nfsd_cacherep_dispose(&dispose);
> >>  
> >>  	nfsd_stats_rc_misses_inc(nn);
> >>  	atomic_inc(&nn->num_drc_entries);
> >>  	nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
> >> -	goto out;
> >> -
> >> -found_entry:
> >> -	/* We found a matching entry which is either in progress or done. */
> >> -	nfsd_reply_cache_free_locked(NULL, rp, nn);
> >> -	nfsd_stats_rc_hits_inc(nn);
> >> -	rtn = RC_DROPIT;
> >> -	rp = found;
> >> -
> >> -	/* Request being processed */
> >> -	if (rp->c_state == RC_INPROG)
> >> -		goto out_trace;
> >> -
> >> -	/* From the hall of fame of impractical attacks:
> >> -	 * Is this a user who tries to snoop on the cache? */
> >> -	rtn = RC_DOIT;
> >> -	if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
> >> -		goto out_trace;
> >> -
> >> -	/* Compose RPC reply header */
> >> -	switch (rp->c_type) {
> >> -	case RC_NOCACHE:
> >> -		break;
> >> -	case RC_REPLSTAT:
> >> -		xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
> >> -		rtn = RC_REPLY;
> >> -		break;
> >> -	case RC_REPLBUFF:
> >> -		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
> >> -			goto out_unlock; /* should not happen */
> >> -		rtn = RC_REPLY;
> >> -		break;
> >> -	default:
> >> -		WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
> >> -	}
> >> -
> >> -out_trace:
> >> -	trace_nfsd_drc_found(nn, rqstp, rtn);
> >> -out_unlock:
> >> -	spin_unlock(&b->cache_lock);
> >> -out:
> >>  	return rtn;
> >>  }
> >>  
> >> -- 
> >> 2.30.2
> >>
> >>
> > 
> 
> 
> -- 
> Chuck Lever
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24 22:15     ` NeilBrown
@ 2025-06-25 11:51       ` Su Hui
  2025-07-11 14:22       ` Chuck Lever
  1 sibling, 0 replies; 14+ messages in thread
From: Su Hui @ 2025-06-25 11:51 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever
  Cc: jlayton, okorniev, Dai.Ngo, tom, linux-nfs, linux-kernel,
	kernel-janitors, Su Hui

On 2025/6/25 06:15, NeilBrown wrote:
> On Wed, 25 Jun 2025, Chuck Lever wrote:
>> On 6/23/25 8:19 PM, NeilBrown wrote:
>>> On Mon, 23 Jun 2025, Su Hui wrote:
>>>> Using guard() to replace *unlock* label. guard() makes lock/unlock code
>>>> more clear. Change the order of the code to let all lock code in the
>>>> same scope. No functional changes.
>>> While I agree that this code could usefully be cleaned up and that you
>>> have made some improvements, I think the use of guard() is a nearly
>>> insignificant part of the change.  You could easily do exactly the same
>>> patch without using guard() but having and explicit spin_unlock() before
>>> the new return.  That doesn't mean you shouldn't use guard(), but it
>>> does mean that the comment explaining the change could be more usefully
>>> focused on the "Change the order ..." part, and maybe explain what that
>>> is important.
>>>
>>> I actually think there is room for other changes which would make the
>>> code even better:
>>> - Change nfsd_prune_bucket_locked() to nfsd_prune_bucket().  Have it
>>>    take the lock when needed, then drop it, then call
>>>    nfsd_cacherep_dispose() - and return the count.
>>> - change nfsd_cache_insert to also skip updating the chain length stats
>>>    when it finds a match - in that case the "entries" isn't a chain
>>>    length. So just  lru_put_end(), return.  Have it return NULL if
>>>    no match was found
>>> - after the found_entry label don't use nfsd_reply_cache_free_locked(),
>>>    just free rp.  It has never been included in any rbtree or list, so it
>>>    doesn't need to be removed.
>>> - I'd be tempted to have nfsd_cache_insert() take the spinlock itself
>>>    and call it under rcu_read_lock() - and use RCU to free the cached
>>>    items.
>>> - put the chunk of code after the found_entry label into a separate
>>>    function and instead just return RC_REPLY (and maybe rename that
>>>    RC_CACHED).  Then in nfsd_dispatch(), if RC_CACHED was returned, call
>>>    that function that has the found_entry code.
>>>
>>> I think that would make the code a lot easier to follow.  Would you like
>>> to have a go at that - I suspect it would be several patches - or shall
>>> I do it?
>> I'm going to counsel some caution.
>>
>> nfsd_cache_lookup() is a hot path. Source code readability, though
>> important, is not the priority in this area.
>>
>> I'm happy to consider changes to this function, but the bottom line is
>> patches need to be accompanied by data that show that proposed code
>> modifications do not negatively impact performance. (Plus the usual
>> test results that show no impact to correctness).
>>
>> That data might include:
>> - flame graphs that show a decrease in CPU utilization
>> - objdump output showing a smaller instruction cache footprint
>>    and/or short instruction path lengths
>> - perf results showing better memory bandwidth
>> - perf results showing better branch prediction
>> - lockstat results showing less contention and/or shorter hold
>>    time on locks held in this path
>>
>> Macro benchmark results are also welcome: equal or lower latency for
>> NFSv3 operations, and equal or higher I/O throughput.
>>
>> The benefit for the scoped_guard construct is that it might make it more
>> difficult to add code that returns from this function with a lock held.
>> However, so far that hasn't been an issue.
>>
>> Thus I'm not sure there's a lot of strong technical justification for
>> modification of this code path. But, you might know of one -- if so,
>> please make sure that appears in the patch descriptions.
>>
>> What is more interesting to me is trying out more sophisticated abstract
>> data types for the DRC hashtable. rhashtable is one alternative; so is
>> Maple tree, which is supposed to handle lookups with more memory
>> bandwidth efficiency than walking a linked list.
>>
> While I generally like rhashtable there is an awkwardness.  It doesn't
> guarantee that an insert will always succeed.  If you get lots of new
> records that hash to the same value, it will start failing insert
> requests until is hash re-hashed the table with a new seed.  This is
> intended to defeat collision attacks.  That means we would need to drop
> requests sometimes.  Maybe that is OK.  The DRC could be the target of
> collision attacks so maybe we really do want to drop requests if
> rhashtable refuses to store them.
>
> I think the other area that could use improvement is pruning old entries.
> I would not include RC_INPROG entries in the lru at all - they are
> always ignored, and will be added when they are switched to RCU_DONE.
> I'd generally like to prune less often in larger batches, but removing
> each of the batch from the rbtree could hold the lock for longer than we
> would like.  I wonder if we could have an 'old' and a 'new' rbtree and
> when the 'old' gets too old or the 'new' get too full, we extract 'old',
> move 'new' to 'old', and outside the spinlock we free all of the moved
> 'old'.
>
> But if we switched to rhashtable, we probably wouldn't need an lru -
> just walk the entire table occasionally - there would be little conflict
> with concurrent lookups.
>
> But as you say, measuring would be useful.  Hopefully the DRC lookup
> would be small contribution to the total request time, so we would need
> to measure just want happens in the code to compare different versions.
>
> NeilBrown
>
>> Anyway, have fun, get creative, and let's see what comes up.
>>
Thanks for the above prompt. I think I need more time to complete this,
both for code and related tests. I will do my best with curiosity and
creativity :).

Regards,
Su Hui

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-06-24 22:15     ` NeilBrown
  2025-06-25 11:51       ` Su Hui
@ 2025-07-11 14:22       ` Chuck Lever
  2025-07-11 17:20         ` Tom Talpey
  1 sibling, 1 reply; 14+ messages in thread
From: Chuck Lever @ 2025-07-11 14:22 UTC (permalink / raw)
  To: NeilBrown
  Cc: Su Hui, jlayton, okorniev, Dai.Ngo, tom, linux-nfs, linux-kernel,
	kernel-janitors

On 6/24/25 6:15 PM, NeilBrown wrote:
> On Wed, 25 Jun 2025, Chuck Lever wrote:

>> What is more interesting to me is trying out more sophisticated abstract
>> data types for the DRC hashtable. rhashtable is one alternative; so is
>> Maple tree, which is supposed to handle lookups with more memory
>> bandwidth efficiency than walking a linked list.
>>
> 
> While I generally like rhashtable there is an awkwardness.  It doesn't
> guarantee that an insert will always succeed.  If you get lots of new
> records that hash to the same value, it will start failing insert
> requests until it has re-hashed the table with a new seed.

Hm. I hadn't thought of that.


> This is
> intended to defeat collision attacks.  That means we would need to drop
> requests sometimes.  Maybe that is OK.  The DRC could be the target of
> collision attacks so maybe we really do want to drop requests if
> rhashtable refuses to store them.

Well I can imagine, in a large cohort of clients, there is a pretty good
probability of non-malicious XID collisions due to the birthday paradox.


> I think the other area that could use improvement is pruning old entries.
> I would not include RC_INPROG entries in the lru at all - they are
> always ignored, and will be added when they are switched to RCU_DONE.

That sounds intriguing.


> I'd generally like to prune less often in larger batches, but removing
> each of the batch from the rbtree could hold the lock for longer than we
> would like.

Have a look at 8847ecc9274a ("NFSD: Optimize DRC bucket pruning").
Pruning frequently by small amounts seems to have the greatest benefit.

It certainly does keep request latency jitter down, since NFSD prunes
while the client is waiting. If we can move some management of the cache
until after the reply is sent, that might offer opportunities to prune
more aggressively without impacting server responsiveness.


> I wonder if we could have an 'old' and a 'new' rbtree and
> when the 'old' gets too old or the 'new' get too full, we extract 'old',
> move 'new' to 'old', and outside the spinlock we free all of the moved
> 'old'.

One observation I've had is that nearly every DRC lookup will fail to
find an entry that matches the XID, because when things are operating
smoothly, every incoming RPC contains an XID that hasn't been seen
before.

That means DRC lookups are walking the entire bucket in the common
case. Pointer chasing of any kind is a well-known ADT performance
killer. My experience with the kernel's r-b tree is that it does not
perform well due to the number of memory accesses needed for lookups.

This is why I suggested using rhashtable -- it makes an effort to keep
bucket sizes small by widening the table frequently. The downside is
that this will definitely introduce some latency when an insertion
triggers a table-size change.

What might be helpful is a per-bucket Bloom filter that would make
checking if an XID is in the hashed bucket an O(1) operation -- and
in particular, would require few, if any, pointer dereferences.


> But if we switched to rhashtable, we probably wouldn't need an lru -
> just walk the entire table occasionally - there would be little conflict
> with concurrent lookups.
When the DRC is at capacity, pruning needs to find something to evict
on every insertion. My thought is that a pruning walk would need to be
done quite frequently to ensure clients don't overrun the cache. Thus
attention needs to be paid to keep pruning efficient (although perhaps
an LRU isn't the only choice here).

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-07-11 14:22       ` Chuck Lever
@ 2025-07-11 17:20         ` Tom Talpey
  2025-07-13 23:33           ` NeilBrown
  0 siblings, 1 reply; 14+ messages in thread
From: Tom Talpey @ 2025-07-11 17:20 UTC (permalink / raw)
  To: Chuck Lever, NeilBrown
  Cc: Su Hui, jlayton, okorniev, Dai.Ngo, linux-nfs, linux-kernel,
	kernel-janitors

On 7/11/2025 10:22 AM, Chuck Lever wrote:
> On 6/24/25 6:15 PM, NeilBrown wrote:
>> On Wed, 25 Jun 2025, Chuck Lever wrote:
> 
>>> What is more interesting to me is trying out more sophisticated abstract
>>> data types for the DRC hashtable. rhashtable is one alternative; so is
>>> Maple tree, which is supposed to handle lookups with more memory
>>> bandwidth efficiency than walking a linked list.
>>>
>>
>> While I generally like rhashtable there is an awkwardness.  It doesn't
>> guarantee that an insert will always succeed.  If you get lots of new
>> records that hash to the same value, it will start failing insert
>> requests until it has re-hashed the table with a new seed.
> 
> Hm. I hadn't thought of that.
> 
> 
>> This is
>> intended to defeat collision attacks.  That means we would need to drop
>> requests sometimes.  Maybe that is OK.  The DRC could be the target of
>> collision attacks so maybe we really do want to drop requests if
>> rhashtable refuses to store them.
> 
> Well I can imagine, in a large cohort of clients, there is a pretty good
> probability of non-malicious XID collisions due to the birthday paradox.
> 
> 
>> I think the other area that could use improvement is pruning old entries.
>> I would not include RC_INPROG entries in the lru at all - they are
>> always ignored, and will be added when they are switched to RCU_DONE.
> 
> That sounds intriguing.
> 
> 
>> I'd generally like to prune less often in larger batches, but removing
>> each of the batch from the rbtree could hold the lock for longer than we
>> would like.
> 
> Have a look at 8847ecc9274a ("NFSD: Optimize DRC bucket pruning").
> Pruning frequently by small amounts seems to have the greatest benefit.
> 
> It certainly does keep request latency jitter down, since NFSD prunes
> while the client is waiting. If we can move some management of the cache
> until after the reply is sent, that might offer opportunities to prune
> more aggressively without impacting server responsiveness.
> 
> 
>> I wonder if we could have an 'old' and a 'new' rbtree and
>> when the 'old' gets too old or the 'new' get too full, we extract 'old',
>> move 'new' to 'old', and outside the spinlock we free all of the moved
>> 'old'.
> 
> One observation I've had is that nearly every DRC lookup will fail to
> find an entry that matches the XID, because when things are operating
> smoothly, every incoming RPC contains an XID that hasn't been seen
> before.
> 
> That means DRC lookups are walking the entire bucket in the common
> case. Pointer chasing of any kind is a well-known ADT performance
> killer. My experience with the kernel's r-b tree is that is does not
> perform well due to the number of memory accesses needed for lookups.
> 
> This is why I suggested using rhashtable -- it makes an effort to keep
> bucket sizes small by widening the table frequently. The downside is
> that this will definitely introduce some latency when an insertion
> triggers a table-size change.
> 
> What might be helpful is a per-bucket Bloom filter that would make
> checking if an XID is in the hashed bucket an O(1) operation -- and
> in particular, would require few, if any, pointer dereferences.
> 
> 
>> But if we switched to rhashtable, we probably wouldn't need an lru -
>> just walk the entire table occasionally - there would be little conflict
>> with concurrent lookups.
> When the DRC is at capacity, pruning needs to find something to evict
> on every insertion. My thought is that a pruning walk would need to be
> done quite frequently to ensure clients don't overrun the cache. Thus
> attention needs to be paid to keep pruning efficient (although perhaps
> an LRU isn't the only choice here).

As a matter of fact, LRU is a *bad* choice for DRC eviction. It will
evict the very entries that are most important! The newest ones are
coming from clients that are working properly. The oldest ones were
from the ones still attempting to retry.

This is pretty subtle, and it may not be a simple thing to implement.
But at a high level, evicting entries from a client that is regularly
issuing new requests is a much safer strategy. Looking at the age of
the requests themselves, without considering their source, is much more
risky.

Tom.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup()
  2025-07-11 17:20         ` Tom Talpey
@ 2025-07-13 23:33           ` NeilBrown
  0 siblings, 0 replies; 14+ messages in thread
From: NeilBrown @ 2025-07-13 23:33 UTC (permalink / raw)
  To: Tom Talpey
  Cc: Chuck Lever, Su Hui, jlayton, okorniev, Dai.Ngo, linux-nfs,
	linux-kernel, kernel-janitors

On Sat, 12 Jul 2025, Tom Talpey wrote:
> On 7/11/2025 10:22 AM, Chuck Lever wrote:
> > On 6/24/25 6:15 PM, NeilBrown wrote:
> >> On Wed, 25 Jun 2025, Chuck Lever wrote:
> > 
> >>> What is more interesting to me is trying out more sophisticated abstract
> >>> data types for the DRC hashtable. rhashtable is one alternative; so is
> >>> Maple tree, which is supposed to handle lookups with more memory
> >>> bandwidth efficiency than walking a linked list.
> >>>
> >>
> >> While I generally like rhashtable there is an awkwardness.  It doesn't
> >> guarantee that an insert will always succeed.  If you get lots of new
> >> records that hash to the same value, it will start failing insert
> >> requests until it has re-hashed the table with a new seed.
> > 
> > Hm. I hadn't thought of that.
> > 
> > 
> >> This is
> >> intended to defeat collision attacks.  That means we would need to drop
> >> requests sometimes.  Maybe that is OK.  The DRC could be the target of
> >> collision attacks so maybe we really do want to drop requests if
> >> rhashtable refuses to store them.
> > 
> > Well I can imagine, in a large cohort of clients, there is a pretty good
> > probability of non-malicious XID collisions due to the birthday paradox.
> > 
> > 
> >> I think the other area that could use improvement is pruning old entries.
> >> I would not include RC_INPROG entries in the lru at all - they are
> >> always ignored, and will be added when they are switched to RCU_DONE.
> > 
> > That sounds intriguing.
> > 
> > 
> >> I'd generally like to prune less often in larger batches, but removing
> >> each of the batch from the rbtree could hold the lock for longer than we
> >> would like.
> > 
> > Have a look at 8847ecc9274a ("NFSD: Optimize DRC bucket pruning").
> > Pruning frequently by small amounts seems to have the greatest benefit.
> > 
> > It certainly does keep request latency jitter down, since NFSD prunes
> > while the client is waiting. If we can move some management of the cache
> > until after the reply is sent, that might offer opportunities to prune
> > more aggressively without impacting server responsiveness.
> > 
> > 
> >> I wonder if we could have an 'old' and a 'new' rbtree and
> >> when the 'old' gets too old or the 'new' get too full, we extract 'old',
> >> move 'new' to 'old', and outside the spinlock we free all of the moved
> >> 'old'.
> > 
> > One observation I've had is that nearly every DRC lookup will fail to
> > find an entry that matches the XID, because when things are operating
> > smoothly, every incoming RPC contains an XID that hasn't been seen
> > before.
> > 
> > That means DRC lookups are walking the entire bucket in the common
> > case. Pointer chasing of any kind is a well-known ADT performance
> > killer. My experience with the kernel's r-b tree is that it does not
> > perform well due to the number of memory accesses needed for lookups.
> > 
> > This is why I suggested using rhashtable -- it makes an effort to keep
> > bucket sizes small by widening the table frequently. The downside is
> > that this will definitely introduce some latency when an insertion
> > triggers a table-size change.
> > 
> > What might be helpful is a per-bucket Bloom filter that would make
> > checking if an XID is in the hashed bucket an O(1) operation -- and
> > in particular, would require few, if any, pointer dereferences.
> > 
> > 
> >> But if we switched to rhashtable, we probably wouldn't need an lru -
> >> just walk the entire table occasionally - there would be little conflict
> >> with concurrent lookups.
> > When the DRC is at capacity, pruning needs to find something to evict
> > on every insertion. My thought is that a pruning walk would need to be
> > done quite frequently to ensure clients don't overrun the cache. Thus
> > attention needs to be paid to keep pruning efficient (although perhaps
> > an LRU isn't the only choice here).
> 
> As a matter of fact, LRU is a *bad* choice for DRC eviction. It will
> evict the very entries that are most important! The newest ones are
> coming from clients that are working properly. The oldest ones were
> from the ones still attempting to retry.

Isn't this just the standard complaint about LRU cache management?  We
throw out the old ones, but they are most likely to be used soon.

My understanding is that LRU cache management is a bit like democracy:
it is the worst solution to the problem - except for all the others.

Without more concrete information about how the client is behaving (like
the information that SEQUENCE gives us in 4.1) you cannot reliably do
any better, and you can certainly do a lot worse.

I don't think there is any value in revising the basic approach we are
taking to DRC cache management.  There might be value in optimising the
code (removing unused assignments, changing data structures, moving some
tasks to a different place in request processing to adjust latency)
while keeping the core approach the same.

> 
> This is pretty subtle, and it may not be a simple thing to implement.
> But at a high level, evicting entries from a client that is regularly
> issuing new requests is a much safer strategy. Looking at the age of
> the requests themselves, without considering their source, is much more
> risky.

Past behaviour cannot usefully predict future behaviour when the
particular behaviour that we care about (lost replies) it typically
caused by a discontinuity.

There might be value in using different tuning (and possibly separate
data structures) for UDP vs TCP sessions as the expected failure mode is
quite different.  With UDP loss of an individual reply would not be a
surprise.  With TCP loss is much less likely, but when it happens it
would be expected to affect all outstanding replies.  Of course the
server doesn't know which are outstanding.

Possibly a count per client, rather than a global count, might make
sense.

Thanks,
NeilBrown


> 
> Tom.
> 


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2025-07-13 23:34 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-23 12:22 [PATCH] nfsd: Using guard() to simplify nfsd_cache_lookup() Su Hui
2025-06-23 15:47 ` Dan Carpenter
2025-06-24  1:45   ` Su Hui
2025-06-24 14:49     ` Dan Carpenter
2025-06-24  0:19 ` NeilBrown
2025-06-24  1:31   ` Su Hui
2025-06-24 13:41     ` Chuck Lever
2025-06-24 14:21       ` Su Hui
2025-06-24 15:07   ` Chuck Lever
2025-06-24 22:15     ` NeilBrown
2025-06-25 11:51       ` Su Hui
2025-07-11 14:22       ` Chuck Lever
2025-07-11 17:20         ` Tom Talpey
2025-07-13 23:33           ` NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).