Linux NFS development
 help / color / mirror / Atom feed
From: Chuck Lever <chuck.lever@oracle.com>
To: Tom Tucker <tom@opengridcomputing.com>
Cc: nfs@lists.sourceforge.net
Subject: Re: [RFC,PATCH 04/20] svc: xpt_has_wspace
Date: Wed, 29 Aug 2007 13:32:55 -0400	[thread overview]
Message-ID: <46D5ADC7.2090009@oracle.com> (raw)
In-Reply-To: <20070820162329.15224.29032.stgit@dell3.ogc.int>

[-- Attachment #1: Type: text/plain, Size: 5869 bytes --]

Tom Tucker wrote:
> Move the code that checks for available write space on the socket, 
> into a new transport function. This will allow transports flexibility
> when determining if enough space/memory is available to process
> the reply. The role of this function for RDMA is to avoid stalling
> an knfsd thread when SQ space is not available.
> 
> Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
> Signed-off-by: Peter Leckie <pleckie@melbourne.sgi.com>
> Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
> ---
> 
>  include/linux/sunrpc/svcsock.h |    4 ++
>  net/sunrpc/svcsock.c           |   75 ++++++++++++++++++++++++++--------------
>  2 files changed, 52 insertions(+), 27 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index 1da42c2..3faa95c 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -31,6 +31,10 @@ struct svc_xprt {
>  	 * Prepare any transport-specific RPC header.
>  	 */
>  	int                     (*xpt_prep_reply_hdr)(struct svc_rqst *);
> +	/*
> +	 * Return 1 if sufficient space to write reply to network.
> +	 */
> +	int			(*xpt_has_wspace)(struct svc_sock *);
>  };

Again, I think this documentation, while important (required, even),
should go somewhere else.  A complete function description requires more
information than this, but there isn't enough space in this
structure for it.

And as before the "svc_sock *" might be replaced with something more 
generic.

>  /*
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index ca473ee..b16dad4 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -205,22 +205,6 @@ svc_release_skb(struct svc_rqst *rqstp)
>  }
>  
>  /*
> - * Any space to write?
> - */
> -static inline unsigned long
> -svc_sock_wspace(struct svc_sock *svsk)
> -{
> -	int wspace;
> -
> -	if (svsk->sk_sock->type == SOCK_STREAM)
> -		wspace = sk_stream_wspace(svsk->sk_sk);
> -	else
> -		wspace = sock_wspace(svsk->sk_sk);
> -
> -	return wspace;
> -}
> -
> -/*
>   * Queue up a socket with data pending. If there are idle nfsd
>   * processes, wake 'em up.
>   *
> @@ -269,21 +253,13 @@ svc_sock_enqueue(struct svc_sock *svsk)
>  	BUG_ON(svsk->sk_pool != NULL);
>  	svsk->sk_pool = pool;
>  
> -	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
> -	     > svc_sock_wspace(svsk))
> -	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
> -	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
> -		/* Don't enqueue while not enough space for reply */
> -		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
> -			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
> -			svc_sock_wspace(svsk));
> +	if (!test_bit(SK_CLOSE, &svsk->sk_flags)
> +	    && !test_bit(SK_CONN, &svsk->sk_flags)
> +	    && !svsk->sk_xprt->xpt_has_wspace(svsk)) {
>  		svsk->sk_pool = NULL;
>  		clear_bit(SK_BUSY, &svsk->sk_flags);
>  		goto out_unlock;
>  	}
> -	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> -
>  
>  	if (!list_empty(&pool->sp_threads)) {
>  		rqstp = list_entry(pool->sp_threads.next,

Your patch changes the order of the tests here of SOCK_NOSPACE, 
SK_CLOSE, SK_CONN, and the other variables.  Can you prove this is safe?

Have you considered abstracting all of svc_sock_enqueue into the switch 
API, instead of just the wspace checking part?  At some point the RDMA 
transport may want to schedule the enqueued I/O differently than the 
socket interface does.

If not, svc_sock_enqueue should be made more generic (perhaps moved out
of svcsock.c and renamed).

> @@ -882,12 +858,45 @@ svc_udp_sendto(struct svc_rqst *rqstp)
>  	return error;
>  }
>  
> +/**
> + * svc_sock_has_write_space - Checks if there is enough space
> + * to send the reply on the socket.
> + * @svsk: the svc_sock to write on
> + * @wspace: the number of bytes available for writing
> + */
> +static int svc_sock_has_write_space(struct svc_sock *svsk, int wspace)
> +{
> +	struct svc_serv	*serv = svsk->sk_server;
> +	int required = atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg;
> +
> +	if (required*2 > wspace) {
> +		/* Don't enqueue while not enough space for reply */
> +		dprintk("svc: socket %p  no space, %d*2 > %d, not enqueued\n",
> +			svsk->sk_sk, required, wspace);
> +		return 0;
> +	}
> +	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return 1;
> +}

My own style preference here is to keep the set_bit(SOCK_NOSPACE) and 
clear_bit(SOCK_NOSPACE) in the same function if possible, just as a 
defensive coding practice.

> +static int
> +svc_udp_has_wspace(struct svc_sock *svsk)
> +{
> +	/*
> +	 * Set the SOCK_NOSPACE flag before checking the available
> +	 * sock space.
> +	 */
> +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return svc_sock_has_write_space(svsk, sock_wspace(svsk->sk_sk));
> +}
> +
>  static const struct svc_xprt svc_udp_xprt = {
>  	.xpt_name = "udp",
>  	.xpt_recvfrom = svc_udp_recvfrom,
>  	.xpt_sendto = svc_udp_sendto,
>  	.xpt_detach = svc_sock_detach,
>  	.xpt_free = svc_sock_free,
> +	.xpt_has_wspace = svc_udp_has_wspace,
>  };
>  
>  static void
> @@ -1340,6 +1349,17 @@ svc_tcp_prep_reply_hdr(struct svc_rqst *
>  	return 0;
>  }
>  
> +static int
> +svc_tcp_has_wspace(struct svc_sock *svsk)
> +{
> +	/*
> +	 * Set the SOCK_NOSPACE flag before checking the available
> +	 * sock space.
> +	 */
> +	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +	return svc_sock_has_write_space(svsk, sk_stream_wspace(svsk->sk_sk));
> +}
> +
>  static const struct svc_xprt svc_tcp_xprt = {
>  	.xpt_name = "tcp",
>  	.xpt_recvfrom = svc_tcp_recvfrom,
> @@ -1347,6 +1367,7 @@ static const struct svc_xprt svc_tcp_xpr
>  	.xpt_detach = svc_sock_detach,
>  	.xpt_free = svc_sock_free,
>  	.xpt_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpt_has_wspace = svc_tcp_has_wspace,
>  };
>  
>  static void

[-- Attachment #2: chuck.lever.vcf --]
[-- Type: text/x-vcard, Size: 315 bytes --]

begin:vcard
fn:Chuck Lever
n:Lever;Chuck
org:Oracle Corporation;Corporate Architecture: Linux Projects Group
adr:;;1015 Granger Avenue;Ann Arbor;MI;48104;USA
email;internet:chuck dot lever at nospam oracle dot com
title:Principal Member of Staff
tel;work:+1 248 614 5091
x-mozilla-html:FALSE
version:2.1
end:vcard


[-- Attachment #3: Type: text/plain, Size: 315 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

[-- Attachment #4: Type: text/plain, Size: 140 bytes --]

_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

  parent reply	other threads:[~2007-08-29 17:34 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-08-20 16:20 [RFC,PATCH 00/20] svc: Server Side Transport Switch Tom Tucker
2007-08-20 16:23 ` [RFC, PATCH 01/20] svc: Add svc_xprt transport switch structure Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 02/20] svc: xpt_detach and xpt_free Tom Tucker
2007-08-29 17:05   ` Chuck Lever
2007-08-29 17:08   ` J. Bruce Fields
2007-08-20 16:23 ` [RFC,PATCH 03/20] svc: xpt_prep_reply_hdr Tom Tucker
2007-08-29 17:15   ` Chuck Lever
2007-08-29 18:28     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 05/20] svc: xpt_max_payload Tom Tucker
2007-08-29 17:40   ` Chuck Lever
2007-08-29 19:06     ` Tom Tucker
2007-08-20 16:23 ` [RFC, PATCH 06/20] svc: export svc_sock_enqueue, svc_sock_received Tom Tucker
2007-08-21 16:03   ` Chuck Lever
2007-08-21 18:08     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 07/20] svc: centralise close handling Tom Tucker
2007-08-29 18:16   ` Chuck Lever
2007-08-20 16:23 ` [RFC,PATCH 08/20] svc: centralise accept handling Tom Tucker
2007-08-29 18:40   ` Chuck Lever
2007-08-29 23:56     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 09/20] svc: Add SK_LISTENER flag Tom Tucker
2007-08-29 18:41   ` Chuck Lever
2007-08-20 16:23 ` [RFC,PATCH 10/20] svc: Add generic refcount services Tom Tucker
2007-08-29 18:55   ` Chuck Lever
2007-08-29 20:19     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 11/20] svc: cleanup svc_sock initialization Tom Tucker
2007-08-29 19:07   ` Chuck Lever
2007-08-20 16:23 ` [RFC,PATCH 13/20] svc: Add svc_[un]register_transport Tom Tucker
2007-08-29 19:12   ` Chuck Lever
2007-08-29 20:32     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 14/20] svc: Register TCP/UDP Transports Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 15/20] svc: transport file implementation Tom Tucker
2007-08-29 19:15   ` Chuck Lever
2007-08-29 20:37     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 16/20] svc: xpt_create_svc Tom Tucker
2007-08-29 19:21   ` Chuck Lever
2007-08-29 20:43     ` Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 17/20] svc: Add xpt_get_name service Tom Tucker
2007-08-20 16:23 ` [RFC,PATCH 18/20] svc: Add xpt_defer transport function Tom Tucker
2007-08-29 19:29   ` Chuck Lever
2007-08-29 21:34     ` Tom Tucker
2007-08-20 16:24 ` [RFC,PATCH 19/20] knfsd: call svc_create_svcsock Tom Tucker
2007-08-20 16:24 ` [RFC,PATCH 20/20] knfsd: create listener via portlist write Tom Tucker
2007-08-29 16:50 ` [RFC,PATCH 00/20] svc: Server Side Transport Switch Chuck Lever
2007-08-29 17:01   ` Talpey, Thomas
2007-08-29 17:59   ` Tom Tucker
2007-08-30 21:12     ` Chuck Lever
2007-08-31  1:19       ` Talpey, Thomas
2007-08-29 16:55 ` J. Bruce Fields
     [not found] ` <20070820162329.15224.29032.stgit@dell3.ogc.int>
2007-08-29 17:32   ` Chuck Lever [this message]
2007-08-29 18:50     ` [RFC,PATCH 04/20] svc: xpt_has_wspace Tom Tucker
2007-08-29 17:35   ` J. Bruce Fields
2007-08-29 18:52     ` Tom Tucker
2007-08-29 18:53   ` J. Bruce Fields
2007-08-29 19:31     ` J. Bruce Fields
2007-08-29 20:11     ` Tom Tucker
2007-08-29 20:26       ` Tom Tucker
2007-08-29 20:29         ` J. Bruce Fields
2007-08-29 20:28       ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=46D5ADC7.2090009@oracle.com \
    --to=chuck.lever@oracle.com \
    --cc=nfs@lists.sourceforge.net \
    --cc=tom@opengridcomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.