linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff Layton <jlayton@redhat.com>
To: Stefan Hajnoczi <stefanha@redhat.com>, linux-nfs@vger.kernel.org
Cc: Abbas Naderi <abiusx@google.com>,
	Anna Schumaker <anna.schumaker@netapp.com>,
	Trond Myklebust <trond.myklebust@primarydata.com>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	Chuck Lever <chuck.lever@oracle.com>
Subject: Re: [PATCH v3 13/14] SUNRPC: vsock svcsock support
Date: Tue, 07 Nov 2017 09:12:31 -0500	[thread overview]
Message-ID: <1510063951.4705.2.camel@redhat.com> (raw)
In-Reply-To: <20170630132352.32133-14-stefanha@redhat.com>

On Fri, 2017-06-30 at 14:23 +0100, Stefan Hajnoczi wrote:
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  net/sunrpc/svcsock.c | 214 +++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 190 insertions(+), 24 deletions(-)
> 
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index e67b097..86cce8f 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -42,7 +42,9 @@
>  #include <net/udp.h>
>  #include <net/tcp.h>
>  #include <net/tcp_states.h>
> +#include <net/af_vsock.h>
>  #include <linux/uaccess.h>
> +#include <asm/uaccess.h>
>  #include <asm/ioctls.h>
>  #include <trace/events/skb.h>
>  
> @@ -64,7 +66,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
>  static int		svc_udp_recvfrom(struct svc_rqst *);
>  static int		svc_udp_sendto(struct svc_rqst *);
>  static void		svc_sock_detach(struct svc_xprt *);
> -static void		svc_tcp_sock_detach(struct svc_xprt *);
> +static void		svc_common_sock_detach(struct svc_xprt *);
>  static void		svc_sock_free(struct svc_xprt *);
>  
>  static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
> @@ -810,9 +812,9 @@ static void svc_tcp_state_change(struct sock *sk)
>  }
>  
>  /*
> - * Accept a TCP connection
> + * Accept an incoming connection
>   */
> -static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
> +static struct svc_xprt *svc_common_accept(struct svc_xprt *xprt)
>  {
>  	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>  	struct sockaddr_storage addr;
> @@ -824,7 +826,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
>  	int		err, slen;
>  	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
>  
> -	dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
> +	dprintk("svc: %s %p sock %p\n", __func__, svsk, sock);
>  	if (!sock)
>  		return NULL;
>  
> @@ -877,7 +879,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
>  	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
>  	err = kernel_getsockname(newsock, sin, &slen);
>  	if (unlikely(err < 0)) {
> -		dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
> +		dprintk("%s: kernel_getsockname error %d\n", __func__, -err);
>  		slen = offsetof(struct sockaddr, sa_data);
>  	}
>  	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
> @@ -1131,7 +1133,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
>  		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
>  
>  	rqstp->rq_xprt_ctxt   = NULL;
> -	rqstp->rq_prot	      = IPPROTO_TCP;
> +	rqstp->rq_prot	      = IPPROTO_TCP; /* TODO vsock should either use 0 or IPPROTO_MAX */
>  	if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
>  		set_bit(RQ_LOCAL, &rqstp->rq_flags);
>  	else
> @@ -1200,13 +1202,13 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
>  }
>  
>  /*
> - * Setup response header. TCP has a 4B record length field.
> + * Setup response header. Byte stream transports have a 4B record length field.
>   */
> -static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
> +static void svc_stream_prep_reply_hdr(struct svc_rqst *rqstp)
>  {
>  	struct kvec *resv = &rqstp->rq_res.head[0];
>  
> -	/* tcp needs a space for the record length... */
> +	/* space for the record length... */
>  	svc_putnl(resv, 0);
>  }
>  
> @@ -1232,7 +1234,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
>  	.xpo_create = svc_bc_create_socket,
>  	.xpo_detach = svc_bc_tcp_sock_detach,
>  	.xpo_free = svc_bc_sock_free,
> -	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
>  	.xpo_secure_port = svc_sock_secure_port,
>  };
>  
> @@ -1244,6 +1246,145 @@ static struct svc_xprt_class svc_tcp_bc_class = {
>  };
>  
>  #ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +/*
> + * A data_ready event on a listening socket means there's a connection
> + * pending. Do not use state_change as a substitute for it.
> + */
> +static void svc_vsock_listen_data_ready(struct sock *sk)
> +{
> +	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
> +	wait_queue_head_t *wq;
> +
> +	dprintk("svc: socket %p AF_VSOCK (listen) state change %d\n",
> +		sk, sk->sk_state);
> +
> +	/*
> +	 * This callback may called twice when a new connection
> +	 * is established as a child socket inherits everything
> +	 * from a parent VSOCK_SS_LISTEN socket.
> +	 * 1) data_ready method of the parent socket will be called
> +	 *    when one of child sockets become SS_CONNECTED.
> +	 * 2) data_ready method of the child socket may be called
> +	 *    when it receives data before the socket is accepted.
> +	 * In case of 2, we should ignore it silently.
> +	 */
> +	if (sk->sk_state == VSOCK_SS_LISTEN) {
> +		if (svsk) {
> +			set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
> +			svc_xprt_enqueue(&svsk->sk_xprt);
> +		} else
> +			printk("svc: socket %p: no user data\n", sk);
> +	}
> +
> +	wq = sk_sleep(sk);
> +	if (wq && waitqueue_active(wq))
> +		wake_up_interruptible_all(wq);
> +}
> +
> +/*
> + * A state change on a connected socket means it's dying or dead.
> + */
> +static void svc_vsock_state_change(struct sock *sk)
> +{
> +	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data;
> +	wait_queue_head_t *wq = sk_sleep(sk);
> +
> +	dprintk("svc: socket %p AF_VSOCK (connected) state change %d (svsk %p)\n",
> +		sk, sk->sk_state, sk->sk_user_data);
> +
> +	if (!svsk)
> +		printk("svc: socket %p: no user data\n", sk);
> +	else {
> +		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
> +		svc_xprt_enqueue(&svsk->sk_xprt);
> +	}
> +	if (wq && waitqueue_active(wq))
> +		wake_up_interruptible_all(wq);
> +}
> +
> +static void svc_vsock_data_ready(struct sock *sk)
> +{
> +	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
> +	wait_queue_head_t *wq = sk_sleep(sk);
> +
> +	dprintk("svc: socket %p AF_VSOCK data ready (svsk %p)\n",
> +		sk, sk->sk_user_data);
> +	if (svsk) {
> +		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
> +		svc_xprt_enqueue(&svsk->sk_xprt);
> +	}
> +	if (wq && waitqueue_active(wq))
> +		wake_up_interruptible(wq);
> +}
> +
> +static int svc_vsock_has_wspace(struct svc_xprt *xprt)
> +{
> +	struct svc_sock *svsk =	container_of(xprt, struct svc_sock, sk_xprt);
> +
> +	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
> +		return 1;
> +	return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
> +}
> +
> +static struct svc_xprt *svc_vsock_create(struct svc_serv *serv,
> +					 struct net *net,
> +					 struct sockaddr *sa, int salen,
> +					 int flags)
> +{
> +	return svc_create_socket(serv, 0, net, sa, salen, flags);
> +}
> +
> +static struct svc_xprt_ops svc_vsock_ops = {
> +	.xpo_create = svc_vsock_create,
> +	.xpo_recvfrom = svc_tcp_recvfrom,
> +	.xpo_sendto = svc_tcp_sendto,
> +	.xpo_release_rqst = svc_release_skb,
> +	.xpo_detach = svc_common_sock_detach,
> +	.xpo_free = svc_sock_free,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
> +	.xpo_has_wspace = svc_vsock_has_wspace,
> +	.xpo_accept = svc_common_accept,
> +	.xpo_secure_port = svc_sock_secure_port,
> +};
> +
> +static struct svc_xprt_class svc_vsock_class = {
> +	.xcl_name = "vsock",
> +	.xcl_owner = THIS_MODULE,
> +	.xcl_ops = &svc_vsock_ops,
> +	.xcl_max_payload = RPCSVC_MAXPAYLOAD,
> +	.xcl_ident = XPRT_TRANSPORT_VSOCK,
> +};
> +
> +static void svc_vsock_init(struct svc_sock *svsk, struct svc_serv *serv)
> +{
> +	struct sock *sk = svsk->sk_sk;
> +
> +	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_vsock_class,
> +		      &svsk->sk_xprt, serv);
> +	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
> +	set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
> +	if (sk->sk_state == VSOCK_SS_LISTEN) {
> +		dprintk("setting up AF_VSOCK socket for listening\n");
> +		set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
> +		sk->sk_data_ready = svc_vsock_listen_data_ready;
> +		set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
> +	} else {
> +		dprintk("setting up AF_VSOCK socket for reading\n");
> +		sk->sk_state_change = svc_vsock_state_change;
> +		sk->sk_data_ready = svc_vsock_data_ready;
> +		sk->sk_write_space = svc_write_space;
> +
> +		svsk->sk_reclen = 0;
> +		svsk->sk_tcplen = 0;
> +		svsk->sk_datalen = 0;
> +		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
> +
> +		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
> +		if (sk->sk_state != SS_CONNECTED)
> +			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
> +	}
> +}
> +
>  static void svc_bc_vsock_sock_detach(struct svc_xprt *xprt)
>  {
>  }
> @@ -1252,7 +1393,7 @@ static struct svc_xprt_ops svc_vsock_bc_ops = {
>  	.xpo_create = svc_bc_create_socket,
>  	.xpo_detach = svc_bc_vsock_sock_detach,
>  	.xpo_free = svc_bc_sock_free,
> -	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
>  	.xpo_secure_port = svc_sock_secure_port,
>  };
>  
> @@ -1294,11 +1435,11 @@ static struct svc_xprt_ops svc_tcp_ops = {
>  	.xpo_recvfrom = svc_tcp_recvfrom,
>  	.xpo_sendto = svc_tcp_sendto,
>  	.xpo_release_rqst = svc_release_skb,
> -	.xpo_detach = svc_tcp_sock_detach,
> +	.xpo_detach = svc_common_sock_detach,
>  	.xpo_free = svc_sock_free,
> -	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
> +	.xpo_prep_reply_hdr = svc_stream_prep_reply_hdr,
>  	.xpo_has_wspace = svc_tcp_has_wspace,
> -	.xpo_accept = svc_tcp_accept,
> +	.xpo_accept = svc_common_accept,
>  	.xpo_secure_port = svc_sock_secure_port,
>  	.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
>  };
> @@ -1315,6 +1456,9 @@ void svc_init_xprt_sock(void)
>  {
>  	svc_reg_xprt_class(&svc_tcp_class);
>  	svc_reg_xprt_class(&svc_udp_class);
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	svc_reg_xprt_class(&svc_vsock_class);
> +#endif
>  	svc_init_bc_xprt_sock();
>  }
>  
> @@ -1322,6 +1466,9 @@ void svc_cleanup_xprt_sock(void)
>  {
>  	svc_unreg_xprt_class(&svc_tcp_class);
>  	svc_unreg_xprt_class(&svc_udp_class);
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	svc_unreg_xprt_class(&svc_vsock_class);
> +#endif
>  	svc_cleanup_bc_xprt_sock();
>  }
>  
> @@ -1417,6 +1564,10 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
>  	/* Initialize the socket */
>  	if (sock->type == SOCK_DGRAM)
>  		svc_udp_init(svsk, serv);
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	else if (inet->sk_family == AF_VSOCK)
> +		svc_vsock_init(svsk, serv);
> +#endif
>  	else
>  		svc_tcp_init(svsk, serv);

The above conditional is a bit of a mess and doesn't really handle the
case where the upper layer might pass it something non-sensical (i.e.
SOCK_DGRAM + AF_VSOCK).

I think this should vet both values and return ERR_PTR(-EINVAL) if it's
not right. Maybe a switch statement on the address family and then check
the sock->type in each? We can afford to code a little defensively here.


>  
> @@ -1468,13 +1619,22 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
>  
>  	if (!so)
>  		return err;
> -	err = -EAFNOSUPPORT;
> -	if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
> -		goto out;
> -	err =  -EPROTONOSUPPORT;
> -	if (so->sk->sk_protocol != IPPROTO_TCP &&
> -	    so->sk->sk_protocol != IPPROTO_UDP)
> +	err = -EPROTONOSUPPORT;
> +	switch (so->sk->sk_family) {
> +	case PF_INET:
> +	case PF_INET6:
> +		if (so->sk->sk_protocol != IPPROTO_TCP &&
> +		    so->sk->sk_protocol != IPPROTO_UDP)
> +			goto out;
> +		break;
> +	case PF_VSOCK:
> +		if (so->sk->sk_protocol != 0)
> +			goto out;
> +		break;
> +	default:
> +		err = -EAFNOSUPPORT;
>  		goto out;
> +	}
>  	err = -EISCONN;
>  	if (so->state > SS_UNCONNECTED)
>  		goto out;
> @@ -1521,7 +1681,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
>  			serv->sv_program->pg_name, protocol,
>  			__svc_print_addr(sin, buf, sizeof(buf)));
>  
> -	if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
> +	if (sin->sa_family != AF_VSOCK &&
> +	    protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
>  		printk(KERN_WARNING "svc: only UDP and TCP "
>  				"sockets supported\n");
>  		return ERR_PTR(-EINVAL);
> @@ -1535,6 +1696,11 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
>  	case AF_INET:
>  		family = PF_INET;
>  		break;
> +#ifdef CONFIG_SUNRPC_XPRT_VSOCK
> +	case AF_VSOCK:
> +		family = PF_VSOCK;
> +		break;
> +#endif
>  	default:
>  		return ERR_PTR(-EINVAL);
>  	}
> @@ -1566,7 +1732,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
>  	if (error < 0)
>  		goto bummer;
>  
> -	if (protocol == IPPROTO_TCP) {
> +	if (type == SOCK_STREAM) {
>  		if ((error = kernel_listen(sock, 64)) < 0)
>  			goto bummer;
>  	}
> @@ -1607,11 +1773,11 @@ static void svc_sock_detach(struct svc_xprt *xprt)
>  /*
>   * Disconnect the socket, and reset the callbacks
>   */
> -static void svc_tcp_sock_detach(struct svc_xprt *xprt)
> +static void svc_common_sock_detach(struct svc_xprt *xprt)
>  {
>  	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
>  
> -	dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
> +	dprintk("svc: %s(%p)\n", __func__, svsk);
>  
>  	svc_sock_detach(xprt);
>  

-- 
Jeff Layton <jlayton@redhat.com>

  reply	other threads:[~2017-11-07 14:12 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-30 13:23 [PATCH v3 00/14] NFS: add AF_VSOCK support Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 01/14] SUNRPC: add AF_VSOCK support to addr.[ch] Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 02/14] SUNRPC: rename "TCP" record parser to "stream" parser Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 03/14] SUNRPC: abstract tcp_read_sock() in record fragment parser Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 04/14] SUNRPC: extract xs_stream_reset_state() Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 05/14] VSOCK: add tcp_read_sock()-like vsock_read_sock() function Stefan Hajnoczi
2017-10-31 13:35   ` Jeff Layton
2017-11-07 13:32     ` Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 06/14] SUNRPC: add AF_VSOCK support to xprtsock.c Stefan Hajnoczi
2017-11-07 13:46   ` Jeff Layton
2017-11-14 16:45     ` Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 07/14] SUNRPC: drop unnecessary svc_bc_tcp_create() helper Stefan Hajnoczi
2017-10-31 13:55   ` Jeff Layton
2017-06-30 13:23 ` [PATCH v3 08/14] SUNRPC: add AF_VSOCK support to svc_xprt.c Stefan Hajnoczi
2017-10-31 14:10   ` Jeff Layton
2017-11-07 13:31     ` Stefan Hajnoczi
2017-11-07 14:01       ` Jeff Layton
2017-11-16 15:25         ` Stefan Hajnoczi
2017-11-16 20:53           ` Chuck Lever
2017-11-20 16:31             ` Stefan Hajnoczi
2017-11-26 11:58             ` Jeff Layton
2017-11-26 15:53               ` Chuck Lever
2017-11-27 16:46                 ` Bruce Fields
2017-11-27 17:34                   ` Jeff Layton
2017-11-27 17:37                     ` Matt Benjamin
2017-06-30 13:23 ` [PATCH v3 09/14] SUNRPC: add AF_VSOCK backchannel support Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 10/14] NFS: add AF_VSOCK support to NFS client Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 11/14] nfsd: support vsock xprt creation Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 12/14] SUNRPC: add AF_VSOCK lock class Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 13/14] SUNRPC: vsock svcsock support Stefan Hajnoczi
2017-11-07 14:12   ` Jeff Layton [this message]
2017-11-14 14:20     ` Stefan Hajnoczi
2017-06-30 13:23 ` [PATCH v3 14/14] SUNRPC: add AF_VSOCK support to auth.unix.ip Stefan Hajnoczi
2017-07-06 18:46   ` Abbas Naderi
2017-07-10 18:05     ` Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1510063951.4705.2.camel@redhat.com \
    --to=jlayton@redhat.com \
    --cc=abiusx@google.com \
    --cc=anna.schumaker@netapp.com \
    --cc=bfields@fieldses.org \
    --cc=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=stefanha@redhat.com \
    --cc=trond.myklebust@primarydata.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).