All of lore.kernel.org
 help / color / mirror / Atom feed
From: Krystad, Peter <peter.krystad at intel.com>
To: mptcp at lists.01.org
Subject: Re: [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections
Date: Mon, 08 Oct 2018 20:22:44 +0000	[thread overview]
Message-ID: <1539030163.19533.7.camel@intel.com> (raw)
In-Reply-To: 20181008182252.GX36310@MacBook-Pro-19.local

[-- Attachment #1: Type: text/plain, Size: 16286 bytes --]

On Mon, 2018-10-08 at 11:22 -0700, Christoph Paasch wrote:
> On 05/10/18 - 15:59:10, Mat Martineau wrote:
> > From: Peter Krystad <peter.krystad(a)intel.com>
> > 
> > Add subflow_request_sock type that extends tcp_request_sock
> > and add an is_mptcp flag to tcp_request_sock to distinguish them.
> > 
> > Override the listen() and accept() methods of the MPTCP
> > socket proto_ops so they may act on the subflow socket.
> > 
> > Override the conn_request() and syn_recv_sock() handlers
> > in the inet_connection_sock to handle incoming MPTCP
> > SYNs and the ACK to the response SYN.
> > 
> > Add handling in tcp_output.c to add MP_CAPABLE to an outgoing
> > SYN-ACK response for a subflow_request_sock.
> > 
> > Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
> > ---
> >  include/linux/tcp.h   |   1 +
> >  include/net/mptcp.h   |  27 ++++++++++
> >  net/ipv4/tcp_input.c  |   1 +
> >  net/ipv4/tcp_output.c |  14 +++++
> >  net/mptcp/options.c   |  14 +++++
> >  net/mptcp/protocol.c  | 102 ++++++++++++++++++++++++++++++++++---
> >  net/mptcp/subflow.c   | 115 ++++++++++++++++++++++++++++++++++++++++--
> >  7 files changed, 264 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > index 7f0dd688376c..b109798482d3 100644
> > --- a/include/linux/tcp.h
> > +++ b/include/linux/tcp.h
> > @@ -148,6 +148,7 @@ struct tcp_request_sock {
> >  						  * FastOpen it's the seq#
> >  						  * after data-in-SYN.
> >  						  */
> > +	bool				is_mptcp;
> >  };
> >  
> >  static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index 4b08eb4ccc6f..56883a1ee2fe 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -69,11 +69,30 @@ static inline struct subflow_sock *subflow_sk(const struct sock *sk)
> >  	return (struct subflow_sock *)sk;
> >  }
> >  
> > +struct subflow_request_sock {
> > +	struct	tcp_request_sock sk;
> > +	u8	mp_capable : 1,
> > +		mp_join : 1,
> > +		checksum : 1,
> > +		backup : 1,
> > +		version : 4;
> > +	u64	local_key;
> > +	u64	remote_key;
> > +};
> > +
> > +static inline
> > +struct subflow_request_sock *subflow_rsk(const struct request_sock *rsk)
> > +{
> > +	return (struct subflow_request_sock *)rsk;
> > +}
> > +
> >  #ifdef CONFIG_MPTCP
> >  
> >  void mptcp_parse_option(const unsigned char *ptr, int opsize,
> >  			struct tcp_options_received *opt_rx);
> >  unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
> > +unsigned int mptcp_synack_options(struct request_sock *req,
> > +				  u64 *local_key, u64 *remote_key);
> >  
> >  void mptcp_finish_connect(struct sock *sk, int mp_capable);
> >  
> > @@ -96,6 +115,14 @@ static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> >  {
> >  	return 0;
> >  }
> > +static inline unsigned int mptcp_synack_options(struct request_sock *sk,
> > +						u64 *local_key,
> > +						u64 *remote_key)
> > +{
> > +	return 0;
> > +}
> > +
> > +
> >  
> >  #endif /* CONFIG_MPTCP */
> >  #endif /* __NET_MPTCP_H */
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 4cb38904bb5f..9a326729637f 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -6441,6 +6441,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
> >  
> >  	tcp_rsk(req)->af_specific = af_ops;
> >  	tcp_rsk(req)->ts_off = 0;
> > +	tcp_rsk(req)->is_mptcp = 0;
> >  
> >  	tcp_clear_options(&tmp_opt);
> >  	tmp_opt.mss_clamp = af_ops->mss_clamp;
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index 9919793e293b..780abb11dffd 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -767,6 +767,20 @@ static unsigned int tcp_synack_options(const struct sock *sk,
> >  			remaining -= need;
> >  		}
> >  	}
> > +	if (tcp_rsk(req)->is_mptcp) {
> > +		u64 local_key;
> > +		u64 remote_key;
> > +		if (mptcp_synack_options(req, &local_key, &remote_key)) {
> > +			if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
> > +				opts->options |= OPTION_MPTCP;
> > +				opts->suboptions |= OPTION_MPTCP_MPC_SYNACK;
> > +				opts->sndr_key = local_key;
> > +				opts->rcvr_key = remote_key;
> > +				remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
> > +			}
> > +		}
> > +	}
> > +
> >  	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
> >  
> >  	return MAX_TCP_OPTION_SPACE - remaining;
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > index 4b1cbc3b3efe..7e48d1d92aac 100644
> > --- a/net/mptcp/options.c
> > +++ b/net/mptcp/options.c
> > @@ -161,3 +161,17 @@ unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> >  	}
> >  	return subflow->request_mptcp;
> >  }
> > +
> > +unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
> > +				  u64 *remote_key)
> > +{
> > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +
> > +	if (subflow_req->mp_capable) {
> > +		*local_key = subflow_req->local_key;
> > +		*remote_key = subflow_req->remote_key;
> > +		pr_debug("local_key=%llu", *local_key);
> > +		pr_debug("remote_key=%llu", *remote_key);
> > +	}
> > +	return subflow_req->mp_capable;
> > +}
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index 1a3412a742ea..9f802f69a528 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -80,6 +80,45 @@ static void mptcp_close(struct sock *sk, long timeout)
> >  	}
> >  }
> >  
> > +static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
> > +				 bool kern)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	struct socket *listener = msk->subflow;
> > +	struct socket *new_sock;
> > +	struct socket *mp;
> > +	struct subflow_sock *subflow;
> > +
> > +	pr_debug("msk=%p, listener=%p", msk, listener->sk);
> > +	*err = kernel_accept(listener, &new_sock, flags);
> > +	if (*err < 0)
> > +		return NULL;
> > +
> > +	subflow = subflow_sk(new_sock->sk);
> > +	pr_debug("new_sock=%p", subflow);
> > +
> > +	*err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_MPTCP, &mp);
> > +	if (*err < 0) {
> > +		kernel_sock_shutdown(new_sock, SHUT_RDWR);
> > +		sock_release(new_sock);
> > +		return NULL;
> > +	}
> > +
> > +	msk = mptcp_sk(mp->sk);
> > +	pr_debug("msk=%p", msk);
> > +	subflow->conn = mp->sk;
> > +
> > +	if (subflow->mp_capable) {
> > +		msk->remote_key = subflow->remote_key;
> > +		msk->local_key = subflow->local_key;
> > +		msk->connection_list = new_sock;
> > +	} else {
> > +		msk->subflow = new_sock;
> > +	}
> > +
> > +	return mp->sk;
> > +}
> > +
> >  static int mptcp_get_port(struct sock *sk, unsigned short snum)
> >  {
> >  	struct mptcp_sock *msk = mptcp_sk(sk);
> > @@ -129,11 +168,16 @@ static int subflow_create(struct sock *sock)
> >  int mptcp_stream_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
> >  {
> >  	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > -	struct socket *subflow = msk->subflow;
> > +	int err;
> >  
> > -	pr_debug("msk=%p, subflow=%p", msk, subflow->sk);
> > +	pr_debug("msk=%p", msk);
> >  
> > -	return inet_bind(subflow, uaddr, addr_len);
> > +	if (msk->subflow == NULL) {
> > +		err = subflow_create(sock->sk);
> > +		if (err)
> > +			return err;
> > +	}
> > +	return inet_bind(msk->subflow, uaddr, addr_len);
> >  }
> >  
> >  int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> > @@ -153,12 +197,56 @@ int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> >  	return inet_stream_connect(msk->subflow, uaddr, addr_len, flags);
> >  }
> >  
> > +int mptcp_stream_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > +	struct socket *subflow;
> > +	int err = -EPERM;
> > +
> > +	if (msk->connection_list)
> > +		subflow = msk->connection_list;
> > +	else
> > +		subflow = msk->subflow;
> > +
> > +	err = inet_getname(subflow, uaddr, peer);
> > +
> > +	return err;
> > +}
> > +
> > +int mptcp_stream_listen(struct socket *sock, int backlog)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > +	int err;
> > +
> > +	pr_debug("msk=%p", msk);
> > +
> > +	if (msk->subflow == NULL) {
> > +		err = subflow_create(sock->sk);
> > +		if (err)
> > +			return err;
> > +	}
> > +	return inet_listen(msk->subflow, backlog);
> > +}
> > +
> > +int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags,
> > +			bool kern)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sock->sk);
> > +
> > +	pr_debug("msk=%p", msk);
> > +
> > +	if (msk->subflow == NULL) {
> > +		return -EINVAL;
> > +	}
> > +	return inet_accept(sock, newsock, flags, kern);
> > +}
> > +
> >  static struct proto mptcp_prot = {
> >  	.name		= "MPTCP",
> >  	.owner		= THIS_MODULE,
> >  	.init		= mptcp_init_sock,
> >  	.close		= mptcp_close,
> > -	.accept		= inet_csk_accept,
> > +	.accept		= mptcp_accept,
> >  	.shutdown	= tcp_shutdown,
> >  	.sendmsg	= mptcp_sendmsg,
> >  	.recvmsg	= mptcp_recvmsg,
> > @@ -176,11 +264,11 @@ const struct proto_ops mptcp_stream_ops = {
> >  	.bind		   = mptcp_stream_bind,
> >  	.connect	   = mptcp_stream_connect,
> >  	.socketpair	   = sock_no_socketpair,
> > -	.accept		   = inet_accept,
> > -	.getname	   = inet_getname,
> > +	.accept		   = mptcp_stream_accept,
> > +	.getname	   = mptcp_stream_getname,
> >  	.poll		   = tcp_poll,
> >  	.ioctl		   = inet_ioctl,
> > -	.listen		   = inet_listen,
> > +	.listen		   = mptcp_stream_listen,
> >  	.shutdown	   = inet_shutdown,
> >  	.setsockopt	   = sock_common_setsockopt,
> >  	.getsockopt	   = sock_common_getsockopt,
> > diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> > index 5e5fdcb3175f..89fcc3b746eb 100644
> > --- a/net/mptcp/subflow.c
> > +++ b/net/mptcp/subflow.c
> > @@ -53,6 +53,40 @@ static int subflow_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> >  	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
> >  }
> >  
> > +static void subflow_v4_init_req(struct request_sock *req,
> > +				const struct sock *sk_listener,
> > +				struct sk_buff *skb)
> > +{
> > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +	struct subflow_sock *listener = subflow_sk(sk_listener);
> > +	struct tcp_options_received rx_opt;
> > +
> > +	tcp_rsk(req)->is_mptcp = 1;
> > +	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
> > +
> > +	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);
> > +
> > +	rx_opt.mptcp.flags = 0;
> > +	rx_opt.mptcp.mp_capable = 0;
> > +	rx_opt.mptcp.mp_join = 0;
> > +	rx_opt.mptcp.dss = 0;
> > +	mptcp_get_options(skb, &rx_opt);
> > +
> > +	if (rx_opt.mptcp.mp_capable && listener->request_mptcp) {
> > +		subflow_req->mp_capable = 1;
> > +		if (rx_opt.mptcp.version >= listener->version)
> > +			subflow_req->version = listener->version;
> > +		else
> > +			subflow_req->version = rx_opt.mptcp.version;
> > +		if ((rx_opt.mptcp.flags & MPTCP_CAP_CHECKSUM_REQD) ||
> > +		    listener->checksum)
> > +			subflow_req->checksum = 1;
> > +		subflow_req->remote_key = rx_opt.mptcp.sndr_key;
> > +	} else {
> > +		subflow_req->mp_capable = 0;
> > +	}
> > +}
> > +
> >  static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> >  {
> >  	struct subflow_sock *subflow = subflow_sk(sk);
> > @@ -68,13 +102,66 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
> >  	}
> >  }
> >  
> > +static struct request_sock_ops subflow_request_sock_ops;
> > +static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
> > +
> > +static int subflow_conn_request(struct sock *sk, struct sk_buff *skb)
> > +{
> > +	struct subflow_sock *subflow = subflow_sk(sk);
> > +
> > +	pr_debug("subflow=%p", subflow);
> > +
> > +	/* Never answer to SYNs sent to broadcast or multicast */
> > +	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
> > +		goto drop;
> > +
> > +	return tcp_conn_request(&subflow_request_sock_ops,
> > +				&subflow_request_sock_ipv4_ops,
> > +				sk, skb);
> > +drop:
> > +	tcp_listendrop(sk);
> > +	return 0;
> > +}
> > +
> > +static struct sock *subflow_syn_recv_sock(const struct sock *sk,
> > +					  struct sk_buff *skb,
> > +					  struct request_sock *req,
> > +					  struct dst_entry *dst,
> > +					  struct request_sock *req_unhash,
> > +					  bool *own_req)
> > +{
> > +	struct subflow_sock *listener = subflow_sk(sk);
> > +	struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +	struct sock *child;
> > +
> > +	pr_debug("listener=%p, req=%p, conn=%p", sk, req, listener->conn);
> > +
> > +	child = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req);
> > +
> > +	if (child) {
> > +		struct subflow_sock *subflow = subflow_sk(child);
> > +
> > +		pr_debug("child=%p", child);
> > +		if (subflow_req->mp_capable) {
> > +			subflow->mp_capable = 1;
> > +			subflow->fourth_ack = 1;
> 
> Where is the fourth ack being triggered?
> 
For incoming connections this flag is used to prevent sending
MP_CAPABLE in any ACKs (see tcp_established_options). The field name is
a legacy choice. There is currently no mechanism to send a duplicate
ACK with the first DSS option after the three-way handshake completes.

Peter.

> 
> Christoph
> 
> > +			subflow->remote_key = subflow_req->remote_key;
> > +			subflow->local_key = subflow_req->local_key;
> > +		} else {
> > +			subflow->mp_capable = 0;
> > +		}
> > +	}
> > +
> > +	return child;
> > +}
> > +
> >  const struct inet_connection_sock_af_ops subflow_specific = {
> >  	.queue_xmit	   = ip_queue_xmit,
> >  	.send_check	   = tcp_v4_send_check,
> >  	.rebuild_header	   = inet_sk_rebuild_header,
> >  	.sk_rx_dst_set	   = subflow_finish_connect,
> > -	.conn_request	   = tcp_v4_conn_request,
> > -	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
> > +	.conn_request	   = subflow_conn_request,
> > +	.syn_recv_sock	   = subflow_syn_recv_sock,
> >  	.net_header_len	   = sizeof(struct iphdr),
> >  	.setsockopt	   = ip_setsockopt,
> >  	.getsockopt	   = ip_getsockopt,
> > @@ -112,6 +199,21 @@ static void subflow_close(struct sock *sk, long timeout)
> >  	tcp_close(sk, timeout);
> >  }
> >  
> > +static struct sock *subflow_accept(struct sock *sk, int flags, int *err,
> > +				   bool kern)
> > +{
> > +	struct subflow_sock *subflow = subflow_sk(sk);
> > +	struct sock *child;
> > +
> > +	pr_debug("subflow=%p, conn=%p", subflow, subflow->conn);
> > +
> > +	child = inet_csk_accept(sk, flags, err, kern);
> > +
> > +	pr_debug("child=%p", child);
> > +
> > +	return child;
> > +}
> > +
> >  static void subflow_destroy(struct sock *sk)
> >  {
> >  	pr_debug("subflow=%p", sk);
> > @@ -125,7 +227,7 @@ static struct proto subflow_prot = {
> >  	.close		= subflow_close,
> >  	.connect	= subflow_connect,
> >  	.disconnect	= tcp_disconnect,
> > -	.accept		= inet_csk_accept,
> > +	.accept		= subflow_accept,
> >  	.ioctl		= tcp_ioctl,
> >  	.init		= subflow_init_sock,
> >  	.destroy	= subflow_destroy,
> > @@ -169,7 +271,14 @@ int mptcp_subflow_init(void)
> >  
> >  	/* TODO: Register path manager callbacks. */
> >  
> > +	subflow_request_sock_ops = tcp_request_sock_ops;
> > +	subflow_request_sock_ops.obj_size = sizeof(struct subflow_request_sock),
> > +
> > +	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
> > +	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;
> > +
> >  	subflow_prot.twsk_prot		= tcp_prot.twsk_prot;
> > +	subflow_prot.rsk_prot		= &subflow_request_sock_ops;
> >  	subflow_prot.h.hashinfo		= tcp_prot.h.hashinfo;
> >  	err = proto_register(&subflow_prot, 1);
> >  	if (err)
> > -- 
> > 2.19.1
> > 
> > _______________________________________________
> > mptcp mailing list
> > mptcp(a)lists.01.org
> > https://lists.01.org/mailman/listinfo/mptcp
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

             reply	other threads:[~2018-10-08 20:22 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-08 20:22 Krystad, Peter [this message]
  -- strict thread matches above, loose matches on Subject: below --
2018-10-08 21:38 [MPTCP] [RFC PATCH v3 08/16] mptcp: Create SUBFLOW socket for incoming connections Krystad, Peter
2018-10-08 20:31 cpaasch
2018-10-08 18:22 Christoph Paasch
2018-10-05 22:59 Mat Martineau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1539030163.19533.7.camel@intel.com \
    --to=unknown@example.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.