From: Krystad, Peter <peter.krystad at intel.com>
To: mptcp at lists.01.org
Subject: Re: [MPTCP] [RFC PATCH v2 04/19] mptcp: Handle MPTCP TCP options
Date: Wed, 13 Jun 2018 00:29:33 +0000 [thread overview]
Message-ID: <1528849771.16607.22.camel@intel.com> (raw)
In-Reply-To: 20180607185343.GK93701@MacBook-Pro-19.local
[-- Attachment #1: Type: text/plain, Size: 17394 bytes --]
Hi Christoph -
On Thu, 2018-06-07 at 11:53 -0700, Christoph Paasch wrote:
> On 05/06/18 - 16:40:23, Mat Martineau wrote:
> > From: Peter Krystad <peter.krystad(a)intel.com>
> >
> > Signed-off-by: Peter Krystad <peter.krystad(a)intel.com>
> > ---
> > include/linux/tcp.h | 14 ++++
> > include/net/mptcp.h | 49 ++++++++++++
> > include/net/tcp.h | 2 +
> > net/ipv4/tcp_input.c | 15 ++++
> > net/ipv4/tcp_output.c | 72 +++++++++++++++++
> > net/mptcp/Makefile | 2 +-
> > net/mptcp/options.c | 176 ++++++++++++++++++++++++++++++++++++++++++
> > 7 files changed, 329 insertions(+), 1 deletion(-)
> > create mode 100644 net/mptcp/options.c
> >
> > diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> > index 72705eaf4b84..17ecbebf129d 100644
> > --- a/include/linux/tcp.h
> > +++ b/include/linux/tcp.h
> > @@ -104,6 +104,16 @@ struct tcp_options_received {
> > u8 num_sacks; /* Number of SACK blocks */
> > u16 user_mss; /* mss requested by user in ioctl */
> > u16 mss_clamp; /* Maximal mss, negotiated at connection setup */
> > + struct {
> > + u8 mp_capable : 1,
> > + mp_join : 1,
> > + dss : 1,
> > + version : 4;
> > + u8 flags;
> > + u64 sndr_key;
> > + u64 rcvr_key;
> > + } mptcp;
>
> Indentation looks wrong here.
>
> Nit-picking ;-) (because I have a bigger comment further down below)
>
> > +
> > };
> >
> > static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
> > @@ -113,6 +123,7 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
> > #if IS_ENABLED(CONFIG_SMC)
> > rx_opt->smc_ok = 0;
> > #endif
> > + rx_opt->mptcp.mp_capable = rx_opt->mptcp.mp_join = rx_opt->mptcp.dss = 0;
> > }
> >
> > /* This is the max number of SACKS that we'll generate and process. It's safe
> > @@ -137,6 +148,7 @@ struct tcp_request_sock {
> > * FastOpen it's the seq#
> > * after data-in-SYN.
> > */
> > + bool is_mptcp;
> > };
> >
> > static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
> > @@ -387,6 +399,8 @@ struct tcp_sock {
> > */
> > struct request_sock *fastopen_rsk;
> > u32 *saved_syn;
> > +
> > + bool is_mptcp;
> > };
> >
> > enum tsq_enum {
> > diff --git a/include/net/mptcp.h b/include/net/mptcp.h
> > index 7f7b18b000fe..736febd2a9d4 100644
> > --- a/include/net/mptcp.h
> > +++ b/include/net/mptcp.h
> > @@ -18,6 +18,23 @@
> >
> > #include <linux/tcp.h>
> >
> > +/* MPTCP option subtypes */
> > +
> > +#define MPTCPOPT_MP_CAPABLE 0
> > +#define MPTCPOPT_MP_JOIN 1
> > +#define MPTCPOPT_DSS 2
> > +#define MPTCPOPT_ADD_ADDR 3
> > +#define MPTCPOPT_REMOVE_ADDR 4
> > +#define MPTCPOPT_MP_PRIO 5
> > +#define MPTCPOPT_MP_FAIL 6
> > +#define MPTCPOPT_MP_FASTCLOSE 7
> > +
> > +/* MPTCP handshake flags */
> > +
> > +#define MPTCP_CAP_CHECKSUM_REQD (1 << 7)
> > +#define MPTCP_CAP_EXTENSIBILITY (1 << 6)
> > +#define MPTCP_CAP_HMAC_SHA1 (1 << 0)
> > +
> > /* MPTCP connection sock */
> > struct mptcp_sock {
> > /* inet_connection_sock must be the first member */
> > @@ -30,4 +47,36 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
> > return (struct mptcp_sock *)sk;
> > }
> >
> > +#ifdef CONFIG_MPTCP
> > +
> > +void mptcp_parse_option(const unsigned char *ptr, int opsize,
> > + struct tcp_options_received *opt_rx);
> > +unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key);
> > +unsigned int mptcp_synack_options(struct request_sock *req,
> > + u64 *local_key, u64 *remote_key);
> > +
> > +void mptcp_get_options(const struct sk_buff *skb,
> > + struct tcp_options_received *options);
> > +
> > +#else
> > +
> > +static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
> > + struct tcp_options_received *opt_rx)
> > +{
> > +}
> > +
> > +static inline unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> > +{
> > + return 0;
> > +}
> > +static inline unsigned int mptcp_synack_options(struct request_sock *sk,
> > + u64 *local_key,
> > + u64 *remote_key)
> > +{
> > + return 0;
> > +}
> > +
> > +
> > +
> > +#endif /* CONFIG_MPTCP */
> > #endif /* __NET_MPTCP_H */
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index c8716de4547c..a1d4e744b258 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -214,6 +214,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
> > #define TCPOLEN_MD5SIG_ALIGNED 20
> > #define TCPOLEN_MSS_ALIGNED 4
> > #define TCPOLEN_EXP_SMC_BASE_ALIGNED 8
> > +#define TCPOLEN_MPTCP_MPC_SYN 12
> > +#define TCPOLEN_MPTCP_MPC_SYNACK 20
> >
> > /* Flags in tp->nonagle */
> > #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
> > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> > index 355d3dffd021..bdc36001ce90 100644
> > --- a/net/ipv4/tcp_input.c
> > +++ b/net/ipv4/tcp_input.c
> > @@ -78,6 +78,7 @@
> > #include <linux/errqueue.h>
> > #include <trace/events/tcp.h>
> > #include <linux/static_key.h>
> > +#include <net/mptcp.h>
> >
> > int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
> >
> > @@ -3842,6 +3843,10 @@ void tcp_parse_options(const struct net *net,
> > */
> > break;
> > #endif
> > + case TCPOPT_MPTCP:
> > + mptcp_parse_option(ptr, opsize, opt_rx);
> > + break;
> > +
> > case TCPOPT_FASTOPEN:
> > tcp_parse_fastopen_option(
> > opsize - TCPOLEN_FASTOPEN_BASE,
> > @@ -5763,6 +5768,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
> > tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
> > tcp_initialize_rcv_mss(sk);
> >
> > + // @@ could use a callout here
> > + if (tp->is_mptcp) {
> > + struct subflow_sock *subflow = subflow_sk(sk);
> > + if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) {
> > + subflow->mp_capable = 1;
> > + subflow->remote_key = tp->rx_opt.mptcp.sndr_key;
> > + }
> > + }
> > +
> > /* Remember, tcp_poll() does not lock socket!
> > * Change state from SYN-SENT only after copied_seq
> > * is initialized. */
> > @@ -6339,6 +6353,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
> >
> > tcp_rsk(req)->af_specific = af_ops;
> > tcp_rsk(req)->ts_off = 0;
> > + tcp_rsk(req)->is_mptcp = 0;
> >
> > tcp_clear_options(&tmp_opt);
> > tmp_opt.mss_clamp = af_ops->mss_clamp;
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index 8e08b409c71e..fc49d80d3ba7 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -37,6 +37,7 @@
> > #define pr_fmt(fmt) "TCP: " fmt
> >
> > #include <net/tcp.h>
> > +#include <net/mptcp.h>
> >
> > #include <linux/compiler.h>
> > #include <linux/gfp.h>
> > @@ -411,6 +412,8 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
> > #define OPTION_WSCALE (1 << 3)
> > #define OPTION_FAST_OPEN_COOKIE (1 << 8)
> > #define OPTION_SMC (1 << 9)
> > +#define OPTION_MPTCP (1 << 10)
> > +#define OPTION_MPTCP_ACK (1 << 11)
> >
> > static void smc_options_write(__be32 *ptr, u16 *options)
> > {
> > @@ -436,6 +439,8 @@ struct tcp_out_options {
> > __u8 *hash_location; /* temporary pointer, overloaded */
> > __u32 tsval, tsecr; /* need to include OPTION_TS */
> > struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
> > + u64 sndr_key;
> > + u64 rcvr_key;
> > };
> >
> > /* Write previously computed TCP options to the packet.
> > @@ -546,6 +551,29 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
> > }
> >
> > smc_options_write(ptr, &options);
> > +
> > + if (OPTION_MPTCP & options) {
> > + u8 *p = (u8 *)ptr;
> > + *p++ = 0x1e; // TCP option: Multipath TCP
> > + *p++ = 12; // length
> > + *p++ = 0; // subtype=MP_CAPABLE | version=0
> > + *p++ = 0x1; // flags=HMAC-SHA1
> > + memcpy(p, &opts->sndr_key, 8);
> > + pr_debug("tcp_options_write: opts->sndr_key=%llu", opts->sndr_key);
> > + }
> > +
> > + if (OPTION_MPTCP_ACK & options) {
> > + u8 *p = (u8 *)ptr;
> > + *p++ = 0x1e; // TCP option: Multipath TCP
> > + *p++ = 20; // length
> > + *p++ = 0; // subtype=MP_CAPABLE | version=0
> > + *p++ = 0x1; // flags=HMAC-SHA1
> > + memcpy(p, &opts->sndr_key, 8);
> > + pr_debug("tcp_options_write: opts->sndr_key=%llu", opts->sndr_key);
> > + p += 8;
> > + memcpy(p, &opts->rcvr_key, 8);
> > + pr_debug("tcp_options_write: opts->rcvr_key=%llu", opts->rcvr_key);
> > + }
> > }
> >
> > static void smc_set_option(const struct tcp_sock *tp,
> > @@ -649,6 +677,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
> >
> > smc_set_option(tp, opts, &remaining);
> >
> > + if (tp->is_mptcp) {
> > + u64 local_key;
> > + if (mptcp_syn_options(sk, &local_key)) {
> > + opts->options |= OPTION_MPTCP;
> > + opts->sndr_key = local_key;
> > + remaining -= TCPOLEN_MPTCP_MPC_SYN;
> > + }
> > + }
> > +
> > return MAX_TCP_OPTION_SPACE - remaining;
> > }
> >
> > @@ -709,6 +746,18 @@ static unsigned int tcp_synack_options(const struct sock *sk,
> > remaining -= need;
> > }
> > }
> > + if (tcp_rsk(req)->is_mptcp) {
> > + u64 local_key;
> > + u64 remote_key;
> > + if (mptcp_synack_options(req, &local_key, &remote_key)) {
> > + if (remaining >= TCPOLEN_MPTCP_MPC_SYNACK) {
> > + opts->options |= OPTION_MPTCP_ACK;
> > + opts->sndr_key = local_key;
> > + opts->rcvr_key = remote_key;
> > + remaining -= TCPOLEN_MPTCP_MPC_SYNACK;
> > + }
> > + }
> > + }
> >
> > smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
> >
> > @@ -757,6 +806,29 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
> > opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
> > }
> >
> > + if (tp->is_mptcp) {
> > + struct subflow_sock *subflow = subflow_sk(sk);
> > + pr_debug("tcp_established_options: subflow=%p", subflow);
> > + if (subflow->mp_capable) {
> > + const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
> > + pr_debug("tcp_established_options: remaining=%d", remaining);
> > + if (!subflow->fourth_ack) {
> > + pr_debug("tcp_established_options: OPTION_MPTCP_ACK");
> > + if (remaining >= 20) {
> > + opts->options |= OPTION_MPTCP_ACK;
> > + opts->sndr_key = subflow->local_key;
> > + opts->rcvr_key = subflow->remote_key;
> > + size += 20;
> > + }
> > + subflow->fourth_ack = 1;
> > + // @@ also this is where first DSS goes in?
> > + }
> > + else {
> > + pr_debug("tcp_established_options: OPTION_MPTCP_DSS");
> > + // @@ send DSS based on remaining
> > + }
> > + }
> > + }
>
> You would need to have this code before the SACK-code. Because otherwise
> SACK might fill up the available space and you will end up not writing the
> DSS-option.
>
> This also implies that you need to check whether at all there is space for a
> SACK-block.
>
This is addressed in patch 16 of this set. I'll clean up the indenting.
Peter.
> Christoph
>
> > return size;
> > }
> >
> > diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
> > index 5624e7d51d48..2bd18e3b9fda 100644
> > --- a/net/mptcp/Makefile
> > +++ b/net/mptcp/Makefile
> > @@ -1,3 +1,3 @@
> > obj-$(CONFIG_MPTCP) += mptcp.o
> >
> > -mptcp-y := protocol.o
> > +mptcp-y := protocol.o options.o
> > diff --git a/net/mptcp/options.c b/net/mptcp/options.c
> > new file mode 100644
> > index 000000000000..2ef7ba20b33b
> > --- /dev/null
> > +++ b/net/mptcp/options.c
> > @@ -0,0 +1,176 @@
> > +/*
> > + * Multipath TCP
> > + *
> > + * Copyright (c) 2017, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU General Public License,
> > + * version 2, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> > + * more details.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <net/tcp.h>
> > +#include <net/mptcp.h>
> > +
> > +void mptcp_parse_option(const unsigned char *ptr, int opsize,
> > + struct tcp_options_received *opt_rx)
> > +{
> > + u8 subtype;
> > + u64 *p;
> > +
> > + opsize -= 2;
> > + subtype = *ptr++;
> > +
> > + /* MPTCPOPT_MP_CAPABLE
> > + * 0: 4MSB=subtype, 4LSB=version
> > + * 1: Handshake flags
> > + * 2-9: Sender key
> > + * 10-17: Receiver key (optional)
> > + */
> > + switch (subtype & 0xF0) {
> > + case 0x00:
> > + pr_debug("MP_CAPABLE");
> > + pr_debug("flags=%02x", *ptr);
> > + opt_rx->mptcp.mp_capable = 1;
> > + opt_rx->mptcp.version = subtype & 0x0F;
> > + opt_rx->mptcp.flags = *ptr++;
> > + p = (u64 *)ptr;
> > + pr_debug("sndr_key=%llu", *p);
> > + opt_rx->mptcp.sndr_key = *p++;
> > + if (opsize > 10) {
> > + pr_debug("rcvr_key=%llu", *p);
> > + opt_rx->mptcp.rcvr_key = *p++;
> > + }
> > + break;
> > +
> > + /* MPTCPOPT_MP_JOIN
> > + *
> > + * Initial SYN
> > + * 0: 4MSB=subtype, 000, 1LSB=Backup
> > + * 1: Address ID
> > + * 2-5: Receiver token
> > + * 6-9: Sender random number
> > + *
> > + * SYN/ACK response
> > + * 0: 4MSB=subtype, 000, 1LSB=Backup
> > + * 1: Address ID
> > + * 2-9: Sender truncated HMAC
> > + * 10-13: Sender random number
> > + *
> > + * Third ACK
> > + * 0: 4MSB=subtype, 0000
> > + * 1: 0 (Reserved)
> > + * 2-21: Sender HMAC
> > + */
> > +
> > + /* MPTCPOPT_DSS
> > + * 0: 4MSB=subtype, 0000
> > + * 1: 3MSB=0, F=Data FIN, m=DSN length, M=has DSN/SSN/DLL/checksum,
> > + * a=DACK length, A=has DACK
> > + * 0, 4, or 8 bytes of DACK (depending on A/a)
> > + * 0, 4, or 8 bytes of DSN (depending on M/m)
> > + * 0 or 4 bytes of SSN (depending on M)
> > + * 0 or 2 bytes of DLL (depending on M)
> > + * 0 or 2 bytes of checksum (depending on M)
> > + */
> > + case 0x20:
> > + pr_debug("DSS");
> > + opt_rx->mptcp.dss = 1;
> > + break;
> > +
> > + /* MPTCPOPT_ADD_ADDR
> > + * 0: 4MSB=subtype, 4LSB=IP version (4 or 6)
> > + * 1: Address ID
> > + * 4 or 16 bytes of address (depending on ip version)
> > + * 0 or 2 bytes of port (depending on length)
> > + */
> > +
> > + /* MPTCPOPT_REMOVE_ADDR
> > + * 0: 4MSB=subtype, 0000
> > + * 1: Address ID
> > + * Additional bytes: More address IDs (depending on length)
> > + */
> > +
> > + /* MPTCPOPT_MP_PRIO
> > + * 0: 4MSB=subtype, 000, 1LSB=Backup
> > + * 1: Address ID (optional, current addr implied if not present)
> > + */
> > +
> > + /* MPTCPOPT_MP_FAIL
> > + * 0: 4MSB=subtype, 0000
> > + * 1: 0 (Reserved)
> > + * 2-9: DSN
> > + */
> > +
> > + /* MPTCPOPT_MP_FASTCLOSE
> > + * 0: 4MSB=subtype, 0000
> > + * 1: 0 (Reserved)
> > + * 2-9: Receiver key
> > + */
> > + default:
> > + break;
> > + }
> > +}
> > +
> > +void mptcp_get_options(const struct sk_buff *skb,
> > + struct tcp_options_received *opt_rx)
> > +{
> > + const unsigned char *ptr;
> > + const struct tcphdr *th = tcp_hdr(skb);
> > + int length = (th->doff * 4) - sizeof(struct tcphdr);
> > +
> > + ptr = (const unsigned char *)(th + 1);
> > +
> > + while (length > 0) {
> > + int opcode = *ptr++;
> > + int opsize;
> > +
> > + switch (opcode) {
> > + case TCPOPT_EOL:
> > + return;
> > + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
> > + length--;
> > + continue;
> > + default:
> > + opsize = *ptr++;
> > + if (opsize < 2) /* "silly options" */
> > + return;
> > + if (opsize > length)
> > + return; /* don't parse partial options */
> > + if (opcode == TCPOPT_MPTCP)
> > + mptcp_parse_option(ptr, opsize, opt_rx);
> > + ptr += opsize - 2;
> > + length -= opsize;
> > + }
> > + }
> > +}
> > +
> > +unsigned int mptcp_syn_options(struct sock *sk, u64 *local_key)
> > +{
> > + struct subflow_sock *subflow = subflow_sk(sk);
> > +
> > + if (subflow->request_mptcp) {
> > + pr_debug("local_key=%llu", subflow->local_key);
> > + *local_key = subflow->local_key;
> > + }
> > + return subflow->request_mptcp;
> > +}
> > +
> > +unsigned int mptcp_synack_options(struct request_sock *req, u64 *local_key,
> > + u64 *remote_key)
> > +{
> > + struct subflow_request_sock *subflow_req = subflow_rsk(req);
> > +
> > + if (subflow_req->mp_capable) {
> > + *local_key = subflow_req->local_key;
> > + *remote_key = subflow_req->remote_key;
> > + pr_debug("local_key=%llu", *local_key);
> > + pr_debug("remote_key=%llu", *remote_key);
> > + }
> > + return subflow_req->mp_capable;
> > +}
> > --
> > 2.17.1
> >
> > _______________________________________________
> > mptcp mailing list
> > mptcp(a)lists.01.org
> > https://lists.01.org/mailman/listinfo/mptcp
>
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp
next reply other threads:[~2018-06-13 0:29 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-13 0:29 Krystad, Peter [this message]
-- strict thread matches above, loose matches on Subject: below --
2018-06-07 18:53 [MPTCP] [RFC PATCH v2 04/19] mptcp: Handle MPTCP TCP options Christoph Paasch
2018-06-05 23:40 Mat Martineau
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1528849771.16607.22.camel@intel.com \
--to=unknown@example.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.