All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Paasch <cpaasch at apple.com>
To: mptcp at lists.01.org
Subject: Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
Date: Tue, 27 Mar 2018 11:36:37 +0200	[thread overview]
Message-ID: <20180327093637.GA71178@MacBook-Pro-6.lan> (raw)
In-Reply-To: 1519343401-19027-2-git-send-email-rao.shoaib@oracle.com

[-- Attachment #1: Type: text/plain, Size: 11087 bytes --]

Hello,

starting a technical review of this set. Please see inline:

On 22/02/18 - 15:49:53, rao.shoaib(a)oracle.com wrote:
> From: Rao Shoaib <rao.shoaib(a)oracle.com>
> 
> Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
> ---
>  include/linux/tcp.h   |  13 +++++
>  include/net/tcp.h     | 129 ++++++++++++++++++++++++++++++++++++++++++++++----
>  net/ipv4/tcp_input.c  |  13 -----
>  net/ipv4/tcp_output.c |  11 -----
>  4 files changed, 133 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 8f4c549..612360b 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -83,6 +83,17 @@ struct tcp_sack_block {
>  	u32	end_seq;
>  };
>  
> +struct tcp_out_options {
> +	u16 options;		/* bit field of OPTION_* */
> +	u16 mss;		/* 0 to disable */
> +	u8 ws;			/* window scale, 0 to disable */
> +	u8 num_sack_blocks;	/* number of SACK blocks to include */
> +	u8 hash_size;		/* bytes in hash_location */
> +	__u8 *hash_location;	/* temporary pointer, overloaded */
> +	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> +	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
> +};
> +
>  /*These are used to set the sack_ok field in struct tcp_options_received */
>  #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
> @@ -384,6 +395,8 @@ struct tcp_sock {
>  	 */
>  	struct request_sock *fastopen_rsk;
>  	u32	*saved_syn;
> +	const struct tcp_operational_ops	*op_ops;
> +	const struct tcp_state_ops		*state_ops;
>  };
>  
>  enum tsq_enum {
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 92b06c6..8bc724f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -250,6 +250,17 @@ extern atomic_long_t tcp_memory_allocated;
>  extern struct percpu_counter tcp_sockets_allocated;
>  extern unsigned long tcp_memory_pressure;
>  
> +enum tcp_synack_type {
> +	TCP_SYNACK_NORMAL,
> +	TCP_SYNACK_FASTOPEN,
> +	TCP_SYNACK_COOKIE,
> +};
> +
> +enum tcp_queue {
> +	TCP_FRAG_IN_WRITE_QUEUE,
> +	TCP_FRAG_IN_RTX_QUEUE,
> +};
> +
>  /* optimized version of sk_under_memory_pressure() for TCP sockets */
>  static inline bool tcp_under_memory_pressure(const struct sock *sk)
>  {
> @@ -369,6 +380,18 @@ enum tcp_tw_status {
>  	TCP_TW_SYN = 3
>  };
>  
> +struct tcp_sacktag_state {
> +	u32     reord;
> +	/* Timestamps for earliest and latest never-retransmitted segment
> +	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
> +	 * but congestion control should still get an accurate delay signal.
> +	 */
> +	u64     first_sackt;
> +	u64     last_sackt;
> +	struct rate_sample *rate;
> +	int     flag;
> +	unsigned int mss_now;
> +};
>  
>  enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
>  					      struct sk_buff *skb,
> @@ -427,11 +450,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
>  int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
>  int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
>  int tcp_connect(struct sock *sk);
> -enum tcp_synack_type {
> -	TCP_SYNACK_NORMAL,
> -	TCP_SYNACK_FASTOPEN,
> -	TCP_SYNACK_COOKIE,
> -};
>  struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
>  				struct request_sock *req,
>  				struct tcp_fastopen_cookie *foc,
> @@ -522,10 +540,6 @@ void tcp_xmit_retransmit_queue(struct sock *);
>  void tcp_simple_retransmit(struct sock *);
>  void tcp_enter_recovery(struct sock *sk, bool ece_ack);
>  int tcp_trim_head(struct sock *, struct sk_buff *, u32);
> -enum tcp_queue {
> -	TCP_FRAG_IN_WRITE_QUEUE,
> -	TCP_FRAG_IN_RTX_QUEUE,
> -};
>  int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
>  		 struct sk_buff *skb, u32 len,
>  		 unsigned int mss_now, gfp_t gfp);
> @@ -1822,6 +1836,103 @@ struct tcp_sock_af_ops {
>  #endif
>  };
>  
> +/* TCP operational functions */
> +struct tcp_operational_ops {
> +	u32 (*__select_window)(struct sock *sk);
> +	u16 (*select_window)(struct sock *sk);
> +	void (*select_initial_window)(const struct sock *sk,
> +				      int __space, __u32 mss, __u32 *rcv_wnd,
> +				      __u32 *window_clamp, int wscale_ok,
> +				      __u8 *rcv_wscale, __u32 init_rcv_wnd);
> +	int (*select_size)(const struct sock *sk, bool sg,
> +			   bool first_skb, bool zc);
> +	void (*init_buffer_space)(struct sock *sk);
> +	void (*set_rto)(struct sock *sk);
> +	bool (*should_expand_sndbuf)(const struct sock *sk);
> +	void (*send_fin)(struct sock *sk);
> +	bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
> +			   int push_one, gfp_t gfp);
> +	void (*send_active_reset)(struct sock *sk, gfp_t priority);
> +	int (*write_wakeup)(struct sock *sk, int mib);
> +	bool (*prune_ofo_queue)(struct sock *sk);
> +	void (*retransmit_timer)(struct sock *sk);
> +	void (*time_wait)(struct sock *sk, int state, int timeo);
> +	void (*cleanup_rbuf)(struct sock *sk, int copied);
> +	void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
> +	void (*sndbuf_expand)(struct sock *sk);
> +	struct sk_buff *(*shift_skb_data)(struct sock *sk, struct sk_buff *skb,
> +					  struct tcp_sacktag_state *state,
> +					  u32 start_seq, u32 end_seq,
> +					  bool dup_sack);
> +	void (*grow_window)(struct sock *sk, const struct sk_buff *skb);
> +	bool (*check_rtt)(struct sock *sk);
> +	bool (*try_coalesce)(struct sock *sk, struct sk_buff *to,
> +			     struct sk_buff *from,
> +			     bool *fragstolen);
> +	int (*try_rmem_schedule)(struct sock *sk, struct sk_buff *skb,
> +				 unsigned int size);
> +	struct sk_buff *(*collapse_one)(struct sock *sk, struct sk_buff *skb,
> +					struct sk_buff_head *list,
> +					struct rb_root *root);
> +	void (*urg)(struct sock *sk, struct sk_buff *skb,
> +		    const struct tcphdr *th);
> +	int (*trim_head)(struct sock *sk, struct sk_buff *skb, u32 len);
> +	/* MPTCP wants certain skb's that may be dropped, to process options */
> +	void (*ack_only)(struct sock *sk, struct sk_buff *skb);
> +	bool (*can_send)(struct sock *sk);
> +	int (*rx)(struct sock *sk, struct sk_buff *skb, int refcounted);
> +	unsigned int (*synack_options_size)(const struct sock *sk,
> +					    struct request_sock *req,
> +					    unsigned int mss,
> +					    struct sk_buff *skb,
> +					    struct tcp_out_options *opts,
> +					    const struct tcp_md5sig_key *md5,
> +					    struct tcp_fastopen_cookie *foc);
> +	int (*options_size)(struct sock *sk, struct sk_buff *skb,
> +			    struct tcp_out_options *opts,
> +			    struct tcp_md5sig_key **md5);
> +	void (*options_write)(__be32 **ptr_ptr, struct tcp_sock *tp,
> +			      struct tcp_out_options *opts,
> +			      struct sk_buff *skb);
> +	void (*connect_init)(struct sock *sk);
> +	void (*sock_init)(struct sock *sk);
> +	int (*send_mss)(struct sock *sk, int *size_goal, int flags);
> +	int (*mtu_probe)(struct sock *sk);
> +	void (*retrans_try_collapse)(struct sock *sk, struct sk_buff *to,
> +				     int space);
> +	bool (*fastopen_synack)(struct sock *sk, struct sk_buff *synack,
> +				struct tcp_fastopen_cookie *cookie);
> +	int (*sendpage_locked)(struct sock *sk, struct page *page, int offset,
> +			       size_t size, int flags);
> +	struct request_sock *(*cookie_req_alloc)(struct sock *sk,
> +						 struct sk_buff *skb,
> +						 struct tcp_options_received *
> +						 tcp_opts,
> +						 __u32 cookie, int mss);
> +	struct sock *(*get_cookie_sock)(struct sock *sk, struct sk_buff *skb,
> +					struct request_sock *req,
> +					struct dst_entry *dst, u32 tsoff);
> +};

it is important to explain (in the commit-message) for each of these callbacks
why they are needed for an MPTCP implementation and why it can't be done
without them.

This is important to allow people to understand why the change is necessary and
where it is leading to. Ignoring potential performance hits for a moment,
callbacks also increase the code-maintenance overhead and thus each of them
will need to justified.


Christoph


> +
> +/*
> + * Pointers to state specific TCP functions
> + */
> +struct tcp_state_ops {
> +	int (*rcv_state_process)(struct sock *sk, struct sk_buff *skb);
> +	int (*close)(struct sock *sk, struct sk_buff *skb);
> +	int (*synsent)(struct sock *sk, struct sk_buff *skb,
> +		       const struct tcphdr *th);
> +	int (*listen)(struct sock *sk, struct sk_buff *skb);
> +	int (*synrcv)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait1)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait2)(struct sock *sk, struct sk_buff *skb);
> +	int (*established)(struct sock *sk, struct sk_buff *skb);
> +	int (*closewait)(struct sock *sk, struct sk_buff *skb);
> +};
> +
> +extern struct tcp_operational_ops *tcp_default_op_ops;
> +extern struct tcp_state_ops *tcp_default_state_ops;
> +
>  struct tcp_request_sock_ops {
>  	u16 mss_clamp;
>  #ifdef CONFIG_TCP_MD5SIG
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a6b48f6..be81be4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1093,19 +1093,6 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
>  	return dup_sack;
>  }
>  
> -struct tcp_sacktag_state {
> -	u32	reord;
> -	/* Timestamps for earliest and latest never-retransmitted segment
> -	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
> -	 * but congestion control should still get an accurate delay signal.
> -	 */
> -	u64	first_sackt;
> -	u64	last_sackt;
> -	struct rate_sample *rate;
> -	int	flag;
> -	unsigned int mss_now;
> -};
> -
>  /* Check if skb is fully within the SACK block. In presence of GSO skbs,
>   * the incoming SACK may not exactly match but we can find smaller MSS
>   * aligned portion of it that matches. Therefore we might need to fragment
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index e9f985e..777c8b5 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -420,17 +420,6 @@ static void smc_options_write(__be32 *ptr, u16 *options)
>  #endif
>  }
>  
> -struct tcp_out_options {
> -	u16 options;		/* bit field of OPTION_* */
> -	u16 mss;		/* 0 to disable */
> -	u8 ws;			/* window scale, 0 to disable */
> -	u8 num_sack_blocks;	/* number of SACK blocks to include */
> -	u8 hash_size;		/* bytes in hash_location */
> -	__u8 *hash_location;	/* temporary pointer, overloaded */
> -	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> -	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
> -};
> -
>  /* Write previously computed TCP options to the packet.
>   *
>   * Beware: Something in the Internet is very sensitive to the ordering of
> -- 
> 2.7.4
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

             reply	other threads:[~2018-03-27  9:36 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-27  9:36 Christoph Paasch [this message]
  -- strict thread matches above, loose matches on Subject: below --
2018-04-10  5:19 [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers Rao Shoaib
2018-04-09  4:41 Christoph Paasch
2018-03-30 18:37 Rao Shoaib
2018-03-30 17:55 Krystad, Peter
2018-03-30 17:32 Rao Shoaib
2018-02-22 23:49 rao.shoaib

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180327093637.GA71178@MacBook-Pro-6.lan \
    --to=unknown@example.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.