All of lore.kernel.org
 help / color / mirror / Atom feed
From: Krystad, Peter <peter.krystad at intel.com>
To: mptcp at lists.01.org
Subject: Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
Date: Fri, 30 Mar 2018 17:55:22 +0000	[thread overview]
Message-ID: <1522432520.16359.88.camel@intel.com> (raw)
In-Reply-To: 1519343401-19027-2-git-send-email-rao.shoaib@oracle.com

[-- Attachment #1: Type: text/plain, Size: 10979 bytes --]


Hi - 

Also beginning a review, see inline....

On Thu, 2018-02-22 at 15:49 -0800, rao.shoaib(a)oracle.com wrote:
> From: Rao Shoaib <rao.shoaib(a)oracle.com>
> 
> Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
> ---
>  include/linux/tcp.h   |  13 +++++
>  include/net/tcp.h     | 129
> ++++++++++++++++++++++++++++++++++++++++++++++----
>  net/ipv4/tcp_input.c  |  13 -----
>  net/ipv4/tcp_output.c |  11 -----
>  4 files changed, 133 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 8f4c549..612360b 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -83,6 +83,17 @@ struct tcp_sack_block {
>  	u32	end_seq;
>  };
>  
> +struct tcp_out_options {
> +	u16 options;		/* bit field of OPTION_* */
> +	u16 mss;		/* 0 to disable */
> +	u8 ws;			/* window scale, 0 to disable
> */
> +	u8 num_sack_blocks;	/* number of SACK blocks to
> include */
> +	u8 hash_size;		/* bytes in hash_location */
> +	__u8 *hash_location;	/* temporary pointer, overloaded
> */
> +	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> +	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast
> open cookie */
> +};
> +
>  /*These are used to set the sack_ok field in struct
> tcp_options_received */
>  #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from
> peer*/
> @@ -384,6 +395,8 @@ struct tcp_sock {
>  	 */
>  	struct request_sock *fastopen_rsk;
>  	u32	*saved_syn;
> +	const struct tcp_operational_ops	*op_ops;
> +	const struct tcp_state_ops		*state_ops;
>  };
>  
>  enum tsq_enum {
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 92b06c6..8bc724f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -250,6 +250,17 @@ extern atomic_long_t tcp_memory_allocated;
>  extern struct percpu_counter tcp_sockets_allocated;
>  extern unsigned long tcp_memory_pressure;
>  
> +enum tcp_synack_type {
> +	TCP_SYNACK_NORMAL,
> +	TCP_SYNACK_FASTOPEN,
> +	TCP_SYNACK_COOKIE,
> +};
> +
> +enum tcp_queue {
> +	TCP_FRAG_IN_WRITE_QUEUE,
> +	TCP_FRAG_IN_RTX_QUEUE,
> +};
> +
>  /* optimized version of sk_under_memory_pressure() for TCP sockets
> */
>  static inline bool tcp_under_memory_pressure(const struct sock *sk)
>  {
> @@ -369,6 +380,18 @@ enum tcp_tw_status {
>  	TCP_TW_SYN = 3
>  };
>  
> +struct tcp_sacktag_state {
> +	u32     reord;
> +	/* Timestamps for earliest and latest never-retransmitted
> segment
> +	 * that was SACKed. RTO needs the earliest RTT to stay
> conservative,
> +	 * but congestion control should still get an accurate delay
> signal.
> +	 */
> +	u64     first_sackt;
> +	u64     last_sackt;
> +	struct rate_sample *rate;
> +	int     flag;
> +	unsigned int mss_now;
> +};
>  
>  enum tcp_tw_status tcp_timewait_state_process(struct
> inet_timewait_sock *tw,
>  					      struct sk_buff *skb,
> @@ -427,11 +450,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct
> sock *sk, struct sk_buff *skb,
>  int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
>  int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int
> addr_len);
>  int tcp_connect(struct sock *sk);
> -enum tcp_synack_type {
> -	TCP_SYNACK_NORMAL,
> -	TCP_SYNACK_FASTOPEN,
> -	TCP_SYNACK_COOKIE,
> -};
>  struct sk_buff *tcp_make_synack(const struct sock *sk, struct
> dst_entry *dst,
>  				struct request_sock *req,
>  				struct tcp_fastopen_cookie *foc,
> @@ -522,10 +540,6 @@ void tcp_xmit_retransmit_queue(struct sock *);
>  void tcp_simple_retransmit(struct sock *);
>  void tcp_enter_recovery(struct sock *sk, bool ece_ack);
>  int tcp_trim_head(struct sock *, struct sk_buff *, u32);
> -enum tcp_queue {
> -	TCP_FRAG_IN_WRITE_QUEUE,
> -	TCP_FRAG_IN_RTX_QUEUE,
> -};
>  int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
>  		 struct sk_buff *skb, u32 len,
>  		 unsigned int mss_now, gfp_t gfp);
> @@ -1822,6 +1836,103 @@ struct tcp_sock_af_ops {
>  #endif
>  };
>  
> +/* TCP operational functions */
> +struct tcp_operational_ops {
> +	u32 (*__select_window)(struct sock *sk);
> +	u16 (*select_window)(struct sock *sk);
> +	void (*select_initial_window)(const struct sock *sk,
> +				      int __space, __u32 mss, __u32
> *rcv_wnd,
> +				      __u32 *window_clamp, int
> wscale_ok,
> +				      __u8 *rcv_wscale, __u32
> init_rcv_wnd);
> +	int (*select_size)(const struct sock *sk, bool sg,
> +			   bool first_skb, bool zc);
> +	void (*init_buffer_space)(struct sock *sk);
> +	void (*set_rto)(struct sock *sk);
> +	bool (*should_expand_sndbuf)(const struct sock *sk);
> +	void (*send_fin)(struct sock *sk);
> +	bool (*write_xmit)(struct sock *sk, unsigned int mss_now,
> int nonagle,
> +			   int push_one, gfp_t gfp);
> +	void (*send_active_reset)(struct sock *sk, gfp_t priority);
> +	int (*write_wakeup)(struct sock *sk, int mib);
> +	bool (*prune_ofo_queue)(struct sock *sk);
> +	void (*retransmit_timer)(struct sock *sk);
> +	void (*time_wait)(struct sock *sk, int state, int timeo);
> +	void (*cleanup_rbuf)(struct sock *sk, int copied);
> +	void (*cwnd_validate)(struct sock *sk, bool
> is_cwnd_limited);
> +	void (*sndbuf_expand)(struct sock *sk);
> +	struct sk_buff *(*shift_skb_data)(struct sock *sk, struct
> sk_buff *skb,
> +					  struct tcp_sacktag_state
> *state,
> +					  u32 start_seq, u32
> end_seq,
> +					  bool dup_sack);
> +	void (*grow_window)(struct sock *sk, const struct sk_buff
> *skb);
> +	bool (*check_rtt)(struct sock *sk);
> +	bool (*try_coalesce)(struct sock *sk, struct sk_buff *to,
> +			     struct sk_buff *from,
> +			     bool *fragstolen);
> +	int (*try_rmem_schedule)(struct sock *sk, struct sk_buff
> *skb,
> +				 unsigned int size);
> +	struct sk_buff *(*collapse_one)(struct sock *sk, struct
> sk_buff *skb,
> +					struct sk_buff_head *list,
> +					struct rb_root *root);
> +	void (*urg)(struct sock *sk, struct sk_buff *skb,
> +		    const struct tcphdr *th);
> +	int (*trim_head)(struct sock *sk, struct sk_buff *skb, u32
> len);
> +	/* MPTCP wants certain skb's that may be dropped, to process
> options */
> +	void (*ack_only)(struct sock *sk, struct sk_buff *skb);
> +	bool (*can_send)(struct sock *sk);
> +	int (*rx)(struct sock *sk, struct sk_buff *skb, int
> refcounted);
> +	unsigned int (*synack_options_size)(const struct sock *sk,
> +					    struct request_sock
> *req,
> +					    unsigned int mss,
> +					    struct sk_buff *skb,
> +					    struct tcp_out_options
> *opts,
> +					    const struct
> tcp_md5sig_key *md5,
> +					    struct
> tcp_fastopen_cookie *foc);
> +	int (*options_size)(struct sock *sk, struct sk_buff *skb,
> +			    struct tcp_out_options *opts,
> +			    struct tcp_md5sig_key **md5);
> +	void (*options_write)(__be32 **ptr_ptr, struct tcp_sock *tp,
> +			      struct tcp_out_options *opts,
> +			      struct sk_buff *skb);
> +	void (*connect_init)(struct sock *sk);
> +	void (*sock_init)(struct sock *sk);
> +	int (*send_mss)(struct sock *sk, int *size_goal, int flags);
> +	int (*mtu_probe)(struct sock *sk);
> +	void (*retrans_try_collapse)(struct sock *sk, struct sk_buff
> *to,
> +				     int space);
> +	bool (*fastopen_synack)(struct sock *sk, struct sk_buff
> *synack,
> +				struct tcp_fastopen_cookie *cookie);
> +	int (*sendpage_locked)(struct sock *sk, struct page *page,
> int offset,
> +			       size_t size, int flags);
> +	struct request_sock *(*cookie_req_alloc)(struct sock *sk,
> +						 struct sk_buff
> *skb,
> +						 struct
> tcp_options_received *
> +						 tcp_opts,
> +						 __u32 cookie, int
> mss);
> +	struct sock *(*get_cookie_sock)(struct sock *sk, struct
> sk_buff *skb,
> +					struct request_sock *req,
> +					struct dst_entry *dst, u32
> tsoff);
> +};
> +
> +/*
> + * Pointers to state specific TCP functions
> + */
> +struct tcp_state_ops {
> +	int (*rcv_state_process)(struct sock *sk, struct sk_buff
> *skb);
> +	int (*close)(struct sock *sk, struct sk_buff *skb);
> +	int (*synsent)(struct sock *sk, struct sk_buff *skb,
> +		       const struct tcphdr *th);
> +	int (*listen)(struct sock *sk, struct sk_buff *skb);
> +	int (*synrcv)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait1)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait2)(struct sock *sk, struct sk_buff *skb);
> +	int (*established)(struct sock *sk, struct sk_buff *skb);
> +	int (*closewait)(struct sock *sk, struct sk_buff *skb);
> +};
> +

This structure looks like a facility to have different processing for
MPTCP for each of these states. Peaking at the patch for the mptcp side
shows only synsent being re-implemented. If it is not necessary to
rework all of these it would be less impact to TCP code to just address
the synsent case directly and do away with this.  

Peter.

> +extern struct tcp_operational_ops *tcp_default_op_ops;
> +extern struct tcp_state_ops *tcp_default_state_ops;
> +
>  struct tcp_request_sock_ops {
>  	u16 mss_clamp;
>  #ifdef CONFIG_TCP_MD5SIG
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a6b48f6..be81be4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1093,19 +1093,6 @@ static bool tcp_check_dsack(struct sock *sk,
> const struct sk_buff *ack_skb,
>  	return dup_sack;
>  }
>  
> -struct tcp_sacktag_state {
> -	u32	reord;
> -	/* Timestamps for earliest and latest never-retransmitted
> segment
> -	 * that was SACKed. RTO needs the earliest RTT to stay
> conservative,
> -	 * but congestion control should still get an accurate delay
> signal.
> -	 */
> -	u64	first_sackt;
> -	u64	last_sackt;
> -	struct rate_sample *rate;
> -	int	flag;
> -	unsigned int mss_now;
> -};
> -
>  /* Check if skb is fully within the SACK block. In presence of GSO
> skbs,
>   * the incoming SACK may not exactly match but we can find smaller
> MSS
>   * aligned portion of it that matches. Therefore we might need to
> fragment
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index e9f985e..777c8b5 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -420,17 +420,6 @@ static void smc_options_write(__be32 *ptr, u16
> *options)
>  #endif
>  }
>  
> -struct tcp_out_options {
> -	u16 options;		/* bit field of OPTION_* */
> -	u16 mss;		/* 0 to disable */
> -	u8 ws;			/* window scale, 0 to disable
> */
> -	u8 num_sack_blocks;	/* number of SACK blocks to
> include */
> -	u8 hash_size;		/* bytes in hash_location */
> -	__u8 *hash_location;	/* temporary pointer, overloaded
> */
> -	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> -	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast
> open cookie */
> -};
> -
>  /* Write previously computed TCP options to the packet.
>   *
>   * Beware: Something in the Internet is very sensitive to the
> ordering of

             reply	other threads:[~2018-03-30 17:55 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-30 17:55 Krystad, Peter [this message]
  -- strict thread matches above, loose matches on Subject: below --
2018-04-10  5:19 [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers Rao Shoaib
2018-04-09  4:41 Christoph Paasch
2018-03-30 18:37 Rao Shoaib
2018-03-30 17:32 Rao Shoaib
2018-03-27  9:36 Christoph Paasch
2018-02-22 23:49 rao.shoaib

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1522432520.16359.88.camel@intel.com \
    --to=unknown@example.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.