All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-03-30 17:55 Krystad, Peter
  0 siblings, 0 replies; 7+ messages in thread
From: Krystad, Peter @ 2018-03-30 17:55 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 10979 bytes --]


Hi - 

Also beginning a review, see inline....

On Thu, 2018-02-22 at 15:49 -0800, rao.shoaib(a)oracle.com wrote:
> From: Rao Shoaib <rao.shoaib(a)oracle.com>
> 
> Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
> ---
>  include/linux/tcp.h   |  13 +++++
>  include/net/tcp.h     | 129
> ++++++++++++++++++++++++++++++++++++++++++++++----
>  net/ipv4/tcp_input.c  |  13 -----
>  net/ipv4/tcp_output.c |  11 -----
>  4 files changed, 133 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 8f4c549..612360b 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -83,6 +83,17 @@ struct tcp_sack_block {
>  	u32	end_seq;
>  };
>  
> +struct tcp_out_options {
> +	u16 options;		/* bit field of OPTION_* */
> +	u16 mss;		/* 0 to disable */
> +	u8 ws;			/* window scale, 0 to disable
> */
> +	u8 num_sack_blocks;	/* number of SACK blocks to
> include */
> +	u8 hash_size;		/* bytes in hash_location */
> +	__u8 *hash_location;	/* temporary pointer, overloaded
> */
> +	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> +	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast
> open cookie */
> +};
> +
>  /*These are used to set the sack_ok field in struct
> tcp_options_received */
>  #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from
> peer*/
> @@ -384,6 +395,8 @@ struct tcp_sock {
>  	 */
>  	struct request_sock *fastopen_rsk;
>  	u32	*saved_syn;
> +	const struct tcp_operational_ops	*op_ops;
> +	const struct tcp_state_ops		*state_ops;
>  };
>  
>  enum tsq_enum {
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 92b06c6..8bc724f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -250,6 +250,17 @@ extern atomic_long_t tcp_memory_allocated;
>  extern struct percpu_counter tcp_sockets_allocated;
>  extern unsigned long tcp_memory_pressure;
>  
> +enum tcp_synack_type {
> +	TCP_SYNACK_NORMAL,
> +	TCP_SYNACK_FASTOPEN,
> +	TCP_SYNACK_COOKIE,
> +};
> +
> +enum tcp_queue {
> +	TCP_FRAG_IN_WRITE_QUEUE,
> +	TCP_FRAG_IN_RTX_QUEUE,
> +};
> +
>  /* optimized version of sk_under_memory_pressure() for TCP sockets
> */
>  static inline bool tcp_under_memory_pressure(const struct sock *sk)
>  {
> @@ -369,6 +380,18 @@ enum tcp_tw_status {
>  	TCP_TW_SYN = 3
>  };
>  
> +struct tcp_sacktag_state {
> +	u32     reord;
> +	/* Timestamps for earliest and latest never-retransmitted
> segment
> +	 * that was SACKed. RTO needs the earliest RTT to stay
> conservative,
> +	 * but congestion control should still get an accurate delay
> signal.
> +	 */
> +	u64     first_sackt;
> +	u64     last_sackt;
> +	struct rate_sample *rate;
> +	int     flag;
> +	unsigned int mss_now;
> +};
>  
>  enum tcp_tw_status tcp_timewait_state_process(struct
> inet_timewait_sock *tw,
>  					      struct sk_buff *skb,
> @@ -427,11 +450,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct
> sock *sk, struct sk_buff *skb,
>  int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
>  int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int
> addr_len);
>  int tcp_connect(struct sock *sk);
> -enum tcp_synack_type {
> -	TCP_SYNACK_NORMAL,
> -	TCP_SYNACK_FASTOPEN,
> -	TCP_SYNACK_COOKIE,
> -};
>  struct sk_buff *tcp_make_synack(const struct sock *sk, struct
> dst_entry *dst,
>  				struct request_sock *req,
>  				struct tcp_fastopen_cookie *foc,
> @@ -522,10 +540,6 @@ void tcp_xmit_retransmit_queue(struct sock *);
>  void tcp_simple_retransmit(struct sock *);
>  void tcp_enter_recovery(struct sock *sk, bool ece_ack);
>  int tcp_trim_head(struct sock *, struct sk_buff *, u32);
> -enum tcp_queue {
> -	TCP_FRAG_IN_WRITE_QUEUE,
> -	TCP_FRAG_IN_RTX_QUEUE,
> -};
>  int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
>  		 struct sk_buff *skb, u32 len,
>  		 unsigned int mss_now, gfp_t gfp);
> @@ -1822,6 +1836,103 @@ struct tcp_sock_af_ops {
>  #endif
>  };
>  
> +/* TCP operational functions */
> +struct tcp_operational_ops {
> +	u32 (*__select_window)(struct sock *sk);
> +	u16 (*select_window)(struct sock *sk);
> +	void (*select_initial_window)(const struct sock *sk,
> +				      int __space, __u32 mss, __u32
> *rcv_wnd,
> +				      __u32 *window_clamp, int
> wscale_ok,
> +				      __u8 *rcv_wscale, __u32
> init_rcv_wnd);
> +	int (*select_size)(const struct sock *sk, bool sg,
> +			   bool first_skb, bool zc);
> +	void (*init_buffer_space)(struct sock *sk);
> +	void (*set_rto)(struct sock *sk);
> +	bool (*should_expand_sndbuf)(const struct sock *sk);
> +	void (*send_fin)(struct sock *sk);
> +	bool (*write_xmit)(struct sock *sk, unsigned int mss_now,
> int nonagle,
> +			   int push_one, gfp_t gfp);
> +	void (*send_active_reset)(struct sock *sk, gfp_t priority);
> +	int (*write_wakeup)(struct sock *sk, int mib);
> +	bool (*prune_ofo_queue)(struct sock *sk);
> +	void (*retransmit_timer)(struct sock *sk);
> +	void (*time_wait)(struct sock *sk, int state, int timeo);
> +	void (*cleanup_rbuf)(struct sock *sk, int copied);
> +	void (*cwnd_validate)(struct sock *sk, bool
> is_cwnd_limited);
> +	void (*sndbuf_expand)(struct sock *sk);
> +	struct sk_buff *(*shift_skb_data)(struct sock *sk, struct
> sk_buff *skb,
> +					  struct tcp_sacktag_state
> *state,
> +					  u32 start_seq, u32
> end_seq,
> +					  bool dup_sack);
> +	void (*grow_window)(struct sock *sk, const struct sk_buff
> *skb);
> +	bool (*check_rtt)(struct sock *sk);
> +	bool (*try_coalesce)(struct sock *sk, struct sk_buff *to,
> +			     struct sk_buff *from,
> +			     bool *fragstolen);
> +	int (*try_rmem_schedule)(struct sock *sk, struct sk_buff
> *skb,
> +				 unsigned int size);
> +	struct sk_buff *(*collapse_one)(struct sock *sk, struct
> sk_buff *skb,
> +					struct sk_buff_head *list,
> +					struct rb_root *root);
> +	void (*urg)(struct sock *sk, struct sk_buff *skb,
> +		    const struct tcphdr *th);
> +	int (*trim_head)(struct sock *sk, struct sk_buff *skb, u32
> len);
> +	/* MPTCP wants certain skb's that may be dropped, to process
> options */
> +	void (*ack_only)(struct sock *sk, struct sk_buff *skb);
> +	bool (*can_send)(struct sock *sk);
> +	int (*rx)(struct sock *sk, struct sk_buff *skb, int
> refcounted);
> +	unsigned int (*synack_options_size)(const struct sock *sk,
> +					    struct request_sock
> *req,
> +					    unsigned int mss,
> +					    struct sk_buff *skb,
> +					    struct tcp_out_options
> *opts,
> +					    const struct
> tcp_md5sig_key *md5,
> +					    struct
> tcp_fastopen_cookie *foc);
> +	int (*options_size)(struct sock *sk, struct sk_buff *skb,
> +			    struct tcp_out_options *opts,
> +			    struct tcp_md5sig_key **md5);
> +	void (*options_write)(__be32 **ptr_ptr, struct tcp_sock *tp,
> +			      struct tcp_out_options *opts,
> +			      struct sk_buff *skb);
> +	void (*connect_init)(struct sock *sk);
> +	void (*sock_init)(struct sock *sk);
> +	int (*send_mss)(struct sock *sk, int *size_goal, int flags);
> +	int (*mtu_probe)(struct sock *sk);
> +	void (*retrans_try_collapse)(struct sock *sk, struct sk_buff
> *to,
> +				     int space);
> +	bool (*fastopen_synack)(struct sock *sk, struct sk_buff
> *synack,
> +				struct tcp_fastopen_cookie *cookie);
> +	int (*sendpage_locked)(struct sock *sk, struct page *page,
> int offset,
> +			       size_t size, int flags);
> +	struct request_sock *(*cookie_req_alloc)(struct sock *sk,
> +						 struct sk_buff
> *skb,
> +						 struct
> tcp_options_received *
> +						 tcp_opts,
> +						 __u32 cookie, int
> mss);
> +	struct sock *(*get_cookie_sock)(struct sock *sk, struct
> sk_buff *skb,
> +					struct request_sock *req,
> +					struct dst_entry *dst, u32
> tsoff);
> +};
> +
> +/*
> + * Pointers to state specific TCP functions
> + */
> +struct tcp_state_ops {
> +	int (*rcv_state_process)(struct sock *sk, struct sk_buff
> *skb);
> +	int (*close)(struct sock *sk, struct sk_buff *skb);
> +	int (*synsent)(struct sock *sk, struct sk_buff *skb,
> +		       const struct tcphdr *th);
> +	int (*listen)(struct sock *sk, struct sk_buff *skb);
> +	int (*synrcv)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait1)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait2)(struct sock *sk, struct sk_buff *skb);
> +	int (*established)(struct sock *sk, struct sk_buff *skb);
> +	int (*closewait)(struct sock *sk, struct sk_buff *skb);
> +};
> +

This structure looks like a facility to have different processing for
MPTCP for each of these states. Peaking at the patch for the mptcp side
shows only synsent being re-implemented. If it is not necessary to
rework all of these it would be less impact to TCP code to just address
the synsent case directly and do away with this.  

Peter.

> +extern struct tcp_operational_ops *tcp_default_op_ops;
> +extern struct tcp_state_ops *tcp_default_state_ops;
> +
>  struct tcp_request_sock_ops {
>  	u16 mss_clamp;
>  #ifdef CONFIG_TCP_MD5SIG
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a6b48f6..be81be4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1093,19 +1093,6 @@ static bool tcp_check_dsack(struct sock *sk,
> const struct sk_buff *ack_skb,
>  	return dup_sack;
>  }
>  
> -struct tcp_sacktag_state {
> -	u32	reord;
> -	/* Timestamps for earliest and latest never-retransmitted
> segment
> -	 * that was SACKed. RTO needs the earliest RTT to stay
> conservative,
> -	 * but congestion control should still get an accurate delay
> signal.
> -	 */
> -	u64	first_sackt;
> -	u64	last_sackt;
> -	struct rate_sample *rate;
> -	int	flag;
> -	unsigned int mss_now;
> -};
> -
>  /* Check if skb is fully within the SACK block. In presence of GSO
> skbs,
>   * the incoming SACK may not exactly match but we can find smaller
> MSS
>   * aligned portion of it that matches. Therefore we might need to
> fragment
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index e9f985e..777c8b5 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -420,17 +420,6 @@ static void smc_options_write(__be32 *ptr, u16
> *options)
>  #endif
>  }
>  
> -struct tcp_out_options {
> -	u16 options;		/* bit field of OPTION_* */
> -	u16 mss;		/* 0 to disable */
> -	u8 ws;			/* window scale, 0 to disable
> */
> -	u8 num_sack_blocks;	/* number of SACK blocks to
> include */
> -	u8 hash_size;		/* bytes in hash_location */
> -	__u8 *hash_location;	/* temporary pointer, overloaded
> */
> -	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> -	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast
> open cookie */
> -};
> -
>  /* Write previously computed TCP options to the packet.
>   *
>   * Beware: Something in the Internet is very sensitive to the
> ordering of

^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-04-10  5:19 Rao Shoaib
  0 siblings, 0 replies; 7+ messages in thread
From: Rao Shoaib @ 2018-04-10  5:19 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 1541 bytes --]



On 04/08/2018 09:41 PM, Christoph Paasch wrote:
> On 30/03/18 - 10:32:23, Rao Shoaib wrote:
>>
>> On 03/27/2018 02:36 AM, Christoph Paasch wrote:
>>> it is important to explain (in the commit-message) for each of these callbacks
>>> why they are needed for an MPTCP implementation and why it can't be done
>>> without them.
>>>
>>> This is important to allow people to understand why the change is necessary and
>>> where it is leading to. Ignoring potential performance hits for a moment,
>>> callbacks also increase the code-maintenance overhead and thus each of them
>>> will need to justified.
>>>
>>>
>>> Christoph
>>>
>>>
>>>
>> These callbacks are needed to make the code more readable and maintainable.
>> These will be replace by the macro anyways so this does not matter.
> Even with the macros, it is important to describe to the reviewer in the
> commit message why each of these changes is there for.
>
>
> Christoph
The patch was not sent to just any group. It was sent to folks who I 
assumed were very familiar with the code and what was being done.  I did 
say.

Following patches modify TCP code to enable implementation of MPTCP. MPTCP implementation requires sharing of TCP code with minor modification here and there. In order to keep the TCP code clean and easy to maintain, common code has been moved to new functions for use by both TCP and MPTCP. struct tcp_sock now has function pointers and based on the socket type (TCP/MPTCP) appropriate function is called.


Shoaib

>


^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-04-09  4:41 Christoph Paasch
  0 siblings, 0 replies; 7+ messages in thread
From: Christoph Paasch @ 2018-04-09  4:41 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 913 bytes --]

On 30/03/18 - 10:32:23, Rao Shoaib wrote:
> 
> 
> On 03/27/2018 02:36 AM, Christoph Paasch wrote:
> > 
> > it is important to explain (in the commit-message) for each of these callbacks
> > why they are needed for an MPTCP implementation and why it can't be done
> > without them.
> > 
> > This is important to allow people to understand why the change is necessary and
> > where it is leading to. Ignoring potential performance hits for a moment,
> > callbacks also increase the code-maintenance overhead and thus each of them
> > will need to justified.
> > 
> > 
> > Christoph
> > 
> > 
> > 
> These callbacks are needed to make the code more readable and maintainable.
> These will be replace by the macro anyways so this does not matter.

Even with the macros, it is important to describe to the reviewer in the
commit message why each of these changes is there for.


Christoph


^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-03-30 18:37 Rao Shoaib
  0 siblings, 0 replies; 7+ messages in thread
From: Rao Shoaib @ 2018-03-30 18:37 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 11635 bytes --]



On 03/30/2018 10:55 AM, Krystad, Peter wrote:
> Hi -
>
> Also beginning a review, see inline....
>
> On Thu, 2018-02-22 at 15:49 -0800, rao.shoaib(a)oracle.com wrote:
>> From: Rao Shoaib <rao.shoaib(a)oracle.com>
>>
>> Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
>> ---
>>   include/linux/tcp.h   |  13 +++++
>>   include/net/tcp.h     | 129
>> ++++++++++++++++++++++++++++++++++++++++++++++----
>>   net/ipv4/tcp_input.c  |  13 -----
>>   net/ipv4/tcp_output.c |  11 -----
>>   4 files changed, 133 insertions(+), 33 deletions(-)
>>
>> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
>> index 8f4c549..612360b 100644
>> --- a/include/linux/tcp.h
>> +++ b/include/linux/tcp.h
>> @@ -83,6 +83,17 @@ struct tcp_sack_block {
>>   	u32	end_seq;
>>   };
>>   
>> +struct tcp_out_options {
>> +	u16 options;		/* bit field of OPTION_* */
>> +	u16 mss;		/* 0 to disable */
>> +	u8 ws;			/* window scale, 0 to disable
>> */
>> +	u8 num_sack_blocks;	/* number of SACK blocks to
>> include */
>> +	u8 hash_size;		/* bytes in hash_location */
>> +	__u8 *hash_location;	/* temporary pointer, overloaded
>> */
>> +	__u32 tsval, tsecr;	/* need to include OPTION_TS */
>> +	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast
>> open cookie */
>> +};
>> +
>>   /*These are used to set the sack_ok field in struct
>> tcp_options_received */
>>   #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>>   #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from
>> peer*/
>> @@ -384,6 +395,8 @@ struct tcp_sock {
>>   	 */
>>   	struct request_sock *fastopen_rsk;
>>   	u32	*saved_syn;
>> +	const struct tcp_operational_ops	*op_ops;
>> +	const struct tcp_state_ops		*state_ops;
>>   };
>>   
>>   enum tsq_enum {
>> diff --git a/include/net/tcp.h b/include/net/tcp.h
>> index 92b06c6..8bc724f 100644
>> --- a/include/net/tcp.h
>> +++ b/include/net/tcp.h
>> @@ -250,6 +250,17 @@ extern atomic_long_t tcp_memory_allocated;
>>   extern struct percpu_counter tcp_sockets_allocated;
>>   extern unsigned long tcp_memory_pressure;
>>   
>> +enum tcp_synack_type {
>> +	TCP_SYNACK_NORMAL,
>> +	TCP_SYNACK_FASTOPEN,
>> +	TCP_SYNACK_COOKIE,
>> +};
>> +
>> +enum tcp_queue {
>> +	TCP_FRAG_IN_WRITE_QUEUE,
>> +	TCP_FRAG_IN_RTX_QUEUE,
>> +};
>> +
>>   /* optimized version of sk_under_memory_pressure() for TCP sockets
>> */
>>   static inline bool tcp_under_memory_pressure(const struct sock *sk)
>>   {
>> @@ -369,6 +380,18 @@ enum tcp_tw_status {
>>   	TCP_TW_SYN = 3
>>   };
>>   
>> +struct tcp_sacktag_state {
>> +	u32     reord;
>> +	/* Timestamps for earliest and latest never-retransmitted
>> segment
>> +	 * that was SACKed. RTO needs the earliest RTT to stay
>> conservative,
>> +	 * but congestion control should still get an accurate delay
>> signal.
>> +	 */
>> +	u64     first_sackt;
>> +	u64     last_sackt;
>> +	struct rate_sample *rate;
>> +	int     flag;
>> +	unsigned int mss_now;
>> +};
>>   
>>   enum tcp_tw_status tcp_timewait_state_process(struct
>> inet_timewait_sock *tw,
>>   					      struct sk_buff *skb,
>> @@ -427,11 +450,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct
>> sock *sk, struct sk_buff *skb,
>>   int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
>>   int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int
>> addr_len);
>>   int tcp_connect(struct sock *sk);
>> -enum tcp_synack_type {
>> -	TCP_SYNACK_NORMAL,
>> -	TCP_SYNACK_FASTOPEN,
>> -	TCP_SYNACK_COOKIE,
>> -};
>>   struct sk_buff *tcp_make_synack(const struct sock *sk, struct
>> dst_entry *dst,
>>   				struct request_sock *req,
>>   				struct tcp_fastopen_cookie *foc,
>> @@ -522,10 +540,6 @@ void tcp_xmit_retransmit_queue(struct sock *);
>>   void tcp_simple_retransmit(struct sock *);
>>   void tcp_enter_recovery(struct sock *sk, bool ece_ack);
>>   int tcp_trim_head(struct sock *, struct sk_buff *, u32);
>> -enum tcp_queue {
>> -	TCP_FRAG_IN_WRITE_QUEUE,
>> -	TCP_FRAG_IN_RTX_QUEUE,
>> -};
>>   int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
>>   		 struct sk_buff *skb, u32 len,
>>   		 unsigned int mss_now, gfp_t gfp);
>> @@ -1822,6 +1836,103 @@ struct tcp_sock_af_ops {
>>   #endif
>>   };
>>   
>> +/* TCP operational functions */
>> +struct tcp_operational_ops {
>> +	u32 (*__select_window)(struct sock *sk);
>> +	u16 (*select_window)(struct sock *sk);
>> +	void (*select_initial_window)(const struct sock *sk,
>> +				      int __space, __u32 mss, __u32
>> *rcv_wnd,
>> +				      __u32 *window_clamp, int
>> wscale_ok,
>> +				      __u8 *rcv_wscale, __u32
>> init_rcv_wnd);
>> +	int (*select_size)(const struct sock *sk, bool sg,
>> +			   bool first_skb, bool zc);
>> +	void (*init_buffer_space)(struct sock *sk);
>> +	void (*set_rto)(struct sock *sk);
>> +	bool (*should_expand_sndbuf)(const struct sock *sk);
>> +	void (*send_fin)(struct sock *sk);
>> +	bool (*write_xmit)(struct sock *sk, unsigned int mss_now,
>> int nonagle,
>> +			   int push_one, gfp_t gfp);
>> +	void (*send_active_reset)(struct sock *sk, gfp_t priority);
>> +	int (*write_wakeup)(struct sock *sk, int mib);
>> +	bool (*prune_ofo_queue)(struct sock *sk);
>> +	void (*retransmit_timer)(struct sock *sk);
>> +	void (*time_wait)(struct sock *sk, int state, int timeo);
>> +	void (*cleanup_rbuf)(struct sock *sk, int copied);
>> +	void (*cwnd_validate)(struct sock *sk, bool
>> is_cwnd_limited);
>> +	void (*sndbuf_expand)(struct sock *sk);
>> +	struct sk_buff *(*shift_skb_data)(struct sock *sk, struct
>> sk_buff *skb,
>> +					  struct tcp_sacktag_state
>> *state,
>> +					  u32 start_seq, u32
>> end_seq,
>> +					  bool dup_sack);
>> +	void (*grow_window)(struct sock *sk, const struct sk_buff
>> *skb);
>> +	bool (*check_rtt)(struct sock *sk);
>> +	bool (*try_coalesce)(struct sock *sk, struct sk_buff *to,
>> +			     struct sk_buff *from,
>> +			     bool *fragstolen);
>> +	int (*try_rmem_schedule)(struct sock *sk, struct sk_buff
>> *skb,
>> +				 unsigned int size);
>> +	struct sk_buff *(*collapse_one)(struct sock *sk, struct
>> sk_buff *skb,
>> +					struct sk_buff_head *list,
>> +					struct rb_root *root);
>> +	void (*urg)(struct sock *sk, struct sk_buff *skb,
>> +		    const struct tcphdr *th);
>> +	int (*trim_head)(struct sock *sk, struct sk_buff *skb, u32
>> len);
>> +	/* MPTCP wants certain skb's that may be dropped, to process
>> options */
>> +	void (*ack_only)(struct sock *sk, struct sk_buff *skb);
>> +	bool (*can_send)(struct sock *sk);
>> +	int (*rx)(struct sock *sk, struct sk_buff *skb, int
>> refcounted);
>> +	unsigned int (*synack_options_size)(const struct sock *sk,
>> +					    struct request_sock
>> *req,
>> +					    unsigned int mss,
>> +					    struct sk_buff *skb,
>> +					    struct tcp_out_options
>> *opts,
>> +					    const struct
>> tcp_md5sig_key *md5,
>> +					    struct
>> tcp_fastopen_cookie *foc);
>> +	int (*options_size)(struct sock *sk, struct sk_buff *skb,
>> +			    struct tcp_out_options *opts,
>> +			    struct tcp_md5sig_key **md5);
>> +	void (*options_write)(__be32 **ptr_ptr, struct tcp_sock *tp,
>> +			      struct tcp_out_options *opts,
>> +			      struct sk_buff *skb);
>> +	void (*connect_init)(struct sock *sk);
>> +	void (*sock_init)(struct sock *sk);
>> +	int (*send_mss)(struct sock *sk, int *size_goal, int flags);
>> +	int (*mtu_probe)(struct sock *sk);
>> +	void (*retrans_try_collapse)(struct sock *sk, struct sk_buff
>> *to,
>> +				     int space);
>> +	bool (*fastopen_synack)(struct sock *sk, struct sk_buff
>> *synack,
>> +				struct tcp_fastopen_cookie *cookie);
>> +	int (*sendpage_locked)(struct sock *sk, struct page *page,
>> int offset,
>> +			       size_t size, int flags);
>> +	struct request_sock *(*cookie_req_alloc)(struct sock *sk,
>> +						 struct sk_buff
>> *skb,
>> +						 struct
>> tcp_options_received *
>> +						 tcp_opts,
>> +						 __u32 cookie, int
>> mss);
>> +	struct sock *(*get_cookie_sock)(struct sock *sk, struct
>> sk_buff *skb,
>> +					struct request_sock *req,
>> +					struct dst_entry *dst, u32
>> tsoff);
>> +};
>> +
>> +/*
>> + * Pointers to state specific TCP functions
>> + */
>> +struct tcp_state_ops {
>> +	int (*rcv_state_process)(struct sock *sk, struct sk_buff
>> *skb);
>> +	int (*close)(struct sock *sk, struct sk_buff *skb);
>> +	int (*synsent)(struct sock *sk, struct sk_buff *skb,
>> +		       const struct tcphdr *th);
>> +	int (*listen)(struct sock *sk, struct sk_buff *skb);
>> +	int (*synrcv)(struct sock *sk, struct sk_buff *skb);
>> +	int (*finwait1)(struct sock *sk, struct sk_buff *skb);
>> +	int (*finwait2)(struct sock *sk, struct sk_buff *skb);
>> +	int (*established)(struct sock *sk, struct sk_buff *skb);
>> +	int (*closewait)(struct sock *sk, struct sk_buff *skb);
>> +};
>> +
> This structure looks like a facility to have different processing for
> MPTCP for each of these states. Peaking at the patch for the mptcp side
> shows only synsent being re-implemented. If it is not necessary to
> rework all of these it would be less impact to TCP code to just address
> the synsent case directly and do away with this.
>
> Peter.
Agreed.
Thanks for the comments. I will try to address them.

Shoaib

>
>> +extern struct tcp_operational_ops *tcp_default_op_ops;
>> +extern struct tcp_state_ops *tcp_default_state_ops;
>> +
>>   struct tcp_request_sock_ops {
>>   	u16 mss_clamp;
>>   #ifdef CONFIG_TCP_MD5SIG
>> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>> index a6b48f6..be81be4 100644
>> --- a/net/ipv4/tcp_input.c
>> +++ b/net/ipv4/tcp_input.c
>> @@ -1093,19 +1093,6 @@ static bool tcp_check_dsack(struct sock *sk,
>> const struct sk_buff *ack_skb,
>>   	return dup_sack;
>>   }
>>   
>> -struct tcp_sacktag_state {
>> -	u32	reord;
>> -	/* Timestamps for earliest and latest never-retransmitted
>> segment
>> -	 * that was SACKed. RTO needs the earliest RTT to stay
>> conservative,
>> -	 * but congestion control should still get an accurate delay
>> signal.
>> -	 */
>> -	u64	first_sackt;
>> -	u64	last_sackt;
>> -	struct rate_sample *rate;
>> -	int	flag;
>> -	unsigned int mss_now;
>> -};
>> -
>>   /* Check if skb is fully within the SACK block. In presence of GSO
>> skbs,
>>    * the incoming SACK may not exactly match but we can find smaller
>> MSS
>>    * aligned portion of it that matches. Therefore we might need to
>> fragment
>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>> index e9f985e..777c8b5 100644
>> --- a/net/ipv4/tcp_output.c
>> +++ b/net/ipv4/tcp_output.c
>> @@ -420,17 +420,6 @@ static void smc_options_write(__be32 *ptr, u16
>> *options)
>>   #endif
>>   }
>>   
>> -struct tcp_out_options {
>> -	u16 options;		/* bit field of OPTION_* */
>> -	u16 mss;		/* 0 to disable */
>> -	u8 ws;			/* window scale, 0 to disable
>> */
>> -	u8 num_sack_blocks;	/* number of SACK blocks to
>> include */
>> -	u8 hash_size;		/* bytes in hash_location */
>> -	__u8 *hash_location;	/* temporary pointer, overloaded
>> */
>> -	__u32 tsval, tsecr;	/* need to include OPTION_TS */
>> -	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast
>> open cookie */
>> -};
>> -
>>   /* Write previously computed TCP options to the packet.
>>    *
>>    * Beware: Something in the Internet is very sensitive to the
>> ordering of
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp


^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-03-30 17:32 Rao Shoaib
  0 siblings, 0 replies; 7+ messages in thread
From: Rao Shoaib @ 2018-03-30 17:32 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 687 bytes --]



On 03/27/2018 02:36 AM, Christoph Paasch wrote:
>
> it is important to explain (in the commit-message) for each of these callbacks
> why they are needed for an MPTCP implementation and why it can't be done
> without them.
>
> This is important to allow people to understand why the change is necessary and
> where it is leading to. Ignoring potential performance hits for a moment,
> callbacks also increase the code-maintenance overhead and thus each of them
> will need to justified.
>
>
> Christoph
>
>
>
These callbacks are needed to make the code more readable and 
maintainable. These will be replace by the macro anyways so this does 
not matter.

Shoaib

^ permalink raw reply	[flat|nested] 7+ messages in thread
* Re: [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-03-27  9:36 Christoph Paasch
  0 siblings, 0 replies; 7+ messages in thread
From: Christoph Paasch @ 2018-03-27  9:36 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 11087 bytes --]

Hello,

starting a technical review of this set. Please see inline:

On 22/02/18 - 15:49:53, rao.shoaib(a)oracle.com wrote:
> From: Rao Shoaib <rao.shoaib(a)oracle.com>
> 
> Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
> ---
>  include/linux/tcp.h   |  13 +++++
>  include/net/tcp.h     | 129 ++++++++++++++++++++++++++++++++++++++++++++++----
>  net/ipv4/tcp_input.c  |  13 -----
>  net/ipv4/tcp_output.c |  11 -----
>  4 files changed, 133 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 8f4c549..612360b 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -83,6 +83,17 @@ struct tcp_sack_block {
>  	u32	end_seq;
>  };
>  
> +struct tcp_out_options {
> +	u16 options;		/* bit field of OPTION_* */
> +	u16 mss;		/* 0 to disable */
> +	u8 ws;			/* window scale, 0 to disable */
> +	u8 num_sack_blocks;	/* number of SACK blocks to include */
> +	u8 hash_size;		/* bytes in hash_location */
> +	__u8 *hash_location;	/* temporary pointer, overloaded */
> +	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> +	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
> +};
> +
>  /*These are used to set the sack_ok field in struct tcp_options_received */
>  #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
>  #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
> @@ -384,6 +395,8 @@ struct tcp_sock {
>  	 */
>  	struct request_sock *fastopen_rsk;
>  	u32	*saved_syn;
> +	const struct tcp_operational_ops	*op_ops;
> +	const struct tcp_state_ops		*state_ops;
>  };
>  
>  enum tsq_enum {
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 92b06c6..8bc724f 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -250,6 +250,17 @@ extern atomic_long_t tcp_memory_allocated;
>  extern struct percpu_counter tcp_sockets_allocated;
>  extern unsigned long tcp_memory_pressure;
>  
> +enum tcp_synack_type {
> +	TCP_SYNACK_NORMAL,
> +	TCP_SYNACK_FASTOPEN,
> +	TCP_SYNACK_COOKIE,
> +};
> +
> +enum tcp_queue {
> +	TCP_FRAG_IN_WRITE_QUEUE,
> +	TCP_FRAG_IN_RTX_QUEUE,
> +};
> +
>  /* optimized version of sk_under_memory_pressure() for TCP sockets */
>  static inline bool tcp_under_memory_pressure(const struct sock *sk)
>  {
> @@ -369,6 +380,18 @@ enum tcp_tw_status {
>  	TCP_TW_SYN = 3
>  };
>  
> +struct tcp_sacktag_state {
> +	u32     reord;
> +	/* Timestamps for earliest and latest never-retransmitted segment
> +	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
> +	 * but congestion control should still get an accurate delay signal.
> +	 */
> +	u64     first_sackt;
> +	u64     last_sackt;
> +	struct rate_sample *rate;
> +	int     flag;
> +	unsigned int mss_now;
> +};
>  
>  enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
>  					      struct sk_buff *skb,
> @@ -427,11 +450,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
>  int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
>  int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
>  int tcp_connect(struct sock *sk);
> -enum tcp_synack_type {
> -	TCP_SYNACK_NORMAL,
> -	TCP_SYNACK_FASTOPEN,
> -	TCP_SYNACK_COOKIE,
> -};
>  struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
>  				struct request_sock *req,
>  				struct tcp_fastopen_cookie *foc,
> @@ -522,10 +540,6 @@ void tcp_xmit_retransmit_queue(struct sock *);
>  void tcp_simple_retransmit(struct sock *);
>  void tcp_enter_recovery(struct sock *sk, bool ece_ack);
>  int tcp_trim_head(struct sock *, struct sk_buff *, u32);
> -enum tcp_queue {
> -	TCP_FRAG_IN_WRITE_QUEUE,
> -	TCP_FRAG_IN_RTX_QUEUE,
> -};
>  int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
>  		 struct sk_buff *skb, u32 len,
>  		 unsigned int mss_now, gfp_t gfp);
> @@ -1822,6 +1836,103 @@ struct tcp_sock_af_ops {
>  #endif
>  };
>  
> +/* TCP operational functions */
> +struct tcp_operational_ops {
> +	u32 (*__select_window)(struct sock *sk);
> +	u16 (*select_window)(struct sock *sk);
> +	void (*select_initial_window)(const struct sock *sk,
> +				      int __space, __u32 mss, __u32 *rcv_wnd,
> +				      __u32 *window_clamp, int wscale_ok,
> +				      __u8 *rcv_wscale, __u32 init_rcv_wnd);
> +	int (*select_size)(const struct sock *sk, bool sg,
> +			   bool first_skb, bool zc);
> +	void (*init_buffer_space)(struct sock *sk);
> +	void (*set_rto)(struct sock *sk);
> +	bool (*should_expand_sndbuf)(const struct sock *sk);
> +	void (*send_fin)(struct sock *sk);
> +	bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
> +			   int push_one, gfp_t gfp);
> +	void (*send_active_reset)(struct sock *sk, gfp_t priority);
> +	int (*write_wakeup)(struct sock *sk, int mib);
> +	bool (*prune_ofo_queue)(struct sock *sk);
> +	void (*retransmit_timer)(struct sock *sk);
> +	void (*time_wait)(struct sock *sk, int state, int timeo);
> +	void (*cleanup_rbuf)(struct sock *sk, int copied);
> +	void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
> +	void (*sndbuf_expand)(struct sock *sk);
> +	struct sk_buff *(*shift_skb_data)(struct sock *sk, struct sk_buff *skb,
> +					  struct tcp_sacktag_state *state,
> +					  u32 start_seq, u32 end_seq,
> +					  bool dup_sack);
> +	void (*grow_window)(struct sock *sk, const struct sk_buff *skb);
> +	bool (*check_rtt)(struct sock *sk);
> +	bool (*try_coalesce)(struct sock *sk, struct sk_buff *to,
> +			     struct sk_buff *from,
> +			     bool *fragstolen);
> +	int (*try_rmem_schedule)(struct sock *sk, struct sk_buff *skb,
> +				 unsigned int size);
> +	struct sk_buff *(*collapse_one)(struct sock *sk, struct sk_buff *skb,
> +					struct sk_buff_head *list,
> +					struct rb_root *root);
> +	void (*urg)(struct sock *sk, struct sk_buff *skb,
> +		    const struct tcphdr *th);
> +	int (*trim_head)(struct sock *sk, struct sk_buff *skb, u32 len);
> +	/* MPTCP wants certain skb's that may be dropped, to process options */
> +	void (*ack_only)(struct sock *sk, struct sk_buff *skb);
> +	bool (*can_send)(struct sock *sk);
> +	int (*rx)(struct sock *sk, struct sk_buff *skb, int refcounted);
> +	unsigned int (*synack_options_size)(const struct sock *sk,
> +					    struct request_sock *req,
> +					    unsigned int mss,
> +					    struct sk_buff *skb,
> +					    struct tcp_out_options *opts,
> +					    const struct tcp_md5sig_key *md5,
> +					    struct tcp_fastopen_cookie *foc);
> +	int (*options_size)(struct sock *sk, struct sk_buff *skb,
> +			    struct tcp_out_options *opts,
> +			    struct tcp_md5sig_key **md5);
> +	void (*options_write)(__be32 **ptr_ptr, struct tcp_sock *tp,
> +			      struct tcp_out_options *opts,
> +			      struct sk_buff *skb);
> +	void (*connect_init)(struct sock *sk);
> +	void (*sock_init)(struct sock *sk);
> +	int (*send_mss)(struct sock *sk, int *size_goal, int flags);
> +	int (*mtu_probe)(struct sock *sk);
> +	void (*retrans_try_collapse)(struct sock *sk, struct sk_buff *to,
> +				     int space);
> +	bool (*fastopen_synack)(struct sock *sk, struct sk_buff *synack,
> +				struct tcp_fastopen_cookie *cookie);
> +	int (*sendpage_locked)(struct sock *sk, struct page *page, int offset,
> +			       size_t size, int flags);
> +	struct request_sock *(*cookie_req_alloc)(struct sock *sk,
> +						 struct sk_buff *skb,
> +						 struct tcp_options_received *
> +						 tcp_opts,
> +						 __u32 cookie, int mss);
> +	struct sock *(*get_cookie_sock)(struct sock *sk, struct sk_buff *skb,
> +					struct request_sock *req,
> +					struct dst_entry *dst, u32 tsoff);
> +};

it is important to explain (in the commit-message) for each of these callbacks
why they are needed for an MPTCP implementation and why it can't be done
without them.

This is important to allow people to understand why the change is necessary and
where it is leading to. Ignoring potential performance hits for a moment,
callbacks also increase the code-maintenance overhead and thus each of them
will need to justified.


Christoph


> +
> +/*
> + * Pointers to state specific TCP functions
> + */
> +struct tcp_state_ops {
> +	int (*rcv_state_process)(struct sock *sk, struct sk_buff *skb);
> +	int (*close)(struct sock *sk, struct sk_buff *skb);
> +	int (*synsent)(struct sock *sk, struct sk_buff *skb,
> +		       const struct tcphdr *th);
> +	int (*listen)(struct sock *sk, struct sk_buff *skb);
> +	int (*synrcv)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait1)(struct sock *sk, struct sk_buff *skb);
> +	int (*finwait2)(struct sock *sk, struct sk_buff *skb);
> +	int (*established)(struct sock *sk, struct sk_buff *skb);
> +	int (*closewait)(struct sock *sk, struct sk_buff *skb);
> +};
> +
> +extern struct tcp_operational_ops *tcp_default_op_ops;
> +extern struct tcp_state_ops *tcp_default_state_ops;
> +
>  struct tcp_request_sock_ops {
>  	u16 mss_clamp;
>  #ifdef CONFIG_TCP_MD5SIG
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a6b48f6..be81be4 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1093,19 +1093,6 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
>  	return dup_sack;
>  }
>  
> -struct tcp_sacktag_state {
> -	u32	reord;
> -	/* Timestamps for earliest and latest never-retransmitted segment
> -	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
> -	 * but congestion control should still get an accurate delay signal.
> -	 */
> -	u64	first_sackt;
> -	u64	last_sackt;
> -	struct rate_sample *rate;
> -	int	flag;
> -	unsigned int mss_now;
> -};
> -
>  /* Check if skb is fully within the SACK block. In presence of GSO skbs,
>   * the incoming SACK may not exactly match but we can find smaller MSS
>   * aligned portion of it that matches. Therefore we might need to fragment
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index e9f985e..777c8b5 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -420,17 +420,6 @@ static void smc_options_write(__be32 *ptr, u16 *options)
>  #endif
>  }
>  
> -struct tcp_out_options {
> -	u16 options;		/* bit field of OPTION_* */
> -	u16 mss;		/* 0 to disable */
> -	u8 ws;			/* window scale, 0 to disable */
> -	u8 num_sack_blocks;	/* number of SACK blocks to include */
> -	u8 hash_size;		/* bytes in hash_location */
> -	__u8 *hash_location;	/* temporary pointer, overloaded */
> -	__u32 tsval, tsecr;	/* need to include OPTION_TS */
> -	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
> -};
> -
>  /* Write previously computed TCP options to the packet.
>   *
>   * Beware: Something in the Internet is very sensitive to the ordering of
> -- 
> 2.7.4
> 
> _______________________________________________
> mptcp mailing list
> mptcp(a)lists.01.org
> https://lists.01.org/mailman/listinfo/mptcp

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers
@ 2018-02-22 23:49 rao.shoaib
  0 siblings, 0 replies; 7+ messages in thread
From: rao.shoaib @ 2018-02-22 23:49 UTC (permalink / raw)
  To: mptcp 

[-- Attachment #1: Type: text/plain, Size: 9848 bytes --]

From: Rao Shoaib <rao.shoaib(a)oracle.com>

Signed-off-by: Rao Shoaib <rao.shoaib(a)oracle.com>
---
 include/linux/tcp.h   |  13 +++++
 include/net/tcp.h     | 129 ++++++++++++++++++++++++++++++++++++++++++++++----
 net/ipv4/tcp_input.c  |  13 -----
 net/ipv4/tcp_output.c |  11 -----
 4 files changed, 133 insertions(+), 33 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8f4c549..612360b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -83,6 +83,17 @@ struct tcp_sack_block {
 	u32	end_seq;
 };
 
+struct tcp_out_options {
+	u16 options;		/* bit field of OPTION_* */
+	u16 mss;		/* 0 to disable */
+	u8 ws;			/* window scale, 0 to disable */
+	u8 num_sack_blocks;	/* number of SACK blocks to include */
+	u8 hash_size;		/* bytes in hash_location */
+	__u8 *hash_location;	/* temporary pointer, overloaded */
+	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
+};
+
 /*These are used to set the sack_ok field in struct tcp_options_received */
 #define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
 #define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
@@ -384,6 +395,8 @@ struct tcp_sock {
 	 */
 	struct request_sock *fastopen_rsk;
 	u32	*saved_syn;
+	const struct tcp_operational_ops	*op_ops;
+	const struct tcp_state_ops		*state_ops;
 };
 
 enum tsq_enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 92b06c6..8bc724f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -250,6 +250,17 @@ extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
 extern unsigned long tcp_memory_pressure;
 
+enum tcp_synack_type {
+	TCP_SYNACK_NORMAL,
+	TCP_SYNACK_FASTOPEN,
+	TCP_SYNACK_COOKIE,
+};
+
+enum tcp_queue {
+	TCP_FRAG_IN_WRITE_QUEUE,
+	TCP_FRAG_IN_RTX_QUEUE,
+};
+
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
@@ -369,6 +380,18 @@ enum tcp_tw_status {
 	TCP_TW_SYN = 3
 };
 
+struct tcp_sacktag_state {
+	u32     reord;
+	/* Timestamps for earliest and latest never-retransmitted segment
+	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
+	 * but congestion control should still get an accurate delay signal.
+	 */
+	u64     first_sackt;
+	u64     last_sackt;
+	struct rate_sample *rate;
+	int     flag;
+	unsigned int mss_now;
+};
 
 enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
 					      struct sk_buff *skb,
@@ -427,11 +450,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int tcp_connect(struct sock *sk);
-enum tcp_synack_type {
-	TCP_SYNACK_NORMAL,
-	TCP_SYNACK_FASTOPEN,
-	TCP_SYNACK_COOKIE,
-};
 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
 				struct tcp_fastopen_cookie *foc,
@@ -522,10 +540,6 @@ void tcp_xmit_retransmit_queue(struct sock *);
 void tcp_simple_retransmit(struct sock *);
 void tcp_enter_recovery(struct sock *sk, bool ece_ack);
 int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-enum tcp_queue {
-	TCP_FRAG_IN_WRITE_QUEUE,
-	TCP_FRAG_IN_RTX_QUEUE,
-};
 int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 		 struct sk_buff *skb, u32 len,
 		 unsigned int mss_now, gfp_t gfp);
@@ -1822,6 +1836,103 @@ struct tcp_sock_af_ops {
 #endif
 };
 
+/* TCP operational functions */
+struct tcp_operational_ops {
+	u32 (*__select_window)(struct sock *sk);
+	u16 (*select_window)(struct sock *sk);
+	void (*select_initial_window)(const struct sock *sk,
+				      int __space, __u32 mss, __u32 *rcv_wnd,
+				      __u32 *window_clamp, int wscale_ok,
+				      __u8 *rcv_wscale, __u32 init_rcv_wnd);
+	int (*select_size)(const struct sock *sk, bool sg,
+			   bool first_skb, bool zc);
+	void (*init_buffer_space)(struct sock *sk);
+	void (*set_rto)(struct sock *sk);
+	bool (*should_expand_sndbuf)(const struct sock *sk);
+	void (*send_fin)(struct sock *sk);
+	bool (*write_xmit)(struct sock *sk, unsigned int mss_now, int nonagle,
+			   int push_one, gfp_t gfp);
+	void (*send_active_reset)(struct sock *sk, gfp_t priority);
+	int (*write_wakeup)(struct sock *sk, int mib);
+	bool (*prune_ofo_queue)(struct sock *sk);
+	void (*retransmit_timer)(struct sock *sk);
+	void (*time_wait)(struct sock *sk, int state, int timeo);
+	void (*cleanup_rbuf)(struct sock *sk, int copied);
+	void (*cwnd_validate)(struct sock *sk, bool is_cwnd_limited);
+	void (*sndbuf_expand)(struct sock *sk);
+	struct sk_buff *(*shift_skb_data)(struct sock *sk, struct sk_buff *skb,
+					  struct tcp_sacktag_state *state,
+					  u32 start_seq, u32 end_seq,
+					  bool dup_sack);
+	void (*grow_window)(struct sock *sk, const struct sk_buff *skb);
+	bool (*check_rtt)(struct sock *sk);
+	bool (*try_coalesce)(struct sock *sk, struct sk_buff *to,
+			     struct sk_buff *from,
+			     bool *fragstolen);
+	int (*try_rmem_schedule)(struct sock *sk, struct sk_buff *skb,
+				 unsigned int size);
+	struct sk_buff *(*collapse_one)(struct sock *sk, struct sk_buff *skb,
+					struct sk_buff_head *list,
+					struct rb_root *root);
+	void (*urg)(struct sock *sk, struct sk_buff *skb,
+		    const struct tcphdr *th);
+	int (*trim_head)(struct sock *sk, struct sk_buff *skb, u32 len);
+	/* MPTCP wants certain skb's that may be dropped, to process options */
+	void (*ack_only)(struct sock *sk, struct sk_buff *skb);
+	bool (*can_send)(struct sock *sk);
+	int (*rx)(struct sock *sk, struct sk_buff *skb, int refcounted);
+	unsigned int (*synack_options_size)(const struct sock *sk,
+					    struct request_sock *req,
+					    unsigned int mss,
+					    struct sk_buff *skb,
+					    struct tcp_out_options *opts,
+					    const struct tcp_md5sig_key *md5,
+					    struct tcp_fastopen_cookie *foc);
+	int (*options_size)(struct sock *sk, struct sk_buff *skb,
+			    struct tcp_out_options *opts,
+			    struct tcp_md5sig_key **md5);
+	void (*options_write)(__be32 **ptr_ptr, struct tcp_sock *tp,
+			      struct tcp_out_options *opts,
+			      struct sk_buff *skb);
+	void (*connect_init)(struct sock *sk);
+	void (*sock_init)(struct sock *sk);
+	int (*send_mss)(struct sock *sk, int *size_goal, int flags);
+	int (*mtu_probe)(struct sock *sk);
+	void (*retrans_try_collapse)(struct sock *sk, struct sk_buff *to,
+				     int space);
+	bool (*fastopen_synack)(struct sock *sk, struct sk_buff *synack,
+				struct tcp_fastopen_cookie *cookie);
+	int (*sendpage_locked)(struct sock *sk, struct page *page, int offset,
+			       size_t size, int flags);
+	struct request_sock *(*cookie_req_alloc)(struct sock *sk,
+						 struct sk_buff *skb,
+						 struct tcp_options_received *
+						 tcp_opts,
+						 __u32 cookie, int mss);
+	struct sock *(*get_cookie_sock)(struct sock *sk, struct sk_buff *skb,
+					struct request_sock *req,
+					struct dst_entry *dst, u32 tsoff);
+};
+
+/*
+ * Pointers to state specific TCP functions
+ */
+struct tcp_state_ops {
+	int (*rcv_state_process)(struct sock *sk, struct sk_buff *skb);
+	int (*close)(struct sock *sk, struct sk_buff *skb);
+	int (*synsent)(struct sock *sk, struct sk_buff *skb,
+		       const struct tcphdr *th);
+	int (*listen)(struct sock *sk, struct sk_buff *skb);
+	int (*synrcv)(struct sock *sk, struct sk_buff *skb);
+	int (*finwait1)(struct sock *sk, struct sk_buff *skb);
+	int (*finwait2)(struct sock *sk, struct sk_buff *skb);
+	int (*established)(struct sock *sk, struct sk_buff *skb);
+	int (*closewait)(struct sock *sk, struct sk_buff *skb);
+};
+
+extern struct tcp_operational_ops *tcp_default_op_ops;
+extern struct tcp_state_ops *tcp_default_state_ops;
+
 struct tcp_request_sock_ops {
 	u16 mss_clamp;
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a6b48f6..be81be4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1093,19 +1093,6 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 	return dup_sack;
 }
 
-struct tcp_sacktag_state {
-	u32	reord;
-	/* Timestamps for earliest and latest never-retransmitted segment
-	 * that was SACKed. RTO needs the earliest RTT to stay conservative,
-	 * but congestion control should still get an accurate delay signal.
-	 */
-	u64	first_sackt;
-	u64	last_sackt;
-	struct rate_sample *rate;
-	int	flag;
-	unsigned int mss_now;
-};
-
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
  * the incoming SACK may not exactly match but we can find smaller MSS
  * aligned portion of it that matches. Therefore we might need to fragment
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e9f985e..777c8b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -420,17 +420,6 @@ static void smc_options_write(__be32 *ptr, u16 *options)
 #endif
 }
 
-struct tcp_out_options {
-	u16 options;		/* bit field of OPTION_* */
-	u16 mss;		/* 0 to disable */
-	u8 ws;			/* window scale, 0 to disable */
-	u8 num_sack_blocks;	/* number of SACK blocks to include */
-	u8 hash_size;		/* bytes in hash_location */
-	__u8 *hash_location;	/* temporary pointer, overloaded */
-	__u32 tsval, tsecr;	/* need to include OPTION_TS */
-	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
-};
-
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-04-10  5:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-03-30 17:55 [MPTCP] [RFC 1/9] Modify tcp structures to support function pointers Krystad, Peter
  -- strict thread matches above, loose matches on Subject: below --
2018-04-10  5:19 Rao Shoaib
2018-04-09  4:41 Christoph Paasch
2018-03-30 18:37 Rao Shoaib
2018-03-30 17:32 Rao Shoaib
2018-03-27  9:36 Christoph Paasch
2018-02-22 23:49 rao.shoaib

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.