netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
@ 2016-03-04 23:57 Martin KaFai Lau
  2016-03-05  0:33 ` Eric Dumazet
  0 siblings, 1 reply; 4+ messages in thread
From: Martin KaFai Lau @ 2016-03-04 23:57 UTC (permalink / raw)
  To: netdev
  Cc: Kernel Team, Chris Rapier, Eric Dumazet, Marcelo Ricardo Leitner,
	Neal Cardwell, Yuchung Cheng

Per RFC4898, tcpEStatsPerfDataSegsOut/In count the segments
sent/received that carry a positive-length data payload
(including retransmitted segments carrying data).  Unlike
tcpi_segs_out/in, tcpi_data_segs_out/in exclude segments
carrying no data (e.g. pure ACKs).

The patch also updates segs_in in tcp_fastopen_add_skb()
so that the segs_in >= data_segs_in invariant is kept.

Together with the retransmission statistics, tcpi_data_segs_out
gives a better signal of the retransmission rate.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Marcelo Ricardo Leitner <mleitner@redhat.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
---
 include/linux/tcp.h      | 6 ++++++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 2 ++
 net/ipv4/tcp_fastopen.c  | 4 ++++
 net/ipv4/tcp_ipv4.c      | 7 ++++++-
 net/ipv4/tcp_output.c    | 4 +++-
 net/ipv6/tcp_ipv6.c      | 7 ++++++-
 7 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bcbf51d..7be9b12 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -158,6 +158,9 @@ struct tcp_sock {
 	u32	segs_in;	/* RFC4898 tcpEStatsPerfSegsIn
 				 * total number of segments in.
 				 */
+	u32	data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
+				 * total number of data segments in.
+				 */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
 	u32	copied_seq;	/* Head of yet unread data		*/
 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
@@ -165,6 +168,9 @@ struct tcp_sock {
 	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut
 				 * The total number of segments sent.
 				 */
+	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
+				 * total number of data segments sent.
+				 */
 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index fe95446..53e8e3f 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -199,6 +199,8 @@ struct tcp_info {
 
 	__u32	tcpi_notsent_bytes;
 	__u32	tcpi_min_rtt;
+	__u32	tcpi_data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn */
+	__u32	tcpi_data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f9faadb..6b01b48 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2728,6 +2728,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_notsent_bytes = max(0, notsent_bytes);
 
 	info->tcpi_min_rtt = tcp_min_rtt(tp);
+	info->tcpi_data_segs_in = tp->data_segs_in;
+	info->tcpi_data_segs_out = tp->data_segs_out;
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdb286d..f583c85 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -131,6 +131,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u16 segs_in;
 
 	if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
 		return;
@@ -154,6 +155,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
 	 * as we certainly are not changing upper 32bit value (0)
 	 */
 	tp->bytes_received = skb->len;
+	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+	tp->segs_in = segs_in;
+	tp->data_segs_in = segs_in;
 
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 		tcp_fin(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4c8d58d..c9b576f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1540,6 +1540,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	const struct iphdr *iph;
 	const struct tcphdr *th;
 	struct sock *sk;
+	u16 segs_in;
 	int ret;
 	struct net *net = dev_net(skb->dev);
 
@@ -1650,7 +1651,11 @@ process:
 	sk_incoming_cpu_update(sk);
 
 	bh_lock_sock_nested(sk);
-	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+	tcp_sk(sk)->segs_in += segs_in;
+	if (skb->len > __tcp_hdrlen(th))
+		tcp_sk(sk)->data_segs_in += segs_in;
+
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d2c7a4..7d2dc01 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
-	if (skb->len != tcp_header_size)
+	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
+		tp->data_segs_out += tcp_skb_pcount(skb);
+	}
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
 		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 33f2820..941a8a5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1349,6 +1349,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
 	struct sock *sk;
+	u16 segs_in;
 	int ret;
 	struct net *net = dev_net(skb->dev);
 
@@ -1443,7 +1444,11 @@ process:
 	sk_incoming_cpu_update(sk);
 
 	bh_lock_sock_nested(sk);
-	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+	tcp_sk(sk)->segs_in += segs_in;
+	if (skb->len > __tcp_hdrlen(th))
+		tcp_sk(sk)->data_segs_in += segs_in;
+
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
  2016-03-04 23:57 [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Martin KaFai Lau
@ 2016-03-05  0:33 ` Eric Dumazet
  2016-03-06 17:29   ` [PATCH net] tcp: fix tcpi_segs_in after connection establishment Eric Dumazet
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2016-03-05  0:33 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: netdev, Kernel Team, Chris Rapier, Eric Dumazet,
	Marcelo Ricardo Leitner, Neal Cardwell, Yuchung Cheng

On ven., 2016-03-04 at 15:57 -0800, Martin KaFai Lau wrote:

> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 4c8d58d..c9b576f 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1540,6 +1540,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
>  	const struct iphdr *iph;
>  	const struct tcphdr *th;
>  	struct sock *sk;
> +	u16 segs_in;
>  	int ret;
>  	struct net *net = dev_net(skb->dev);
>  
> @@ -1650,7 +1651,11 @@ process:
>  	sk_incoming_cpu_update(sk);
>  
>  	bh_lock_sock_nested(sk);
> -	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> +	segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> +	tcp_sk(sk)->segs_in += segs_in;
> +	if (skb->len > __tcp_hdrlen(th))
> +		tcp_sk(sk)->data_segs_in += segs_in;


It looks like we have a bug if the segment comes for a SYN_RECV request
socket.  (It happens if the ACK packet of the 3WHS was lost)

We do not enter this path in this case.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH net] tcp: fix tcpi_segs_in after connection establishment
  2016-03-05  0:33 ` Eric Dumazet
@ 2016-03-06 17:29   ` Eric Dumazet
  2016-03-07 20:47     ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2016-03-06 17:29 UTC (permalink / raw)
  To: Martin KaFai Lau, David Miller
  Cc: netdev, Kernel Team, Chris Rapier, Eric Dumazet,
	Marcelo Ricardo Leitner, Neal Cardwell, Yuchung Cheng

From: Eric Dumazet <edumazet@google.com>

If the final packet (ACK) of the 3WHS is lost, it appears we do not
properly account for the following incoming segment in tcpi_segs_in.

While we are at it, start segs_in at one, to count the SYN packet.

We do not yet count the number of SYNs we received for a request sock;
we might add this someday.

packetdrill script showing proper behavior after fix :

// Tests tcpi_segs_in when 3rd packet (ACK) of 3WHS is lost
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+.020 < P. 1:1001(1000) ack 1 win 32792

   +0 accept(3, ..., ...) = 4

+.000 %{ assert tcpi_segs_in == 2, 'tcpi_segs_in=%d' % tcpi_segs_in }%

Fixes: 2efd055c53c06 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp_minisocks.c |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a925824..9b02af2139d3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->segs_in = 0;
+		newtp->segs_in = 1;
 
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -815,6 +815,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	int ret = 0;
 	int state = child->sk_state;
 
+	tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	if (!sock_owned_by_user(child)) {
 		ret = tcp_rcv_state_process(child, skb);
 		/* Wakeup parent, send SIGIO */

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net] tcp: fix tcpi_segs_in after connection establishment
  2016-03-06 17:29   ` [PATCH net] tcp: fix tcpi_segs_in after connection establishment Eric Dumazet
@ 2016-03-07 20:47     ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2016-03-07 20:47 UTC (permalink / raw)
  To: eric.dumazet
  Cc: kafai, netdev, kernel-team, rapier, edumazet, mleitner, ncardwell,
	ycheng

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 06 Mar 2016 09:29:21 -0800

> From: Eric Dumazet <edumazet@google.com>
> 
> If the final packet (ACK) of the 3WHS is lost, it appears we do not
> properly account for the following incoming segment in tcpi_segs_in.
> 
> While we are at it, start segs_in at one, to count the SYN packet.
> 
> We do not yet count the number of SYNs we received for a request sock;
> we might add this someday.
> 
> packetdrill script showing proper behavior after fix :
> 
> // Tests tcpi_segs_in when 3rd packet (ACK) of 3WHS is lost
> 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
>    +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
>    +0 bind(3, ..., ...) = 0
>    +0 listen(3, 1) = 0
> 
>    +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
>    +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
> +.020 < P. 1:1001(1000) ack 1 win 32792
> 
>    +0 accept(3, ..., ...) = 4
> 
> +.000 %{ assert tcpi_segs_in == 2, 'tcpi_segs_in=%d' % tcpi_segs_in }%
> 
> Fixes: 2efd055c53c06 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info")
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-03-07 20:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-04 23:57 [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Martin KaFai Lau
2016-03-05  0:33 ` Eric Dumazet
2016-03-06 17:29   ` [PATCH net] tcp: fix tcpi_segs_in after connection establishment Eric Dumazet
2016-03-07 20:47     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).