* [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
@ 2016-03-04 23:57 Martin KaFai Lau
2016-03-05 0:33 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Martin KaFai Lau @ 2016-03-04 23:57 UTC (permalink / raw)
To: netdev
Cc: Kernel Team, Chris Rapier, Eric Dumazet, Marcelo Ricardo Leitner,
Neal Cardwell, Yuchung Cheng
Per RFC4898, tcpEStatsPerfDataSegsOut/In count the segments sent/received
that carry a positive-length data payload (this includes
retransmitted segments carrying data). Unlike
tcpi_segs_out/in, tcpi_data_segs_out/in exclude segments
carrying no data (e.g. pure ACKs).
The patch also updates segs_in in tcp_fastopen_add_skb()
so that the segs_in >= data_segs_in property is kept.
Together with the retransmission counts, tcpi_data_segs_out
gives a better signal of the retransmit (rxmit) rate.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Marcelo Ricardo Leitner <mleitner@redhat.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
---
include/linux/tcp.h | 6 ++++++
include/uapi/linux/tcp.h | 2 ++
net/ipv4/tcp.c | 2 ++
net/ipv4/tcp_fastopen.c | 4 ++++
net/ipv4/tcp_ipv4.c | 7 ++++++-
net/ipv4/tcp_output.c | 4 +++-
net/ipv6/tcp_ipv6.c | 7 ++++++-
7 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bcbf51d..7be9b12 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -158,6 +158,9 @@ struct tcp_sock {
u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
* total number of segments in.
*/
+ u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
+ */
u32 rcv_nxt; /* What we want to receive next */
u32 copied_seq; /* Head of yet unread data */
u32 rcv_wup; /* rcv_nxt on last window update sent */
@@ -165,6 +168,9 @@ struct tcp_sock {
u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
* The total number of segments sent.
*/
+ u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index fe95446..53e8e3f 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -199,6 +199,8 @@ struct tcp_info {
__u32 tcpi_notsent_bytes;
__u32 tcpi_min_rtt;
+ __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
+ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */
};
/* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f9faadb..6b01b48 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2728,6 +2728,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_notsent_bytes = max(0, notsent_bytes);
info->tcpi_min_rtt = tcp_min_rtt(tp);
+ info->tcpi_data_segs_in = tp->data_segs_in;
+ info->tcpi_data_segs_out = tp->data_segs_out;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdb286d..f583c85 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -131,6 +131,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u16 segs_in;
if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
return;
@@ -154,6 +155,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
* as we certainly are not changing upper 32bit value (0)
*/
tp->bytes_received = skb->len;
+ segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tp->segs_in = segs_in;
+ tp->data_segs_in = segs_in;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4c8d58d..c9b576f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1540,6 +1540,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
+ u16 segs_in;
int ret;
struct net *net = dev_net(skb->dev);
@@ -1650,7 +1651,11 @@ process:
sk_incoming_cpu_update(sk);
bh_lock_sock_nested(sk);
- tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tcp_sk(sk)->segs_in += segs_in;
+ if (skb->len > __tcp_hdrlen(th))
+ tcp_sk(sk)->data_segs_in += segs_in;
+
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d2c7a4..7d2dc01 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
- if (skb->len != tcp_header_size)
+ if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
+ tp->data_segs_out += tcp_skb_pcount(skb);
+ }
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 33f2820..941a8a5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1349,6 +1349,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
const struct tcphdr *th;
const struct ipv6hdr *hdr;
struct sock *sk;
+ u16 segs_in;
int ret;
struct net *net = dev_net(skb->dev);
@@ -1443,7 +1444,11 @@ process:
sk_incoming_cpu_update(sk);
bh_lock_sock_nested(sk);
- tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tcp_sk(sk)->segs_in += segs_in;
+ if (skb->len > __tcp_hdrlen(th))
+ tcp_sk(sk)->data_segs_in += segs_in;
+
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
--
2.5.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
2016-03-04 23:57 [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Martin KaFai Lau
@ 2016-03-05 0:33 ` Eric Dumazet
2016-03-06 17:29 ` [PATCH net] tcp: fix tcpi_segs_in after connection establishment Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2016-03-05 0:33 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: netdev, Kernel Team, Chris Rapier, Eric Dumazet,
Marcelo Ricardo Leitner, Neal Cardwell, Yuchung Cheng
On ven., 2016-03-04 at 15:57 -0800, Martin KaFai Lau wrote:
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 4c8d58d..c9b576f 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1540,6 +1540,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
> const struct iphdr *iph;
> const struct tcphdr *th;
> struct sock *sk;
> + u16 segs_in;
> int ret;
> struct net *net = dev_net(skb->dev);
>
> @@ -1650,7 +1651,11 @@ process:
> sk_incoming_cpu_update(sk);
>
> bh_lock_sock_nested(sk);
> - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + tcp_sk(sk)->segs_in += segs_in;
> + if (skb->len > __tcp_hdrlen(th))
> + tcp_sk(sk)->data_segs_in += segs_in;
It looks like we have a bug if the segment comes for a SYN_RECV request
socket (this happens if the ACK packet of the 3WHS was lost):
we do not enter this path in that case.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH net] tcp: fix tcpi_segs_in after connection establishment
2016-03-05 0:33 ` Eric Dumazet
@ 2016-03-06 17:29 ` Eric Dumazet
2016-03-07 20:47 ` David Miller
0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2016-03-06 17:29 UTC (permalink / raw)
To: Martin KaFai Lau, David Miller
Cc: netdev, Kernel Team, Chris Rapier, Eric Dumazet,
Marcelo Ricardo Leitner, Neal Cardwell, Yuchung Cheng
From: Eric Dumazet <edumazet@google.com>
If the final packet (ACK) of the 3WHS is lost, it appears we do not properly
account the following incoming segment in tcpi_segs_in.
While we are at it, start segs_in at one, to count the SYN packet.
We do not yet count the number of SYNs received for a request sock; we
might add this someday.
packetdrill script showing proper behavior after the fix:
// Tests tcpi_segs_in when 3rd packet (ACK) of 3WHS is lost
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+.020 < P. 1:1001(1000) ack 1 win 32792
+0 accept(3, ..., ...) = 4
+.000 %{ assert tcpi_segs_in == 2, 'tcpi_segs_in=%d' % tcpi_segs_in }%
Fixes: 2efd055c53c06 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info")
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
net/ipv4/tcp_minisocks.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75632a925824..9b02af2139d3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -455,7 +455,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->segs_in = 0;
+ newtp->segs_in = 1;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -815,6 +815,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net] tcp: fix tcpi_segs_in after connection establishment
2016-03-06 17:29 ` [PATCH net] tcp: fix tcpi_segs_in after connection establishment Eric Dumazet
@ 2016-03-07 20:47 ` David Miller
0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2016-03-07 20:47 UTC (permalink / raw)
To: eric.dumazet
Cc: kafai, netdev, kernel-team, rapier, edumazet, mleitner, ncardwell,
ycheng
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 06 Mar 2016 09:29:21 -0800
> From: Eric Dumazet <edumazet@google.com>
>
> If the final packet (ACK) of the 3WHS is lost, it appears we do not properly
> account the following incoming segment in tcpi_segs_in.
>
> While we are at it, start segs_in at one, to count the SYN packet.
>
> We do not yet count the number of SYNs received for a request sock; we
> might add this someday.
>
> packetdrill script showing proper behavior after the fix:
>
> // Tests tcpi_segs_in when 3rd packet (ACK) of 3WHS is lost
> 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
> +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
> +0 bind(3, ..., ...) = 0
> +0 listen(3, 1) = 0
>
> +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
> +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
> +.020 < P. 1:1001(1000) ack 1 win 32792
>
> +0 accept(3, ..., ...) = 4
>
> +.000 %{ assert tcpi_segs_in == 2, 'tcpi_segs_in=%d' % tcpi_segs_in }%
>
> Fixes: 2efd055c53c06 ("tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
Applied and queued up for -stable, thanks.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-03-07 20:47 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-04 23:57 [RFC PATCH net-next] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Martin KaFai Lau
2016-03-05 0:33 ` Eric Dumazet
2016-03-06 17:29 ` [PATCH net] tcp: fix tcpi_segs_in after connection establishment Eric Dumazet
2016-03-07 20:47 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).