* [RFC PATCH net-next v2] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
@ 2016-03-08 2:01 Martin KaFai Lau
2016-03-08 19:41 ` Marcelo Ricardo Leitner
0 siblings, 1 reply; 3+ messages in thread
From: Martin KaFai Lau @ 2016-03-08 2:01 UTC (permalink / raw)
To: netdev
Cc: Kernel Team, Chris Rapier, Eric Dumazet, Marcelo Ricardo Leitner,
Neal Cardwell, Yuchung Cheng
v2:
Rework based on recent fix by Eric:
commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment")
v1:
Per RFC4898, they count segments sent/received
containing a positive length data segment (that includes
retransmission segments carrying data). Unlike
tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments
carrying no data (e.g. pure ack).
The patch also updates the segs_in in tcp_fastopen_add_skb()
so that segs_in >= data_segs_in property is kept.
Together with retransmission data, tcpi_data_segs_out
gives a better signal on the rxmit rate.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Marcelo Ricardo Leitner <mleitner@redhat.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
---
include/linux/tcp.h | 6 ++++++
include/net/tcp.h | 10 ++++++++++
include/uapi/linux/tcp.h | 2 ++
net/ipv4/tcp.c | 2 ++
net/ipv4/tcp_fastopen.c | 4 ++++
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv4/tcp_minisocks.c | 2 +-
net/ipv4/tcp_output.c | 4 +++-
net/ipv6/tcp_ipv6.c | 2 +-
9 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bcbf51d..7be9b12 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -158,6 +158,9 @@ struct tcp_sock {
u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
* total number of segments in.
*/
+ u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
+ */
u32 rcv_nxt; /* What we want to receive next */
u32 copied_seq; /* Head of yet unread data */
u32 rcv_wup; /* rcv_nxt on last window update sent */
@@ -165,6 +168,9 @@ struct tcp_sock {
u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
* The total number of segments sent.
*/
+ u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
* sum(delta(snd_una)), or how many bytes
* were acked.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e90db85..e2916cc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1816,4 +1816,14 @@ static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
skb->truesize = 2;
}
+static inline void tcp_segs_in(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ u16 segs_in;
+
+ segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tp->segs_in += segs_in;
+ if (skb->len > tcp_hdrlen(skb))
+ tp->data_segs_in += segs_in;
+}
+
#endif /* _TCP_H */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index fe95446..53e8e3f 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -199,6 +199,8 @@ struct tcp_info {
__u32 tcpi_notsent_bytes;
__u32 tcpi_min_rtt;
+ __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
+ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */
};
/* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f9faadb..6b01b48 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2728,6 +2728,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_notsent_bytes = max(0, notsent_bytes);
info->tcpi_min_rtt = tcp_min_rtt(tp);
+ info->tcpi_data_segs_in = tp->data_segs_in;
+ info->tcpi_data_segs_out = tp->data_segs_out;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fdb286d..f583c85 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -131,6 +131,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u16 segs_in;
if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
return;
@@ -154,6 +155,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
* as we certainly are not changing upper 32bit value (0)
*/
tp->bytes_received = skb->len;
+ segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tp->segs_in = segs_in;
+ tp->data_segs_in = segs_in;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4c8d58d..0b02ef7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1650,7 +1650,7 @@ process:
sk_incoming_cpu_update(sk);
bh_lock_sock_nested(sk);
- tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ae90e4b..acb366d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
- tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tcp_segs_in(tcp_sk(child), skb);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7d2c7a4..7d2dc01 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
- if (skb->len != tcp_header_size)
+ if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
+ tp->data_segs_out += tcp_skb_pcount(skb);
+ }
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 33f2820..9c16565 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1443,7 +1443,7 @@ process:
sk_incoming_cpu_update(sk);
bh_lock_sock_nested(sk);
- tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
--
2.5.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [RFC PATCH net-next v2] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
2016-03-08 2:01 [RFC PATCH net-next v2] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Martin KaFai Lau
@ 2016-03-08 19:41 ` Marcelo Ricardo Leitner
2016-03-08 20:24 ` David Miller
0 siblings, 1 reply; 3+ messages in thread
From: Marcelo Ricardo Leitner @ 2016-03-08 19:41 UTC (permalink / raw)
To: Martin KaFai Lau
Cc: netdev, Kernel Team, Chris Rapier, Eric Dumazet, Neal Cardwell,
Yuchung Cheng
On Mon, Mar 07, 2016 at 06:01:05PM -0800, Martin KaFai Lau wrote:
> v2:
> Rework based on recent fix by Eric:
> commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment")
>
> v1:
Patch itself looks good to me, just this patch history is better placed
on the Notes region (together with the diffstat) or place a scissors
mark, otherwise it will be present in the git changelog too.
> Per RFC4898, they count segments sent/received
> containing a positive length data segment (that includes
> retransmission segments carrying data). Unlike
> tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments
> carrying no data (e.g. pure ack).
>
> The patch also updates the segs_in in tcp_fastopen_add_skb()
> so that segs_in >= data_segs_in property is kept.
>
> Together with retransmission data, tcpi_data_segs_out
> gives a better signal on the rxmit rate.
>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> Cc: Chris Rapier <rapier@psc.edu>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Marcelo Ricardo Leitner <mleitner@redhat.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> ---
> include/linux/tcp.h | 6 ++++++
> include/net/tcp.h | 10 ++++++++++
> include/uapi/linux/tcp.h | 2 ++
> net/ipv4/tcp.c | 2 ++
> net/ipv4/tcp_fastopen.c | 4 ++++
> net/ipv4/tcp_ipv4.c | 2 +-
> net/ipv4/tcp_minisocks.c | 2 +-
> net/ipv4/tcp_output.c | 4 +++-
> net/ipv6/tcp_ipv6.c | 2 +-
> 9 files changed, 30 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index bcbf51d..7be9b12 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -158,6 +158,9 @@ struct tcp_sock {
> u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
> * total number of segments in.
> */
> + u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
> + * total number of data segments in.
> + */
> u32 rcv_nxt; /* What we want to receive next */
> u32 copied_seq; /* Head of yet unread data */
> u32 rcv_wup; /* rcv_nxt on last window update sent */
> @@ -165,6 +168,9 @@ struct tcp_sock {
> u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
> * The total number of segments sent.
> */
> + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
> + * total number of data segments sent.
> + */
> u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
> * sum(delta(snd_una)), or how many bytes
> * were acked.
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index e90db85..e2916cc 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1816,4 +1816,14 @@ static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
> skb->truesize = 2;
> }
>
> +static inline void tcp_segs_in(struct tcp_sock *tp, struct sk_buff *skb)
> +{
> + u16 segs_in;
> +
> + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + tp->segs_in += segs_in;
> + if (skb->len > tcp_hdrlen(skb))
> + tp->data_segs_in += segs_in;
> +}
> +
> #endif /* _TCP_H */
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index fe95446..53e8e3f 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -199,6 +199,8 @@ struct tcp_info {
>
> __u32 tcpi_notsent_bytes;
> __u32 tcpi_min_rtt;
> + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
> + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */
> };
>
> /* for TCP_MD5SIG socket option */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index f9faadb..6b01b48 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2728,6 +2728,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
> info->tcpi_notsent_bytes = max(0, notsent_bytes);
>
> info->tcpi_min_rtt = tcp_min_rtt(tp);
> + info->tcpi_data_segs_in = tp->data_segs_in;
> + info->tcpi_data_segs_out = tp->data_segs_out;
> }
> EXPORT_SYMBOL_GPL(tcp_get_info);
>
> diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
> index fdb286d..f583c85 100644
> --- a/net/ipv4/tcp_fastopen.c
> +++ b/net/ipv4/tcp_fastopen.c
> @@ -131,6 +131,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
> void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> + u16 segs_in;
>
> if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
> return;
> @@ -154,6 +155,9 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
> * as we certainly are not changing upper 32bit value (0)
> */
> tp->bytes_received = skb->len;
> + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + tp->segs_in = segs_in;
> + tp->data_segs_in = segs_in;
>
> if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
> tcp_fin(sk);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 4c8d58d..0b02ef7 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1650,7 +1650,7 @@ process:
> sk_incoming_cpu_update(sk);
>
> bh_lock_sock_nested(sk);
> - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + tcp_segs_in(tcp_sk(sk), skb);
> ret = 0;
> if (!sock_owned_by_user(sk)) {
> if (!tcp_prequeue(sk, skb))
> diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
> index ae90e4b..acb366d 100644
> --- a/net/ipv4/tcp_minisocks.c
> +++ b/net/ipv4/tcp_minisocks.c
> @@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
> int ret = 0;
> int state = child->sk_state;
>
> - tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + tcp_segs_in(tcp_sk(child), skb);
> if (!sock_owned_by_user(child)) {
> ret = tcp_rcv_state_process(child, skb);
> /* Wakeup parent, send SIGIO */
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 7d2c7a4..7d2dc01 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
> if (likely(tcb->tcp_flags & TCPHDR_ACK))
> tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
>
> - if (skb->len != tcp_header_size)
> + if (skb->len != tcp_header_size) {
> tcp_event_data_sent(tp, sk);
> + tp->data_segs_out += tcp_skb_pcount(skb);
> + }
>
> if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
> TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 33f2820..9c16565 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1443,7 +1443,7 @@ process:
> sk_incoming_cpu_update(sk);
>
> bh_lock_sock_nested(sk);
> - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
> + tcp_segs_in(tcp_sk(sk), skb);
> ret = 0;
> if (!sock_owned_by_user(sk)) {
> if (!tcp_prequeue(sk, skb))
> --
> 2.5.1
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [RFC PATCH net-next v2] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In
2016-03-08 19:41 ` Marcelo Ricardo Leitner
@ 2016-03-08 20:24 ` David Miller
0 siblings, 0 replies; 3+ messages in thread
From: David Miller @ 2016-03-08 20:24 UTC (permalink / raw)
To: mleitner; +Cc: kafai, netdev, kernel-team, rapier, edumazet, ncardwell, ycheng
From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Tue, 8 Mar 2016 16:41:54 -0300
> On Mon, Mar 07, 2016 at 06:01:05PM -0800, Martin KaFai Lau wrote:
>> v2:
>> Rework based on recent fix by Eric:
>> commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment")
>>
>> v1:
>
> Patch itself looks good to me, just this patch history is better placed
> on the Notes region (together with the diffstat) or place a scissors
> mark, otherwise it will be present in the git changelog too.
I like seeing it in the commit message.
Then later people can figure out answers to questions like "why didn't you
implement this like X" and see in the version history that this was
attempted first and then intentionally changed.
I even move version history into the commit message when people try to
move it out, so please don't do that.
This is important _especially_ for "0/N" header postings for a patch
series.
DO NOT LOSE INFORMATION.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-03-08 20:24 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-03-08 2:01 [RFC PATCH net-next v2] tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Martin KaFai Lau
2016-03-08 19:41 ` Marcelo Ricardo Leitner
2016-03-08 20:24 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).