Netdev List
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: David Miller <davem@davemloft.net>
Cc: netdev <netdev@vger.kernel.org>,
	"Neal Cardwell" <ncardwell@google.com>,
	"Jeff Kirsher" <jeffrey.t.kirsher@intel.com>,
	"Tom Herbert" <therbert@google.com>,
	"Herbert Xu" <herbert@gondor.apana.org.au>,
	"Ben Hutchings" <bhutchings@solarflare.com>,
	"Matt Carlson" <mcarlson@broadcom.com>,
	"Michael Chan" <mchan@broadcom.com>,
	"Maciej Żenczykowski" <maze@google.com>,
	"Ilpo Järvinen" <ilpo.jarvinen@helsinki.fi>
Subject: [PATCH 4/4 net-next] tcp: makes tcp_try_coalesce aware of skb->head_frag
Date: Fri, 27 Apr 2012 12:38:33 +0200	[thread overview]
Message-ID: <1335523113.2775.239.camel@edumazet-glaptop> (raw)

From: Eric Dumazet <edumazet@google.com>

TCP coalesce can check if skb to be merged has its skb->head mapped to a
page fragment, instead of a kmalloc() area.

We had to disable coalescing in this case, for performance reasons.

We 'upgrade' skb->head as a fragment in itself.

This reduces number of cache misses when user makes its copies, since a
less sk_buff are fetched.

This makes receive and ofo queues shorter and thus reduce cache line
misses in TCP stack.

This is a followup of patch "net: allow skb->head to be a page fragment"

Tested with tg3 nic, with GRO on or off. We can see "TCPRcvCoalesce"
counter being incremented.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Matt Carlson <mcarlson@broadcom.com>
Cc: Michael Chan <mchan@broadcom.com>
---
 net/ipv4/tcp_input.c |   55 ++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c93b0cb..96a631d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4464,10 +4464,12 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
  */
 static bool tcp_try_coalesce(struct sock *sk,
 			     struct sk_buff *to,
-			     struct sk_buff *from)
+			     struct sk_buff *from,
+			     bool *fragstolen)
 {
-	int len = from->len;
+	int delta, len = from->len;
 
+	*fragstolen = false;
 	if (tcp_hdr(from)->fin)
 		return false;
 	if (len <= skb_tailroom(to)) {
@@ -4478,15 +4480,19 @@ merge:
 		TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
 		return true;
 	}
+
+	if (skb_has_frag_list(to) || skb_has_frag_list(from))
+		return false;
+
 	if (skb_headlen(from) == 0 &&
-	    !skb_has_frag_list(to) &&
-	    !skb_has_frag_list(from) &&
 	    (skb_shinfo(to)->nr_frags +
 	     skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) {
-		int delta = from->truesize - ksize(from->head) -
-			    SKB_DATA_ALIGN(sizeof(struct sk_buff));
+		WARN_ON_ONCE(from->head_frag);
+		delta = from->truesize - ksize(from->head) -
+			SKB_DATA_ALIGN(sizeof(struct sk_buff));
 
 		WARN_ON_ONCE(delta < len);
+copyfrags:
 		memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
 		       skb_shinfo(from)->frags,
 		       skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
@@ -4499,6 +4505,20 @@ merge:
 		to->data_len += len;
 		goto merge;
 	}
+	if (from->head_frag) {
+		struct page *page;
+		unsigned int offset;
+
+		if (skb_shinfo(to)->nr_frags + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+			return false;
+		page = virt_to_head_page(from->head);
+		offset = from->data - (unsigned char *)page_address(page);
+		skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+				   page, offset, skb_headlen(from));
+		*fragstolen = true;
+		delta = len; /* we dont know real truesize... */
+		goto copyfrags;
+	}
 	return false;
 }
 
@@ -4540,10 +4560,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	end_seq = TCP_SKB_CB(skb)->end_seq;
 
 	if (seq == TCP_SKB_CB(skb1)->end_seq) {
-		if (!tcp_try_coalesce(sk, skb1, skb)) {
+		bool fragstolen;
+
+		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
 			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
 		} else {
-			__kfree_skb(skb);
+			if (fragstolen)
+				kmem_cache_free(skbuff_head_cache, skb);
+			else
+				__kfree_skb(skb);
 			skb = NULL;
 		}
 
@@ -4626,6 +4651,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
+	bool fragstolen = false;
 
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
@@ -4672,7 +4698,9 @@ queue_and_out:
 				goto drop;
 
 			tail = skb_peek_tail(&sk->sk_receive_queue);
-			eaten = (tail && tcp_try_coalesce(sk, tail, skb)) ? 1 : 0;
+			eaten = (tail &&
+				 tcp_try_coalesce(sk, tail, skb,
+						  &fragstolen)) ? 1 : 0;
 			if (eaten <= 0) {
 				skb_set_owner_r(skb, sk);
 				__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -4699,9 +4727,12 @@ queue_and_out:
 
 		tcp_fast_path_check(sk);
 
-		if (eaten > 0)
-			__kfree_skb(skb);
-		else if (!sock_flag(sk, SOCK_DEAD))
+		if (eaten > 0) {
+			if (fragstolen)
+				kmem_cache_free(skbuff_head_cache, skb);
+			else
+				__kfree_skb(skb);
+		} else if (!sock_flag(sk, SOCK_DEAD))
 			sk->sk_data_ready(sk, 0);
 		return;
 	}

             reply	other threads:[~2012-04-27 10:38 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-27 10:38 Eric Dumazet [this message]
2012-05-01  1:48 ` [PATCH 4/4 net-next] tcp: makes tcp_try_coalesce aware of skb->head_frag David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1335523113.2775.239.camel@edumazet-glaptop \
    --to=eric.dumazet@gmail.com \
    --cc=bhutchings@solarflare.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=ilpo.jarvinen@helsinki.fi \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=maze@google.com \
    --cc=mcarlson@broadcom.com \
    --cc=mchan@broadcom.com \
    --cc=ncardwell@google.com \
    --cc=netdev@vger.kernel.org \
    --cc=therbert@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox