All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arjun Roy <arjunroy.kdev@gmail.com>
To: davem@davemloft.net, netdev@vger.kernel.org,
	akpm@linux-foundation.org, linux-mm@kvack.org
Cc: arjunroy@google.com, Eric Dumazet <edumazet@google.com>,
	Soheil Hassas Yeganeh <soheil@google.com>
Subject: [PATCH resend mm,net-next 3/3] net-zerocopy: Use vm_insert_pages() for tcp rcv zerocopy.
Date: Mon, 27 Jan 2020 18:59:58 -0800	[thread overview]
Message-ID: <20200128025958.43490-3-arjunroy.kdev@gmail.com> (raw)
In-Reply-To: <20200128025958.43490-1-arjunroy.kdev@gmail.com>

From: Arjun Roy <arjunroy@google.com>

Use vm_insert_pages() for tcp receive zerocopy. Spin lock cycles
(as reported by perf) drop from a couple of percentage points
to a fraction of a percent. This results in a roughly 6% increase in
efficiency, measured roughly as zerocopy receive count divided by CPU
utilization.

The intention of this patch-set is to reduce atomic ops for
tcp zerocopy receives, which normally hits the same spinlock multiple
times consecutively.

Signed-off-by: Arjun Roy <arjunroy@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>

---
 net/ipv4/tcp.c | 67 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 61 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34490d972758..52f96c3ceab3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1861,14 +1861,48 @@ int tcp_mmap(struct file *file, struct socket *sock,
 }
 EXPORT_SYMBOL(tcp_mmap);
 
+static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
+					struct page **pages,
+					unsigned long pages_to_map,
+					unsigned long *insert_addr,
+					u32 *length_with_pending,
+					u32 *seq,
+					struct tcp_zerocopy_receive *zc)
+{
+	unsigned long pages_remaining = pages_to_map;
+	int bytes_mapped;
+	int ret;
+
+	ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
+	bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
+	/* Even if vm_insert_pages fails, it may have partially succeeded in
+	 * mapping (some but not all of the pages).
+	 */
+	*seq += bytes_mapped;
+	*insert_addr += bytes_mapped;
+	if (ret) {
+		/* But if vm_insert_pages did fail, we have to unroll some state
+		 * we speculatively touched before.
+		 */
+		const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
+		*length_with_pending -= bytes_not_mapped;
+		zc->recv_skip_hint += bytes_not_mapped;
+	}
+	return ret;
+}
+
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc)
 {
 	unsigned long address = (unsigned long)zc->address;
 	u32 length = 0, seq, offset, zap_len;
+	#define PAGE_BATCH_SIZE 8
+	struct page *pages[PAGE_BATCH_SIZE];
 	const skb_frag_t *frags = NULL;
 	struct vm_area_struct *vma;
 	struct sk_buff *skb = NULL;
+	unsigned long pg_idx = 0;
+	unsigned long curr_addr;
 	struct tcp_sock *tp;
 	int inq;
 	int ret;
@@ -1901,15 +1935,26 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		zc->recv_skip_hint = zc->length;
 	}
 	ret = 0;
+	curr_addr = address;
 	while (length + PAGE_SIZE <= zc->length) {
 		if (zc->recv_skip_hint < PAGE_SIZE) {
+			/* If we're here, finish the current batch. */
+			if (pg_idx) {
+				ret = tcp_zerocopy_vm_insert_batch(vma, pages,
+								   pg_idx,
+								   &curr_addr,
+								   &length,
+								   &seq, zc);
+				if (ret)
+					goto out;
+				pg_idx = 0;
+			}
 			if (skb) {
 				skb = skb->next;
 				offset = seq - TCP_SKB_CB(skb)->seq;
 			} else {
 				skb = tcp_recv_skb(sk, seq, &offset);
 			}
-
 			zc->recv_skip_hint = skb->len - offset;
 			offset -= skb_headlen(skb);
 			if ((int)offset < 0 || skb_has_frag_list(skb))
@@ -1933,14 +1978,24 @@ static int tcp_zerocopy_receive(struct sock *sk,
 			zc->recv_skip_hint -= remaining;
 			break;
 		}
-		ret = vm_insert_page(vma, address + length,
-				     skb_frag_page(frags));
-		if (ret)
-			break;
+		pages[pg_idx] = skb_frag_page(frags);
+		pg_idx++;
 		length += PAGE_SIZE;
-		seq += PAGE_SIZE;
 		zc->recv_skip_hint -= PAGE_SIZE;
 		frags++;
+		if (pg_idx == PAGE_BATCH_SIZE) {
+			ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+							   &curr_addr, &length,
+							   &seq, zc);
+			if (ret)
+				goto out;
+			pg_idx = 0;
+		}
+	}
+	if (pg_idx) {
+		ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+						   &curr_addr, &length, &seq,
+						   zc);
 	}
 out:
 	up_read(&current->mm->mmap_sem);
-- 
2.25.0.rc1.283.g88dfdc4193-goog



  parent reply	other threads:[~2020-01-28  3:00 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-01-28  2:59 [PATCH resend mm,net-next 1/3] mm: Refactor insert_page to prepare for batched-lock insert Arjun Roy
2020-01-28  2:59 ` [PATCH resend mm,net-next 2/3] mm: Add vm_insert_pages() Arjun Roy
2020-02-13  2:41   ` Andrew Morton
2020-02-13 17:09     ` Arjun Roy
2020-02-13 21:37     ` Linus Torvalds
2020-02-13 21:54   ` Matthew Wilcox
2020-02-13 22:06     ` Arjun Roy
2020-01-28  2:59 ` Arjun Roy [this message]
2020-02-13  2:56   ` [PATCH resend mm,net-next 3/3] net-zerocopy: Use vm_insert_pages() for tcp rcv zerocopy Andrew Morton
2020-02-17  2:49     ` Arjun Roy
2020-02-21 21:21       ` Arjun Roy
2020-02-24  3:37         ` Andrew Morton
2020-02-24 16:19           ` Arjun Roy
2020-04-10 19:04         ` Andrew Morton
2020-04-10 19:13           ` Arjun Roy
2020-04-10 19:15           ` Arjun Roy
2020-02-13  2:41 ` [PATCH resend mm,net-next 1/3] mm: Refactor insert_page to prepare for batched-lock insert Andrew Morton
2020-02-13 16:52   ` Arjun Roy
2020-02-13 16:55   ` Arjun Roy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200128025958.43490-3-arjunroy.kdev@gmail.com \
    --to=arjunroy.kdev@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=arjunroy@google.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=soheil@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.