public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] TCP Zero Copy for mmapped files
@ 2002-12-30  1:09 Thomas Ogrisegg
  2002-12-30  1:29 ` Larry McVoy
  0 siblings, 1 reply; 19+ messages in thread
From: Thomas Ogrisegg @ 2002-12-30  1:09 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 529 bytes --]

The following patch (for 2.4.20 -- should work with all kernels
above 2.4.17) implements TCP Zero Copy for normal (writing)
socket operations on memory mapped files.

This is a major speedup for the TCP/IP stack (throughput more than
doubles, depending on the size of the file) and makes sendfile(2)
nearly useless.

BTW: When I ran a (loopback) benchmark with my very own HTTP
server, it outperformed TUX by roughly 6%, and by roughly 20%
with logging disabled.

Please CC any replies to me, as I'm not subscribed to this list.

[-- Attachment #2: tcp.diff --]
[-- Type: text/plain, Size: 2685 bytes --]

--- linux.old/net/ipv4/tcp.c	Fri Nov 29 00:53:15 2002
+++ linux-2.4.20/net/ipv4/tcp.c	Sun Dec 29 20:30:10 2002
@@ -204,6 +204,7 @@
  *		Andi Kleen 	:	Make poll agree with SIGIO
  *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
  *					lingertime == 0 (RFC 793 ABORT Call)
+ *	Thomas Ogrisegg		:	Added TCP Zero Copy for mmapped files
  *					
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -1006,6 +1007,41 @@
 	return tmp;
 }
 
+/* do_generic_file_read() actor: send one page on the socket in desc->buf. */
+static ssize_t file_send_actor (read_descriptor_t *desc, struct page *page,
+	unsigned long offset, unsigned long size)
+{
+	ssize_t res;
+	unsigned long count = desc->count;
+	struct sock *sk = (struct sock *) desc->buf;
+	int flags;
+
+	if (size > count)
+		size = count;
+	flags = (sk->socket->file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+	if (size < count) flags |= MSG_MORE;
+
+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
+	if (!(sk->route_caps & NETIF_F_SG) ||
+		!(sk->route_caps & TCP_ZC_CSUM_FLAGS)) {
+		/* No NETIF_F_SG or no checksum flag: take the fallback path. */
+		/* NOTE(review): may re-enter tcp_sendmsg() -- verify locking. */
+		res = sock_no_sendpage(sk->socket, page, offset, size, flags);
+	} else {
+		TCP_CHECK_TIMER(sk);
+		res = do_tcp_sendpages(sk, &page, offset, size, flags);
+		TCP_CHECK_TIMER(sk);
+	}
+#undef TCP_ZC_CSUM_FLAGS
+	/* Account both paths: tcp_sendmsg() relies on desc->written/error. */
+	if (res < 0) desc->error = res;
+	else {
+		desc->count -= res;
+		desc->written += res;
+	}
+	return res;
+}
+
 int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
 {
 	struct iovec *iov;
@@ -1015,6 +1051,7 @@
 	int mss_now;
 	int err, copied;
 	long timeo;
+	int has_sendpage = sk->socket->file->f_op->sendpage != NULL;
 
 	tp = &(sk->tp_pinfo.af_tcp);
 
@@ -1049,6 +1086,44 @@
 
 		iov++;
 
+		if (seglen >= PAGE_SIZE && has_sendpage) {
+			struct vm_area_struct *vma =
+				find_vma (current->mm, (long) from);
+			struct file *filp;
+
+			if (vma && (filp = vma->vm_file)) {
+				read_descriptor_t desc;
+				struct inode *in, *out;
+				loff_t pos = (long) from - vma->vm_start;
+
+				in  = filp->f_dentry->d_inode;
+				out = sk->socket->file->f_dentry->d_inode;
+
+				if (locks_verify_area (FLOCK_VERIFY_READ, in,
+					filp, filp->f_pos, seglen))
+					goto out_no_zero_copy;
+
+				if (locks_verify_area (FLOCK_VERIFY_WRITE, out,
+					sk->socket->file, 0, seglen))
+					goto out_no_zero_copy;
+
+				desc.written = 0;
+				desc.count   = seglen;
+				desc.buf     = (char *) sk;
+				desc.error   = 0;
+
+				do_generic_file_read (filp, &pos, &desc,
+					file_send_actor);
+
+				if (!desc.written) {
+					err = desc.error;
+					goto do_error;
+				}
+				copied += desc.written;
+				continue;
+			}
+		}
+out_no_zero_copy:
 		while (seglen > 0) {
 			int copy;
 			

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2003-01-06 23:29 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-12-30  1:09 [PATCH] TCP Zero Copy for mmapped files Thomas Ogrisegg
2002-12-30  1:29 ` Larry McVoy
2003-01-02  6:37   ` David S. Miller
2003-01-02 22:12     ` Thomas Ogrisegg
2003-01-02 22:28       ` Larry McVoy
2003-01-02 23:20         ` Alan Cox
2003-01-02 23:16           ` David S. Miller
2003-01-03  0:56             ` Alan Cox
2003-01-03  2:40               ` David S. Miller
2003-01-03  2:41               ` Linus Torvalds
2003-01-02 23:13       ` David S. Miller
2003-01-03  0:45         ` Thomas Ogrisegg
2003-01-03  1:01           ` Larry McVoy
2003-01-03  1:59             ` Alan Cox
2003-01-06 14:36               ` Gianni Tedesco
2003-01-06 23:29                 ` David S. Miller
2003-01-03  1:56           ` Alan Cox
2003-01-03  1:27             ` Larry McVoy
2003-01-03  2:42           ` David S. Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox