* [PATCH][RFC] network splice receive v3
@ 2007-07-11 9:19 Jens Axboe
2007-07-11 18:17 ` Joel Becker
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Jens Axboe @ 2007-07-11 9:19 UTC (permalink / raw)
To: netdev, linux-kernel; +Cc: olaf.kirch, johnpol
[-- Attachment #1: Type: text/plain, Size: 726 bytes --]
Hi,
Here's an updated implementation of tcp network splice receive support.
It actually works for me now, no data corruption seen.
For the original announcement and how to test it, see:
http://marc.info/?l=linux-netdev&m=118103093400770&w=2
The splice core changes needed to support this are now merged in
2.6.22-git, so the patchset shrinks to just two patches - one for adding
a release hook, and one for the networking changes.
The code is also available in the splice-net branch here:
git://git.kernel.dk/data/git/linux-2.6-block.git splice-net
There's a third experimental patch in there that allows vmsplice
directly to user memory, that still needs some work though.
Comments, testing welcome!
--
Jens Axboe
[-- Attachment #2: 0001-splice-don-t-assume-regular-pages-in-splice_to_pipe.patch --]
[-- Type: text/x-patch, Size: 2136 bytes --]
>From e59a68f2d7d261b301960b97659910aab8e3d776 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 11 Jun 2007 13:00:32 +0200
Subject: [PATCH] splice: don't assume regular pages in splice_to_pipe()
Allow caller to pass in a release function, there might be
other resources that need releasing as well. Needed for
network receive.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/splice.c | 9 ++++++++-
include/linux/splice.h | 1 +
2 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 3160951..4b4b501 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -254,11 +254,16 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}
while (page_nr < spd_pages)
- page_cache_release(spd->pages[page_nr++]);
+ spd->spd_release(spd, page_nr++);
return ret;
}
+static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
+{
+ page_cache_release(spd->pages[i]);
+}
+
static int
__generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
@@ -277,6 +282,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
.partial = partial,
.flags = flags,
.ops = &page_cache_pipe_buf_ops,
+ .spd_release = spd_release_page,
};
index = *ppos >> PAGE_CACHE_SHIFT;
@@ -1674,6 +1680,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
.partial = partial,
.flags = flags,
.ops = &user_page_pipe_buf_ops,
+ .spd_release = spd_release_page,
};
pipe = pipe_info(file->f_path.dentry->d_inode);
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 2c08456..b8fa41e 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -54,6 +54,7 @@ struct splice_pipe_desc {
int nr_pages; /* number of pages in map */
unsigned int flags; /* splice flags */
const struct pipe_buf_operations *ops;/* ops associated with output pipe */
+ void (*spd_release)(struct splice_pipe_desc *, unsigned int);
};
typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
--
1.5.3.rc0.90.gbaa79
[-- Attachment #3: 0002-TCP-splice-receive-support.patch --]
[-- Type: text/x-patch, Size: 14675 bytes --]
>From b62e4a5a3e3220702e837e556427972dc591ff59 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Wed, 20 Jun 2007 09:54:14 +0200
Subject: [PATCH] TCP splice receive support
Support for network splice receive.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
include/linux/net.h | 3 +
include/linux/skbuff.h | 5 +
include/net/tcp.h | 3 +
net/core/skbuff.c | 246 ++++++++++++++++++++++++++++++++++++++++++++++++
net/ipv4/af_inet.c | 1 +
net/ipv4/tcp.c | 129 +++++++++++++++++++++++++
net/socket.c | 13 +++
7 files changed, 400 insertions(+), 0 deletions(-)
diff --git a/include/linux/net.h b/include/linux/net.h
index efc4517..472ee12 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -19,6 +19,7 @@
#define _LINUX_NET_H
#include <linux/wait.h>
+#include <linux/splice.h>
#include <asm/socket.h>
struct poll_table_struct;
@@ -165,6 +166,8 @@ struct proto_ops {
struct vm_area_struct * vma);
ssize_t (*sendpage) (struct socket *sock, struct page *page,
int offset, size_t size, int flags);
+ ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len, unsigned int flags);
};
struct net_proto_family {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f0b2f7..177bffc 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1504,6 +1504,11 @@ extern int skb_store_bits(struct sk_buff *skb, int offset,
extern __wsum skb_copy_and_csum_bits(const struct sk_buff *skb,
int offset, u8 *to, int len,
__wsum csum);
+extern int skb_splice_bits(struct sk_buff *skb,
+ unsigned int offset,
+ struct pipe_inode_info *pipe,
+ unsigned int len,
+ unsigned int flags);
extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
extern void skb_split(struct sk_buff *skb,
struct sk_buff *skb1, const u32 len);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8af9ae..8e86697 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -308,6 +308,9 @@ extern int tcp_twsk_unique(struct sock *sk,
extern void tcp_twsk_destructor(struct sock *sk);
+extern ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len, unsigned int flags);
+
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3943c3a..158e287 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -52,6 +52,7 @@
#endif
#include <linux/string.h>
#include <linux/skbuff.h>
+#include <linux/splice.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
@@ -71,6 +72,40 @@
static struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+ kfree_skb(skb);
+}
+
+static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+ skb_get(skb);
+}
+
+static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+
+/* Pipe buffer operations for a socket. */
+static struct pipe_buf_operations sock_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = sock_pipe_buf_release,
+ .steal = sock_pipe_buf_steal,
+ .get = sock_pipe_buf_get,
+};
+
/*
* Keep out-of-line to prevent kernel bloat.
* __builtin_return_address is not used because it is not always
@@ -1116,6 +1151,217 @@ fault:
return -EFAULT;
}
+/*
+ * Callback from splice_to_pipe(), if we need to release some pages
+ * at the end of the spd in case we error'ed out in filling the pipe.
+ */
+static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+ struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
+
+ kfree_skb(skb);
+}
+
+/*
+ * Fill page/offset/length into spd, if it can hold more pages.
+ */
+static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+ unsigned int len, unsigned int offset,
+ struct sk_buff *skb)
+{
+ if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+ return 1;
+
+ spd->pages[spd->nr_pages] = page;
+ spd->partial[spd->nr_pages].len = len;
+ spd->partial[spd->nr_pages].offset = offset;
+ spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
+ spd->nr_pages++;
+ return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. Returns number of
+ * pages mapped.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+ unsigned int *total_len,
+ struct splice_pipe_desc *spd)
+{
+ unsigned int nr_pages = spd->nr_pages;
+ unsigned int poff, plen, len, toff, tlen;
+ int headlen, seg;
+
+ toff = *offset;
+ tlen = *total_len;
+ if (!tlen)
+ goto err;
+
+ /*
+ * if the offset is greater than the linear part, go directly to
+ * the fragments.
+ */
+ headlen = skb_headlen(skb);
+ if (toff >= headlen) {
+ toff -= headlen;
+ goto map_frag;
+ }
+
+ /*
+ * first map the linear region into the pages/partial map, skipping
+ * any potential initial offset.
+ */
+ len = 0;
+ while (len < headlen) {
+ void *p = skb->data + len;
+
+ poff = (unsigned long) p & (PAGE_SIZE - 1);
+ plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
+ len += plen;
+
+ if (toff) {
+ if (plen <= toff) {
+ toff -= plen;
+ continue;
+ }
+ plen -= toff;
+ poff += toff;
+ toff = 0;
+ }
+
+ plen = min(plen, tlen);
+ if (!plen)
+ break;
+
+ /*
+ * just jump directly to update and return, no point
+ * in going over fragments when the output is full.
+ */
+ if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
+ goto done;
+
+ tlen -= plen;
+ }
+
+ /*
+ * then map the fragments
+ */
+map_frag:
+ for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
+ const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+
+ plen = f->size;
+ poff = f->page_offset;
+
+ if (toff) {
+ if (plen <= toff) {
+ toff -= plen;
+ continue;
+ }
+ plen -= toff;
+ poff += toff;
+ toff = 0;
+ }
+
+ plen = min(plen, tlen);
+ if (!plen)
+ break;
+
+ if (spd_fill_page(spd, f->page, plen, poff, skb))
+ break;
+
+ tlen -= plen;
+ }
+
+done:
+ if (spd->nr_pages - nr_pages) {
+ *offset = 0;
+ *total_len = tlen;
+ return 0;
+ }
+err:
+ return 1;
+}
+
+/*
+ * Map data from the skb to a pipe. Should handle both the linear part,
+ * the fragments, and the frag list. It does NOT handle frag lists within
+ * the frag list, if such a thing exists. We'd probably need to recurse to
+ * handle that cleanly.
+ */
+int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+ struct pipe_inode_info *pipe, unsigned int tlen,
+ unsigned int flags)
+{
+ struct partial_page partial[PIPE_BUFFERS];
+ struct page *pages[PIPE_BUFFERS];
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &sock_pipe_buf_ops,
+ .spd_release = sock_spd_release,
+ };
+ struct sk_buff *skb;
+
+ /*
+ * I'd love to avoid the clone here, but tcp_read_sock()
+ * ignores reference counts and unconditonally kills the sk_buff
+ * on return from the actor.
+ */
+ skb = skb_clone(__skb, GFP_KERNEL);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ /*
+ * __skb_splice_bits() only fails if the output has no room left,
+ * so no point in going over the frag_list for the error case.
+ */
+ if (__skb_splice_bits(skb, &offset, &tlen, &spd))
+ goto done;
+ else if (!tlen)
+ goto done;
+
+ /*
+ * now see if we have a frag_list to map
+ */
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list && tlen; list = list->next) {
+ if (__skb_splice_bits(list, &offset, &tlen, &spd))
+ break;
+ }
+ }
+
+done:
+ /*
+ * drop our reference to the clone, the pipe consumption will
+ * drop the rest.
+ */
+ kfree_skb(skb);
+
+ if (spd.nr_pages) {
+ int ret;
+
+ /*
+ * Drop the socket lock, otherwise we have reverse
+ * locking dependencies between sk_lock and i_mutex
+ * here as compared to sendfile(). We enter here
+ * with the socket lock held, and splice_to_pipe() will
+ * grab the pipe inode lock. For sendfile() emulation,
+ * we call into ->sendpage() with the i_mutex lock held
+ * and networking will grab the socket lock.
+ */
+ release_sock(__skb->sk);
+ ret = splice_to_pipe(pipe, &spd);
+ lock_sock(__skb->sk);
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 041fba3..0ff9f86 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -835,6 +835,7 @@ const struct proto_ops inet_stream_ops = {
.recvmsg = sock_common_recvmsg,
.mmap = sock_no_mmap,
.sendpage = tcp_sendpage,
+ .splice_read = tcp_splice_read,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 450f44b..63efd7a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -253,6 +253,10 @@
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/skbuff.h>
+#include <linux/splice.h>
+#include <linux/net.h>
+#include <linux/socket.h>
#include <linux/random.h>
#include <linux/bootmem.h>
#include <linux/cache.h>
@@ -264,6 +268,7 @@
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/netdma.h>
+#include <net/sock.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -291,6 +296,15 @@ EXPORT_SYMBOL(tcp_memory_allocated);
EXPORT_SYMBOL(tcp_sockets_allocated);
/*
+ * TCP splice context
+ */
+struct tcp_splice_state {
+ struct pipe_inode_info *pipe;
+ size_t len;
+ unsigned int flags;
+};
+
+/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
* All the sk_stream_mem_schedule() is of this nature: accounting
@@ -500,6 +514,120 @@ static inline void tcp_push(struct sock *sk, int flags, int mss_now,
}
}
+int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
+ unsigned int offset, size_t len)
+{
+ struct tcp_splice_state *tss = rd_desc->arg.data;
+
+ return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags);
+}
+
+static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
+{
+ /* Store TCP splice context information in read_descriptor_t. */
+ read_descriptor_t rd_desc = {
+ .arg.data = tss,
+ };
+
+ return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
+}
+
+/**
+ * tcp_splice_read - splice data from TCP socket to a pipe
+ * @sock: socket to splice from
+ * @ppos: position (not valid)
+ * @pipe: pipe to splice to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * Will read pages from given socket and fill them into a pipe.
+ *
+ **/
+ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct sock *sk = sock->sk;
+ struct tcp_splice_state tss = {
+ .pipe = pipe,
+ .len = len,
+ .flags = flags,
+ };
+ long timeo;
+ ssize_t spliced;
+ int ret;
+
+ /*
+ * We can't seek on a socket input
+ */
+ if (unlikely(*ppos))
+ return -ESPIPE;
+
+ ret = spliced = 0;
+
+ lock_sock(sk);
+
+ timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+ while (tss.len) {
+ ret = __tcp_splice_read(sk, &tss);
+ if (ret < 0)
+ break;
+ else if (!ret) {
+ if (spliced)
+ break;
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+ if (sock_flag(sk, SOCK_DONE))
+ break;
+ if (sk->sk_err) {
+ ret = sock_error(sk);
+ break;
+ }
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ break;
+ if (sk->sk_state == TCP_CLOSE) {
+ /*
+ * This occurs when user tries to read
+ * from never connected socket.
+ */
+ if (!sock_flag(sk, SOCK_DONE))
+ ret = -ENOTCONN;
+ break;
+ }
+ if (!timeo) {
+ ret = -EAGAIN;
+ break;
+ }
+ sk_wait_data(sk, &timeo);
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(timeo);
+ break;
+ }
+ continue;
+ }
+ tss.len -= ret;
+ spliced += ret;
+
+ release_sock(sk);
+ lock_sock(sk);
+
+ if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||
+ signal_pending(current))
+ break;
+ }
+
+ release_sock(sk);
+
+ if (spliced)
+ return spliced;
+
+ return ret;
+}
+
static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
size_t psize, int flags)
{
@@ -2515,6 +2643,7 @@ EXPORT_SYMBOL(tcp_poll);
EXPORT_SYMBOL(tcp_read_sock);
EXPORT_SYMBOL(tcp_recvmsg);
EXPORT_SYMBOL(tcp_sendmsg);
+EXPORT_SYMBOL(tcp_splice_read);
EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/socket.c b/net/socket.c
index f453019..41240f5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -111,6 +111,9 @@ static long compat_sock_ioctl(struct file *file,
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);
+static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags);
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
@@ -133,6 +136,7 @@ static const struct file_operations socket_file_ops = {
.fasync = sock_fasync,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
+ .splice_read = sock_splice_read,
};
/*
@@ -691,6 +695,15 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
return sock->ops->sendpage(sock, page, offset, size, flags);
}
+static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct socket *sock = file->private_data;
+
+ return sock->ops->splice_read(sock, ppos, pipe, len, flags);
+}
+
static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
struct sock_iocb *siocb)
{
--
1.5.3.rc0.90.gbaa79
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-11 9:19 [PATCH][RFC] network splice receive v3 Jens Axboe
@ 2007-07-11 18:17 ` Joel Becker
2007-07-11 18:26 ` Jens Axboe
2007-07-12 17:02 ` Evgeniy Polyakov
2007-07-19 9:05 ` YOSHIFUJI Hideaki / 吉藤英明
2 siblings, 1 reply; 8+ messages in thread
From: Joel Becker @ 2007-07-11 18:17 UTC (permalink / raw)
To: Jens Axboe; +Cc: netdev, linux-kernel, olaf.kirch, johnpol
On Wed, Jul 11, 2007 at 11:19:27AM +0200, Jens Axboe wrote:
> Subject: [PATCH] splice: don't assume regular pages in splice_to_pipe()
>
> Allow caller to pass in a release function, there might be
> other resources that need releasing as well. Needed for
> network receive.
>
> diff --git a/fs/splice.c b/fs/splice.c
> index 3160951..4b4b501 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -254,11 +254,16 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
> }
>
> while (page_nr < spd_pages)
> - page_cache_release(spd->pages[page_nr++]);
> + spd->spd_release(spd, page_nr++);
Rather than requiring the caller set this, shouldn't we just
allow it? Especially given there is only one non-page user?
while (page_nr < spd_pages)
- page_cache_release(spd->pages[page_nr++]);
+ if (spd->spd_release)
+ spd->spd_release(spd, page_nr++);
+ else
+ page_cache_release(spd->pages[page_nr++]);
Joel
--
"Any man who is under 30, and is not a liberal, has no heart;
and any man who is over 30, and is not a conservative, has no brains."
- Sir Winston Churchill
Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker@oracle.com
Phone: (650) 506-8127
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-11 18:17 ` Joel Becker
@ 2007-07-11 18:26 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2007-07-11 18:26 UTC (permalink / raw)
To: netdev, linux-kernel, olaf.kirch, johnpol
On Wed, Jul 11 2007, Joel Becker wrote:
> On Wed, Jul 11, 2007 at 11:19:27AM +0200, Jens Axboe wrote:
> > Subject: [PATCH] splice: don't assume regular pages in splice_to_pipe()
> >
> > Allow caller to pass in a release function, there might be
> > other resources that need releasing as well. Needed for
> > network receive.
> >
> > diff --git a/fs/splice.c b/fs/splice.c
> > index 3160951..4b4b501 100644
> > --- a/fs/splice.c
> > +++ b/fs/splice.c
> > @@ -254,11 +254,16 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
> > }
> >
> > while (page_nr < spd_pages)
> > - page_cache_release(spd->pages[page_nr++]);
> > + spd->spd_release(spd, page_nr++);
>
> Rather than requiring the caller set this, shouldn't we just
> allow it? Especially given there is only one non-page user?
>
> while (page_nr < spd_pages)
> - page_cache_release(spd->pages[page_nr++]);
> + if (spd->spd_release)
> + spd->spd_release(spd, page_nr++);
> + else
> + page_cache_release(spd->pages[page_nr++]);
Certainly possible, I think it's cleaner with it always being set
though. If it grows other out-of-splice.c users, then your change may be
a good idea though.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-11 9:19 [PATCH][RFC] network splice receive v3 Jens Axboe
2007-07-11 18:17 ` Joel Becker
@ 2007-07-12 17:02 ` Evgeniy Polyakov
2007-07-13 12:21 ` Jens Axboe
2007-07-19 9:05 ` YOSHIFUJI Hideaki / 吉藤英明
2 siblings, 1 reply; 8+ messages in thread
From: Evgeniy Polyakov @ 2007-07-12 17:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: netdev, linux-kernel, olaf.kirch
On Wed, Jul 11, 2007 at 11:19:27AM +0200, Jens Axboe (jens.axboe@oracle.com) wrote:
> Hi,
Hi Jens.
> Here's an updated implementation of tcp network splice receive support.
> It actually works for me now, no data corruption seen.
>
> For the original announcement and how to test it, see:
>
> http://marc.info/?l=linux-netdev&m=118103093400770&w=2
>
> The splice core changes needed to support this are now merged in
> 2.6.22-git, so the patchset shrinks to just two patches - one for adding
> a release hook, and one for the networking changes.
>
> The code is also available in the splice-net branch here:
>
> git://git.kernel.dk/data/git/linux-2.6-block.git splice-net
>
> There's a third experimental patch in there that allows vmsplice
> directly to user memory, that still needs some work though.
>
> Comments, testing welcome!
It looks like you included all bits we found in the previous runs, so
likely it will work well, but so far I have conflicts merging today's git
and your tree in include/linux/splice.h, fs/ext2/file.c, fs/splice.c and
mm/filemap_xip.c. This can be a problem with my tree though.
It really looks like the last tree we tested, so if you think additional
one will not hurt, feel free to ping, so I will completely rebase
testing tree.
> --
> Jens Axboe
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-12 17:02 ` Evgeniy Polyakov
@ 2007-07-13 12:21 ` Jens Axboe
2007-07-19 8:44 ` Evgeniy Polyakov
0 siblings, 1 reply; 8+ messages in thread
From: Jens Axboe @ 2007-07-13 12:21 UTC (permalink / raw)
To: Evgeniy Polyakov; +Cc: netdev, linux-kernel, olaf.kirch
On Thu, Jul 12 2007, Evgeniy Polyakov wrote:
> On Wed, Jul 11, 2007 at 11:19:27AM +0200, Jens Axboe (jens.axboe@oracle.com) wrote:
> > Hi,
>
> Hi Jens.
>
> > Here's an updated implementation of tcp network splice receive support.
> > It actually works for me now, no data corruption seen.
> >
> > For the original announcement and how to test it, see:
> >
> > http://marc.info/?l=linux-netdev&m=118103093400770&w=2
> >
> > The splice core changes needed to support this are now merged in
> > 2.6.22-git, so the patchset shrinks to just two patches - one for adding
> > a release hook, and one for the networking changes.
> >
> > The code is also available in the splice-net branch here:
> >
> > git://git.kernel.dk/data/git/linux-2.6-block.git splice-net
> >
> > There's a third experimental patch in there that allows vmsplice
> > directly to user memory, that still needs some work though.
> >
> > Comments, testing welcome!
>
> It looks like you included all bits we found in the previous runs, so
> likely it will work good, but so far I have conflicts merging todays git
> and your tree in include/linux/splice.h, fs/ext2/file.c, fs/splice.c and
> mm/filemap_xip.c. This can be a problem with my tree though.
Hmm, the patch should apply directly to the tree as of when I posted
this original mail, or any later one. I just tried a rebase, and it
rebased fine on top of the current -git as well. So I think the issue is
with your tree, sorry!
> It really looks like the last tree we tested, so if you think additional
> one will not hurt, feel free to ping, so I will completely rebase
> testing tree.
It would be great if you could retest! There are some minor changes in
there, and some extra testing definitely will not hurt.
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-13 12:21 ` Jens Axboe
@ 2007-07-19 8:44 ` Evgeniy Polyakov
0 siblings, 0 replies; 8+ messages in thread
From: Evgeniy Polyakov @ 2007-07-19 8:44 UTC (permalink / raw)
To: Jens Axboe; +Cc: netdev, linux-kernel, olaf.kirch
Hi.
On Fri, Jul 13, 2007 at 02:21:00PM +0200, Jens Axboe (jens.axboe@oracle.com) wrote:
> > It really looks like the last tree we tested, so if you think additional
> > one will not hurt, feel free to ping, so I will completely rebase
> > testing tree.
>
> It would be great if you could retest! There are some minor changes in
> there, and some extra testing definitely will not hurt.
I've just tested it with 2.6.22
(e1c1e98d2a3f57b22a0d4136c8160e54404aa437 commit) and did not find any
problems - after quite big files were transferred, the previously observed
skb leak is gone, there are no crashes (quite a few debug options are turned on
in config) and files are correct on both peers, so it works well.
> --
> Jens Axboe
--
Evgeniy Polyakov
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-11 9:19 [PATCH][RFC] network splice receive v3 Jens Axboe
2007-07-11 18:17 ` Joel Becker
2007-07-12 17:02 ` Evgeniy Polyakov
@ 2007-07-19 9:05 ` YOSHIFUJI Hideaki / 吉藤英明
2007-07-19 9:07 ` Jens Axboe
2 siblings, 1 reply; 8+ messages in thread
From: YOSHIFUJI Hideaki / 吉藤英明 @ 2007-07-19 9:05 UTC (permalink / raw)
To: jens.axboe; +Cc: netdev, linux-kernel, olaf.kirch, johnpol, yoshfuji
Hello.
In article <20070711091927.GQ4587@kernel.dk> (at Wed, 11 Jul 2007 11:19:27 +0200), Jens Axboe <jens.axboe@oracle.com> says:
> @@ -835,6 +835,7 @@ const struct proto_ops inet_stream_ops = {
> .recvmsg = sock_common_recvmsg,
> .mmap = sock_no_mmap,
> .sendpage = tcp_sendpage,
> + .splice_read = tcp_splice_read,
> #ifdef CONFIG_COMPAT
> .compat_setsockopt = compat_sock_common_setsockopt,
> .compat_getsockopt = compat_sock_common_getsockopt,
Please add similar bits in net/ipv6/af_inet6.c
unless there are any dependency on IPv4.
(And if there are, it is not good.)
--yoshfuji
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][RFC] network splice receive v3
2007-07-19 9:05 ` YOSHIFUJI Hideaki / 吉藤英明
@ 2007-07-19 9:07 ` Jens Axboe
0 siblings, 0 replies; 8+ messages in thread
From: Jens Axboe @ 2007-07-19 9:07 UTC (permalink / raw)
To: YOSHIFUJI Hideaki / 吉藤英明; +Cc: netdev, linux-kernel, olaf.kirch, johnpol
On Thu, Jul 19 2007, YOSHIFUJI Hideaki / 吉藤英明 wrote:
> Hello.
>
> In article <20070711091927.GQ4587@kernel.dk> (at Wed, 11 Jul 2007 11:19:27 +0200), Jens Axboe <jens.axboe@oracle.com> says:
>
> > @@ -835,6 +835,7 @@ const struct proto_ops inet_stream_ops = {
> > .recvmsg = sock_common_recvmsg,
> > .mmap = sock_no_mmap,
> > .sendpage = tcp_sendpage,
> > + .splice_read = tcp_splice_read,
> > #ifdef CONFIG_COMPAT
> > .compat_setsockopt = compat_sock_common_setsockopt,
> > .compat_getsockopt = compat_sock_common_getsockopt,
>
> Please add similar bits in net/ipv6/af_inet6.c
> unless there are any dependency on IPv4.
> (And if there are, it is not good.)
There are no specific ipv4 dependencies, it's just an oversight. So
thanks for the clue, I'll add it!
--
Jens Axboe
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2007-07-19 9:07 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-07-11 9:19 [PATCH][RFC] network splice receive v3 Jens Axboe
2007-07-11 18:17 ` Joel Becker
2007-07-11 18:26 ` Jens Axboe
2007-07-12 17:02 ` Evgeniy Polyakov
2007-07-13 12:21 ` Jens Axboe
2007-07-19 8:44 ` Evgeniy Polyakov
2007-07-19 9:05 ` YOSHIFUJI Hideaki / 吉藤英明
2007-07-19 9:07 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).