* [PATCH 1/18] splice: abstract out actor data
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
@ 2007-06-12 6:57 ` Jens Axboe
2007-06-12 15:31 ` Eric Dumazet
2007-06-12 6:57 ` [PATCH 2/18] vmsplice: add vmsplice-to-user support Jens Axboe
` (16 subsequent siblings)
17 siblings, 1 reply; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:57 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
For direct splicing (or private splicing), the output may not be a file.
So abstract out the handling into a specified actor function and put
the data in the splice_desc structure earlier, so we can build on top
of that.
This is the first step in better splice handling for drivers, and also
for implementing vmsplice _to_ user memory.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/ocfs2/file.c | 11 ++++-
fs/splice.c | 100 ++++++++++++++++++++++++++++++++-------------
include/linux/pipe_fs_i.h | 10 +++--
3 files changed, 85 insertions(+), 36 deletions(-)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c964..eb97b20 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
int ret, err;
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
-
- ret = __splice_from_pipe(pipe, out, ppos, len, flags,
- ocfs2_splice_write_actor);
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ };
+
+ sd.file = out;
+ ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
if (ret > 0) {
*ppos += ret;
diff --git a/fs/splice.c b/fs/splice.c
index cb21136..6871de6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -666,31 +666,24 @@ out_ret:
* key here is the 'actor' worker passed in that actually moves the data
* to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
*/
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
- struct file *out, loff_t *ppos, size_t len,
- unsigned int flags, splice_actor *actor)
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
+ splice_actor *actor)
{
int ret, do_wakeup, err;
- struct splice_desc sd;
ret = 0;
do_wakeup = 0;
- sd.total_len = len;
- sd.flags = flags;
- sd.file = out;
- sd.pos = *ppos;
-
for (;;) {
if (pipe->nrbufs) {
struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
const struct pipe_buf_operations *ops = buf->ops;
- sd.len = buf->len;
- if (sd.len > sd.total_len)
- sd.len = sd.total_len;
+ sd->len = buf->len;
+ if (sd->len > sd->total_len)
+ sd->len = sd->total_len;
- err = actor(pipe, buf, &sd);
+ err = actor(pipe, buf, sd);
if (err <= 0) {
if (!ret && err != -ENODATA)
ret = err;
@@ -702,10 +695,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
buf->offset += err;
buf->len -= err;
- sd.len -= err;
- sd.pos += err;
- sd.total_len -= err;
- if (sd.len)
+ sd->len -= err;
+ sd->pos += err;
+ sd->total_len -= err;
+ if (sd->len)
continue;
if (!buf->len) {
@@ -717,7 +710,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
do_wakeup = 1;
}
- if (!sd.total_len)
+ if (!sd->total_len)
break;
}
@@ -730,7 +723,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
break;
}
- if (flags & SPLICE_F_NONBLOCK) {
+ if (sd->flags & SPLICE_F_NONBLOCK) {
if (!ret)
ret = -EAGAIN;
break;
@@ -770,6 +763,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
{
ssize_t ret;
struct inode *inode = out->f_mapping->host;
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ };
+
+ sd.file = out;
/*
* The actor worker might be calling ->prepare_write and
@@ -778,7 +778,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
* pipe->inode, we have to order lock acquiry here.
*/
inode_double_lock(inode, pipe->inode);
- ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+ ret = __splice_from_pipe(pipe, &sd, actor);
inode_double_unlock(inode, pipe->inode);
return ret;
@@ -802,6 +802,11 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
{
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ };
ssize_t ret;
int err;
@@ -809,7 +814,8 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
if (unlikely(err))
return err;
- ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+ sd.file = out;
+ ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
if (ret > 0) {
*ppos += ret;
@@ -948,14 +954,17 @@ static long do_splice_to(struct file *in, loff_t *ppos,
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
-long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
- size_t len, unsigned int flags)
+/*
+ * Splices from an input file to an actor, using a 'direct' pipe.
+ */
+ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+ splice_direct_actor *actor)
{
struct pipe_inode_info *pipe;
long ret, bytes;
- loff_t out_off;
umode_t i_mode;
- int i;
+ size_t len;
+ int i, flags;
/*
* We require the input being a regular file, as we don't want to
@@ -991,7 +1000,13 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
*/
ret = 0;
bytes = 0;
- out_off = 0;
+ len = sd->total_len;
+ flags = sd->flags;
+
+ /*
+ * Don't block on output, we have to drain the direct pipe.
+ */
+ sd->flags &= ~SPLICE_F_NONBLOCK;
while (len) {
size_t read_len, max_read_len;
@@ -1001,19 +1016,19 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
*/
max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
- ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
+ ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags);
if (unlikely(ret < 0))
goto out_release;
read_len = ret;
+ sd->total_len = read_len;
/*
* NOTE: nonblocking mode only applies to the input. We
* must not do the output in nonblocking mode as then we
* could get stuck data in the internal pipe:
*/
- ret = do_splice_from(pipe, out, &out_off, read_len,
- flags & ~SPLICE_F_NONBLOCK);
+ ret = actor(pipe, sd);
if (unlikely(ret < 0))
goto out_release;
@@ -1058,6 +1073,33 @@ out_release:
return bytes;
return ret;
+
+}
+EXPORT_SYMBOL(splice_direct_to_actor);
+
+static int direct_splice_actor(struct pipe_inode_info *pipe,
+ struct splice_desc *sd)
+{
+ struct file *file = sd->file;
+
+ return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+}
+
+long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+ size_t len, unsigned int flags)
+{
+ struct splice_desc sd = {
+ .len = len,
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ };
+ size_t ret;
+
+ sd.file = out;
+ ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
+ *ppos = sd.pos;
+ return ret;
}
/*
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index c8884f9..883ba9b 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -94,13 +94,15 @@ struct splice_desc {
typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
struct splice_desc *);
+typedef int (splice_direct_actor)(struct pipe_inode_info *,
+ struct splice_desc *);
extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int,
splice_actor *);
-
-extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct file *,
- loff_t *, size_t, unsigned int,
- splice_actor *);
+extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
+ struct splice_desc *, splice_actor *);
+extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
+ splice_direct_actor *);
#endif
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* Re: [PATCH 1/18] splice: abstract out actor data
2007-06-12 6:57 ` [PATCH 1/18] splice: abstract out actor data Jens Axboe
@ 2007-06-12 15:31 ` Eric Dumazet
2007-06-12 16:22 ` Jens Axboe
0 siblings, 1 reply; 23+ messages in thread
From: Eric Dumazet @ 2007-06-12 15:31 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel
On Tue, 12 Jun 2007 08:57:57 +0200
Jens Axboe <jens.axboe@oracle.com> wrote:
> + struct splice_desc sd = {
> + .total_len = len,
> + .flags = flags,
> + .pos = *ppos,
> + };
> +
> + sd.file = out;
minor remark, why sd.file is setup differently than other fields ?
(this several times on this patch)
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 1/18] splice: abstract out actor data
2007-06-12 15:31 ` Eric Dumazet
@ 2007-06-12 16:22 ` Jens Axboe
2007-06-13 8:48 ` Peter Zijlstra
0 siblings, 1 reply; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 16:22 UTC (permalink / raw)
To: Eric Dumazet; +Cc: linux-kernel
On Tue, Jun 12 2007, Eric Dumazet wrote:
> On Tue, 12 Jun 2007 08:57:57 +0200
> Jens Axboe <jens.axboe@oracle.com> wrote:
>
>
> > + struct splice_desc sd = {
> > + .total_len = len,
> > + .flags = flags,
> > + .pos = *ppos,
> > + };
> > +
> > + sd.file = out;
>
> minor remark, why sd.file is setup differently than other fields ?
>
> (this several times on this patch)
yeah, it's inside an anonymous union, and apparently gcc doesn't like it
being initialized that way.
--
Jens Axboe
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 1/18] splice: abstract out actor data
2007-06-12 16:22 ` Jens Axboe
@ 2007-06-13 8:48 ` Peter Zijlstra
2007-06-13 8:48 ` Jens Axboe
0 siblings, 1 reply; 23+ messages in thread
From: Peter Zijlstra @ 2007-06-13 8:48 UTC (permalink / raw)
To: Jens Axboe; +Cc: Eric Dumazet, linux-kernel
On Tue, 2007-06-12 at 18:22 +0200, Jens Axboe wrote:
> On Tue, Jun 12 2007, Eric Dumazet wrote:
> > On Tue, 12 Jun 2007 08:57:57 +0200
> > Jens Axboe <jens.axboe@oracle.com> wrote:
> >
> >
> > > + struct splice_desc sd = {
> > > + .total_len = len,
> > > + .flags = flags,
> > > + .pos = *ppos,
> > > + };
> > > +
> > > + sd.file = out;
> >
> > minor remark, why sd.file is setup differently than other fields ?
> >
> > (this several times on this patch)
>
> yeah, it's inside an anonymous union, and apparently gcc doesn't like it
> being initialized that way.
The one time I had such a situation something along the lines of:
struct splice_desc sd = {
.total_len = len,
.flags = flags,
.pos = *ppos,
{ .file = out },
};
worked.
^ permalink raw reply [flat|nested] 23+ messages in thread* Re: [PATCH 1/18] splice: abstract out actor data
2007-06-13 8:48 ` Peter Zijlstra
@ 2007-06-13 8:48 ` Jens Axboe
0 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-13 8:48 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Eric Dumazet, linux-kernel
On Wed, Jun 13 2007, Peter Zijlstra wrote:
> On Tue, 2007-06-12 at 18:22 +0200, Jens Axboe wrote:
> > On Tue, Jun 12 2007, Eric Dumazet wrote:
> > > On Tue, 12 Jun 2007 08:57:57 +0200
> > > Jens Axboe <jens.axboe@oracle.com> wrote:
> > >
> > >
> > > > + struct splice_desc sd = {
> > > > + .total_len = len,
> > > > + .flags = flags,
> > > > + .pos = *ppos,
> > > > + };
> > > > +
> > > > + sd.file = out;
> > >
> > > minor remark, why sd.file is setup differently than other fields ?
> > >
> > > (this several times on this patch)
> >
> > yeah, it's inside an anonymous union, and apparently gcc doesn't like it
> > being initialized that way.
>
> The one time I had such a situation something along the lines of:
>
> struct splice_desc sd = {
> .total_len = len,
> .flags = flags,
> .pos = *ppos,
> { .file = out },
> };
>
> worked.
OK, that looks pretty handy. But I just threw the towel into the ring
and named the union instead yesterday. The outside initialization of
file/userptr/data was an eyesore to me.
--
Jens Axboe
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 2/18] vmsplice: add vmsplice-to-user support
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
2007-06-12 6:57 ` [PATCH 1/18] splice: abstract out actor data Jens Axboe
@ 2007-06-12 6:57 ` Jens Axboe
2007-06-12 6:57 ` [PATCH 3/18] sys_sendfile: switch to using ->splice_read, if available Jens Axboe
` (15 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:57 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
A bit of a cheat, it actually just copies the data to userspace. But
this makes the interface nice and symmetric and enables people to build
on splice, with room for future improvement in performance.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/splice.c | 142 ++++++++++++++++++++++++++++++++++++++-------
include/linux/pipe_fs_i.h | 8 ++-
2 files changed, 127 insertions(+), 23 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 6871de6..837be82 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,28 +1282,107 @@ static int get_iovec_page_array(const struct iovec __user *iov,
return error;
}
+static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
+{
+ int ret;
+
+ ret = buf->ops->pin(pipe, buf);
+ if (!ret) {
+ /*
+ * use non-atomic map, can be optimized to map atomically if we
+ * prefault the user memory.
+ */
+ char *src = buf->ops->map(pipe, buf, 0);
+
+ if (copy_to_user(sd->userptr, src, sd->len))
+ ret = -EFAULT;
+
+ buf->ops->unmap(pipe, buf, src);
+
+ if (!ret)
+ return sd->len;
+ }
+
+ return ret;
+}
+
+/*
+ * For lack of a better implementation, implement vmsplice() to userspace
+ * as a simple copy of the pipes pages to the user iov.
+ */
+static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
+{
+ struct pipe_inode_info *pipe;
+ struct splice_desc sd;
+ ssize_t size;
+ int error;
+ long ret;
+
+ pipe = pipe_info(file->f_path.dentry->d_inode);
+ if (!pipe)
+ return -EBADF;
+
+ if (pipe->inode)
+ mutex_lock(&pipe->inode->i_mutex);
+
+ ret = 0;
+ while (nr_segs) {
+ void __user *base;
+ size_t len;
+
+ /*
+ * Get user address base and length for this iovec.
+ */
+ error = get_user(base, &iov->iov_base);
+ if (unlikely(error))
+ break;
+ error = get_user(len, &iov->iov_len);
+ if (unlikely(error))
+ break;
+
+ /*
+ * Sanity check this iovec. 0 read succeeds.
+ */
+ if (unlikely(!len))
+ break;
+ error = -EFAULT;
+ if (unlikely(!base))
+ break;
+
+ sd.len = 0;
+ sd.total_len = len;
+ sd.flags = flags;
+ sd.userptr = base;
+ sd.pos = 0;
+
+ size = __splice_from_pipe(pipe, &sd, pipe_to_user);
+ if (size < 0) {
+ if (!ret)
+ ret = size;
+
+ break;
+ }
+
+ nr_segs--;
+ iov++;
+ ret += size;
+ }
+
+ if (pipe->inode)
+ mutex_unlock(&pipe->inode->i_mutex);
+
+ return ret;
+}
+
/*
* vmsplice splices a user address range into a pipe. It can be thought of
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
- *
- * Note that vmsplice only supports splicing _from_ user memory to a pipe,
- * not the other way around. Splicing from user memory is a simple operation
- * that can be supported without any funky alignment restrictions or nasty
- * vm tricks. We simply map in the user memory and fill them into a pipe.
- * The reverse isn't quite as easy, though. There are two possible solutions
- * for that:
- *
- * - memcpy() the data internally, at which point we might as well just
- * do a regular read() on the buffer anyway.
- * - Lots of nasty vm tricks, that are neither fast nor flexible (it
- * has restriction limitations on both ends of the pipe).
- *
- * Alas, it isn't here.
- *
*/
-static long do_vmsplice(struct file *file, const struct iovec __user *iov,
- unsigned long nr_segs, unsigned int flags)
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+ unsigned long nr_segs, unsigned int flags)
{
struct pipe_inode_info *pipe;
struct page *pages[PIPE_BUFFERS];
@@ -1318,10 +1397,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
pipe = pipe_info(file->f_path.dentry->d_inode);
if (!pipe)
return -EBADF;
- if (unlikely(nr_segs > UIO_MAXIOV))
- return -EINVAL;
- else if (unlikely(!nr_segs))
- return 0;
spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
flags & SPLICE_F_GIFT);
@@ -1331,6 +1406,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
return splice_to_pipe(pipe, &spd);
}
+/*
+ * Note that vmsplice only really supports true splicing _from_ user memory
+ * to a pipe, not the other way around. Splicing from user memory is a simple
+ * operation that can be supported without any funky alignment restrictions
+ * or nasty vm tricks. We simply map in the user memory and fill them into
+ * a pipe. The reverse isn't quite as easy, though. There are two possible
+ * solutions for that:
+ *
+ * - memcpy() the data internally, at which point we might as well just
+ * do a regular read() on the buffer anyway.
+ * - Lots of nasty vm tricks, that are neither fast nor flexible (it
+ * has restriction limitations on both ends of the pipe).
+ *
+ * Currently we punt and implement it as a normal copy, see pipe_to_user().
+ *
+ */
asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
unsigned long nr_segs, unsigned int flags)
{
@@ -1338,11 +1429,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
long error;
int fput;
+ if (unlikely(nr_segs > UIO_MAXIOV))
+ return -EINVAL;
+ else if (unlikely(!nr_segs))
+ return 0;
+
error = -EBADF;
file = fget_light(fd, &fput);
if (file) {
if (file->f_mode & FMODE_WRITE)
- error = do_vmsplice(file, iov, nr_segs, flags);
+ error = vmsplice_to_pipe(file, iov, nr_segs, flags);
+ else if (file->f_mode & FMODE_READ)
+ error = vmsplice_to_user(file, iov, nr_segs, flags);
fput_light(file, fput);
}
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 883ba9b..c2bda03 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -88,7 +88,13 @@ int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
struct splice_desc {
unsigned int len, total_len; /* current and remaining length */
unsigned int flags; /* splice flags */
- struct file *file; /* file to read/write */
+ /*
+ * actor() private data
+ */
+ union {
+ void __user *userptr; /* memory to write to */
+ struct file *file; /* file to read/write */
+ };
loff_t pos; /* file position */
};
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 3/18] sys_sendfile: switch to using ->splice_read, if available
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
2007-06-12 6:57 ` [PATCH 1/18] splice: abstract out actor data Jens Axboe
2007-06-12 6:57 ` [PATCH 2/18] vmsplice: add vmsplice-to-user support Jens Axboe
@ 2007-06-12 6:57 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 4/18] sendfile: remove .sendfile from filesystems that use generic_file_sendfile() Jens Axboe
` (14 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:57 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
This patch makes sendfile prefer to use ->splice_read(), if it's
available in the file_operations structure.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/read_write.c | 24 ++++++++++++++++++++----
1 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008..47da8a4 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
+#include <linux/pipe_fs_i.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.mmap = generic_file_readonly_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
struct inode * in_inode, * out_inode;
loff_t pos;
ssize_t retval;
- int fput_needed_in, fput_needed_out;
+ int fput_needed_in, fput_needed_out, fl;
/*
* Get input file, and verify that it is ok..
@@ -723,7 +724,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
in_inode = in_file->f_path.dentry->d_inode;
if (!in_inode)
goto fput_in;
- if (!in_file->f_op || !in_file->f_op->sendfile)
+ if (!in_file->f_op || (!in_file->f_op->sendfile &&
+ !in_file->f_op->splice_read))
goto fput_in;
retval = -ESPIPE;
if (!ppos)
@@ -776,7 +778,21 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
count = max - pos;
}
- retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+ if (in_file->f_op->splice_read) {
+ fl = 0;
+#if 0
+ /*
+ * We need to debate whether we can enable this or not. The
+ * man page documents EAGAIN return for the output at least,
+ * and the application is arguably buggy if it doesn't expect
+ * EAGAIN on a non-blocking file descriptor.
+ */
+ if (in_file->f_flags & O_NONBLOCK)
+ fl = SPLICE_F_NONBLOCK;
+#endif
+ retval = do_splice_direct(in_file, ppos, out_file, count, fl);
+ } else
+ retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
if (retval > 0) {
add_rchar(current, retval);
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 4/18] sendfile: remove .sendfile from filesystems that use generic_file_sendfile()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (2 preceding siblings ...)
2007-06-12 6:57 ` [PATCH 3/18] sys_sendfile: switch to using ->splice_read, if available Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 5/18] sendfile: kill generic_file_sendfile() Jens Axboe
` (13 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
They can use generic_file_splice_read() instead. Since sys_sendfile() now
prefers that, there should be no change in behaviour.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/adfs/file.c | 2 +-
fs/affs/file.c | 2 +-
fs/afs/file.c | 2 +-
fs/bfs/file.c | 2 +-
fs/block_dev.c | 1 -
fs/cifs/cifsfs.c | 8 +++---
fs/coda/file.c | 11 +++++----
fs/ecryptfs/file.c | 15 +++++++------
fs/ext2/file.c | 1 -
fs/ext3/file.c | 1 -
fs/ext4/file.c | 1 -
fs/fat/file.c | 2 +-
fs/fuse/file.c | 4 +-
fs/gfs2/ops_file.c | 1 -
fs/hfs/inode.c | 2 +-
fs/hfsplus/inode.c | 2 +-
fs/hostfs/hostfs_kern.c | 2 +-
fs/hpfs/file.c | 2 +-
fs/jffs2/file.c | 2 +-
fs/jfs/file.c | 1 -
fs/minix/file.c | 2 +-
fs/ntfs/file.c | 2 +-
fs/ocfs2/file.c | 1 -
fs/qnx4/file.c | 2 +-
fs/ramfs/file-mmu.c | 2 +-
fs/ramfs/file-nommu.c | 2 +-
fs/reiserfs/file.c | 1 -
fs/smbfs/file.c | 9 ++++---
fs/sysv/file.c | 2 +-
fs/udf/file.c | 2 +-
fs/ufs/file.c | 2 +-
fs/xfs/linux-2.6/xfs_file.c | 26 ------------------------
fs/xfs/linux-2.6/xfs_linux.h | 1 -
fs/xfs/linux-2.6/xfs_lrw.c | 44 ------------------------------------------
fs/xfs/linux-2.6/xfs_lrw.h | 3 --
fs/xfs/linux-2.6/xfs_vnode.h | 6 -----
fs/xfs/xfs_vnodeops.c | 3 --
37 files changed, 43 insertions(+), 131 deletions(-)
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a28..36e381c 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
.fsync = file_fsync,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c879690..c314a35 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
.open = affs_file_open,
.release = affs_file_release,
.fsync = file_fsync,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721..aede7eb 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
.aio_read = generic_file_aio_read,
.aio_write = afs_file_write,
.mmap = generic_file_readonly_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.fsync = afs_fsync,
};
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa..24310e9 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a..b3e9bfa 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_blkdev_ioctl,
#endif
- .sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7c04752..8b0cbf4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
#ifdef CONFIG_CIFS_POSIX
.ioctl = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
.lock = cifs_lock,
.fsync = cifs_fsync,
.flush = cifs_flush,
- .sendfile = generic_file_sendfile, /* BB removeme BB */
+ .splice_read = generic_file_splice_read,
#ifdef CONFIG_CIFS_POSIX
.ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.llseek = cifs_llseek,
#ifdef CONFIG_CIFS_POSIX
.ioctl = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.release = cifs_close,
.fsync = cifs_fsync,
.flush = cifs_flush,
- .sendfile = generic_file_sendfile, /* BB removeme BB */
+ .splice_read = generic_file_splice_read,
#ifdef CONFIG_CIFS_POSIX
.ioctl = cifs_ioctl,
#endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b60..99dbe86 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
}
static ssize_t
-coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
- read_actor_t actor, void *target)
+coda_file_splice_read(struct file *coda_file, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t count,
+ unsigned int flags)
{
struct coda_file_info *cfi;
struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
- if (!host_file->f_op || !host_file->f_op->sendfile)
+ if (!host_file->f_op || !host_file->f_op->splice_read)
return -EINVAL;
- return host_file->f_op->sendfile(host_file, ppos, count, actor, target);
+ return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
}
static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
.flush = coda_flush,
.release = coda_release,
.fsync = coda_fsync,
- .sendfile = coda_file_sendfile,
+ .splice_read = coda_file_splice_read,
};
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d8..94f456f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
return rc;
}
-static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
- size_t count, read_actor_t actor, void *target)
+static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
+ struct pipe_inode_info *pipe, size_t count,
+ unsigned int flags)
{
struct file *lower_file = NULL;
int rc = -EINVAL;
lower_file = ecryptfs_file_to_lower(file);
- if (lower_file->f_op && lower_file->f_op->sendfile)
- rc = lower_file->f_op->sendfile(lower_file, ppos, count,
- actor, target);
+ if (lower_file->f_op && lower_file->f_op->splice_read)
+ rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
+ count, flags);
return rc;
}
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
.release = ecryptfs_release,
.fsync = ecryptfs_fsync,
.fasync = ecryptfs_fasync,
- .sendfile = ecryptfs_sendfile,
+ .splice_read = ecryptfs_splice_read,
};
const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
.release = ecryptfs_release,
.fsync = ecryptfs_fsync,
.fasync = ecryptfs_fasync,
- .sendfile = ecryptfs_sendfile,
+ .splice_read = ecryptfs_splice_read,
};
static int
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2..072a190 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f138..acc4913 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
.open = generic_file_open,
.release = ext3_release_file,
.fsync = ext3_sync_file,
- .sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd..d4c8186 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
.open = generic_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
- .sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c74..69a83b5 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
.release = fat_file_release,
.ioctl = fat_generic_ioctl,
.fsync = file_fsync,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995..f79de7c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
.release = fuse_release,
.fsync = fuse_fsync,
.lock = fuse_file_lock,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
.release = fuse_release,
.fsync = fuse_fsync,
.lock = fuse_file_lock,
- /* no mmap and sendfile */
+ /* no mmap and splice_read */
};
static const struct address_space_operations fuse_file_aops = {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df88..7dc3be1 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
.release = gfs2_close,
.fsync = gfs2_fsync,
.lock = gfs2_lock,
- .sendfile = generic_file_sendfile,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db..bc835f2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.fsync = file_fsync,
.open = hfs_file_open,
.release = hfs_file_release,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d..409ce54 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.fsync = file_fsync,
.open = hfsplus_file_open,
.release = hfsplus_file_release,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491..c778620 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
.read = do_sync_read,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.write = do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0..5b53e5c 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
.mmap = generic_file_mmap,
.release = hpfs_file_release,
.fsync = hpfs_file_fsync,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations hpfs_file_iops =
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 9987127..c253019 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
- .sendfile = generic_file_sendfile
+ .splice_read = generic_file_splice_read,
};
/* jffs2_file_inode_operations */
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff..87eb936 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.fsync = jfs_fsync,
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1..17765f6 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = minix_sync_file,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed5639..ffcc504 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
mounted filesystem. */
.mmap = generic_file_mmap, /* Mmap file. */
.open = ntfs_file_open, /* Open file. */
- .sendfile = generic_file_sendfile, /* Zero-copy data send with
+ .splice_read = generic_file_splice_read /* Zero-copy data send with
the data source being on
the ntfs partition. We do
not need to care about the
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index eb97b20..4f6033a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1822,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
const struct file_operations ocfs2_fops = {
.read = do_sync_read,
.write = do_sync_write,
- .sendfile = generic_file_sendfile,
.mmap = ocfs2_mmap,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 4464998..867f42b 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
#ifdef CONFIG_QNX4FS_RW
.write = do_sync_write,
.aio_write = generic_file_aio_write,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774..97bdc0b 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.llseek = generic_file_llseek,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5d258c4..cad2b7a 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
.llseek = generic_file_llseek,
};
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a6..30eebfb 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
.open = generic_file_open,
.release = reiserfs_file_release,
.fsync = reiserfs_sync_file,
- .sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8a..c5d78a7 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
}
static ssize_t
-smb_file_sendfile(struct file *file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
+smb_file_splice_read(struct file *file, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t count,
+ unsigned int flags)
{
struct dentry *dentry = file->f_path.dentry;
ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
DENTRY_PATH(dentry), status);
goto out;
}
- status = generic_file_sendfile(file, ppos, count, actor, target);
+ status = generic_file_splice_read(file, ppos, pipe, count, flags);
out:
return status;
}
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
.open = smb_file_open,
.release = smb_file_release,
.fsync = smb_fsync,
- .sendfile = smb_file_sendfile,
+ .splice_read = smb_file_splice_read,
};
const struct inode_operations smb_file_inode_operations =
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb..589be21 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = sysv_sync_file,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764..df070be 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
.aio_write = udf_file_aio_write,
.release = udf_release_file,
.fsync = udf_fsync_file,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e09632..6705d74 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
.mmap = generic_file_mmap,
.open = generic_file_open,
.fsync = ufs_sync_file,
- .sendfile = generic_file_sendfile,
+ .splice_read = generic_file_splice_read,
};
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc9..8c43cd2 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
}
STATIC ssize_t
-xfs_file_sendfile(
- struct file *filp,
- loff_t *pos,
- size_t count,
- read_actor_t actor,
- void *target)
-{
- return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
- filp, pos, 0, count, actor, target, NULL);
-}
-
-STATIC ssize_t
-xfs_file_sendfile_invis(
- struct file *filp,
- loff_t *pos,
- size_t count,
- read_actor_t actor,
- void *target)
-{
- return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
- filp, pos, IO_INVIS, count, actor, target, NULL);
-}
-
-STATIC ssize_t
xfs_file_splice_read(
struct file *infilp,
loff_t *ppos,
@@ -452,7 +428,6 @@ const struct file_operations xfs_file_operations = {
.write = do_sync_write,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
- .sendfile = xfs_file_sendfile,
.splice_read = xfs_file_splice_read,
.splice_write = xfs_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
@@ -475,7 +450,6 @@ const struct file_operations xfs_invis_file_operations = {
.write = do_sync_write,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
- .sendfile = xfs_file_sendfile_invis,
.splice_read = xfs_file_splice_read_invis,
.splice_write = xfs_file_splice_write_invis,
.unlocked_ioctl = xfs_file_ioctl_invis,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad..af24a45 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
* Feature macros (disable/enable)
*/
#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
-#define HAVE_SENDFILE /* sendfile(2) exists in 2.6, but not in 2.4 */
#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
#ifdef CONFIG_SMP
#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 86fb671..23978d3 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -287,50 +287,6 @@ xfs_read(
}
ssize_t
-xfs_sendfile(
- bhv_desc_t *bdp,
- struct file *filp,
- loff_t *offset,
- int ioflags,
- size_t count,
- read_actor_t actor,
- void *target,
- cred_t *credp)
-{
- xfs_inode_t *ip = XFS_BHVTOI(bdp);
- xfs_mount_t *mp = ip->i_mount;
- ssize_t ret;
-
- XFS_STATS_INC(xs_read_calls);
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
-
- if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
- (!(ioflags & IO_INVIS))) {
- bhv_vrwlock_t locktype = VRWLOCK_READ;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
- *offset, count,
- FILP_DELAY_FLAG(filp), &locktype);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return -error;
- }
- }
- xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
- (void *)(unsigned long)target, count, *offset, ioflags);
- ret = generic_file_sendfile(filp, offset, count, actor, target);
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
-}
-
-ssize_t
xfs_splice_read(
bhv_desc_t *bdp,
struct file *infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1..7c60a1e 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
const struct iovec *, unsigned int,
loff_t *, int, struct cred *);
-extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
- loff_t *, int, size_t, read_actor_t,
- void *, struct cred *);
extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
struct pipe_inode_info *, size_t, int, int,
struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01..013048a 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -139,9 +139,6 @@ typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
const struct iovec *, unsigned int,
loff_t *, int, struct cred *);
-typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
- loff_t *, int, size_t, read_actor_t,
- void *, struct cred *);
typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
struct pipe_inode_info *, size_t, int, int,
struct cred *);
@@ -206,7 +203,6 @@ typedef struct bhv_vnodeops {
vop_close_t vop_close;
vop_read_t vop_read;
vop_write_t vop_write;
- vop_sendfile_t vop_sendfile;
vop_splice_read_t vop_splice_read;
vop_splice_write_t vop_splice_write;
vop_ioctl_t vop_ioctl;
@@ -254,8 +250,6 @@ typedef struct bhv_vnodeops {
VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
#define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr) \
VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
-#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr) \
- VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
#define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr) \
VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
#define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr) \
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed..70bc82f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4680,9 +4680,6 @@ bhv_vnodeops_t xfs_vnodeops = {
.vop_open = xfs_open,
.vop_close = xfs_close,
.vop_read = xfs_read,
-#ifdef HAVE_SENDFILE
- .vop_sendfile = xfs_sendfile,
-#endif
#ifdef HAVE_SPLICE
.vop_splice_read = xfs_splice_read,
.vop_splice_write = xfs_splice_write,
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 5/18] sendfile: kill generic_file_sendfile()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (3 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 4/18] sendfile: remove .sendfile from filesystems that use generic_file_sendfile() Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 6/18] splice: add void cookie to the actor data Jens Axboe
` (12 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
It's no longer used.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
include/linux/fs.h | 1 -
mm/filemap.c | 20 --------------------
2 files changed, 0 insertions(+), 21 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b3ae77c..61ebf32 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1735,7 +1735,6 @@ extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t, loff_t *, size_t, ssize_t);
extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
extern void do_generic_mapping_read(struct address_space *mapping,
struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
diff --git a/mm/filemap.c b/mm/filemap.c
index edb1b0b..14ad45a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1245,26 +1245,6 @@ int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long o
return written;
}
-ssize_t generic_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
-{
- read_descriptor_t desc;
-
- if (!count)
- return 0;
-
- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
-
- do_generic_file_read(in_file, ppos, &desc, actor);
- if (desc.written)
- return desc.written;
- return desc.error;
-}
-EXPORT_SYMBOL(generic_file_sendfile);
-
static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
unsigned long index, unsigned long nr)
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 6/18] splice: add void cookie to the actor data
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (4 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 5/18] sendfile: kill generic_file_sendfile() Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 7/18] loop: convert to using splice_direct_to_actor() instead of sendfile() Jens Axboe
` (11 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
We need that for passing driver private info.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
include/linux/pipe_fs_i.h | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index c2bda03..1789ec7 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -94,6 +94,7 @@ struct splice_desc {
union {
void __user *userptr; /* memory to write to */
struct file *file; /* file to read/write */
+ void *data; /* cookie */
};
loff_t pos; /* file position */
};
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 7/18] loop: convert to using splice_direct_to_actor() instead of sendfile()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (5 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 6/18] splice: add void cookie to the actor data Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 8/18] sendfile: convert nfs to using splice_read() Jens Axboe
` (10 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
This gets rid of the dependency on ->sendfile() for receiving data
and converts loop to ->splice_read() instead.
Also includes an IV offset fix from Hugh Dickins.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
drivers/block/loop.c | 64 ++++++++++++++++++++++++++++++++++---------------
1 files changed, 44 insertions(+), 20 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0ed5470..562d329 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/kthread.h>
+#include <linux/pipe_fs_i.h>
#include <asm/uaccess.h>
@@ -401,50 +402,73 @@ struct lo_read_data {
};
static int
-lo_read_actor(read_descriptor_t *desc, struct page *page,
- unsigned long offset, unsigned long size)
+lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
{
- unsigned long count = desc->count;
- struct lo_read_data *p = desc->arg.data;
+ struct lo_read_data *p = sd->data;
struct loop_device *lo = p->lo;
+ struct page *page = buf->page;
sector_t IV;
+ size_t size;
+ int ret;
- IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
+ ret = buf->ops->pin(pipe, buf);
+ if (unlikely(ret))
+ return ret;
- if (size > count)
- size = count;
+ IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
+ (buf->offset >> 9);
+ size = sd->len;
+ if (size > p->bsize)
+ size = p->bsize;
- if (lo_do_transfer(lo, READ, page, offset, p->page, p->offset, size, IV)) {
- size = 0;
+ if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
printk(KERN_ERR "loop: transfer error block %ld\n",
page->index);
- desc->error = -EINVAL;
+ size = -EINVAL;
}
flush_dcache_page(p->page);
- desc->count = count - size;
- desc->written += size;
- p->offset += size;
+ if (size > 0)
+ p->offset += size;
+
return size;
}
static int
+lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
+{
+ return __splice_from_pipe(pipe, sd, lo_splice_actor);
+}
+
+static int
do_lo_receive(struct loop_device *lo,
struct bio_vec *bvec, int bsize, loff_t pos)
{
struct lo_read_data cookie;
+ struct splice_desc sd;
struct file *file;
- int retval;
+ long retval;
cookie.lo = lo;
cookie.page = bvec->bv_page;
cookie.offset = bvec->bv_offset;
cookie.bsize = bsize;
+
+ sd.len = 0;
+ sd.total_len = bvec->bv_len;
+ sd.flags = 0;
+ sd.pos = pos;
+ sd.data = &cookie;
+
file = lo->lo_backing_file;
- retval = file->f_op->sendfile(file, &pos, bvec->bv_len,
- lo_read_actor, &cookie);
- return (retval < 0)? retval: 0;
+ retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
+
+ if (retval < 0)
+ return retval;
+
+ return 0;
}
static int
@@ -679,8 +703,8 @@ static int loop_change_fd(struct loop_device *lo, struct file *lo_file,
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
goto out_putf;
- /* new backing store needs to support loop (eg sendfile) */
- if (!inode->i_fop->sendfile)
+ /* new backing store needs to support loop (eg splice_read) */
+ if (!inode->i_fop->splice_read)
goto out_putf;
/* size of the new backing store needs to be the same */
@@ -760,7 +784,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
* If we can't read - sorry. If we only can't write - well,
* it's going to be read-only.
*/
- if (!file->f_op->sendfile)
+ if (!file->f_op->splice_read)
goto out_putf;
if (aops->prepare_write && aops->commit_write)
lo_flags |= LO_FLAGS_USE_AOPS;
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 8/18] sendfile: convert nfs to using splice_read()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (6 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 7/18] loop: convert to using splice_direct_to_actor() instead of sendfile() Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 9/18] sendfile: convert nfsd to splice_direct_to_actor() Jens Axboe
` (9 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/nfs/file.c | 15 +++++++++------
1 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4..8689b73 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
static int nfs_file_release(struct inode *, struct file *);
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t count, unsigned int flags);
static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
.fsync = nfs_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
- .sendfile = nfs_file_sendfile,
+ .splice_read = nfs_file_splice_read,
.check_flags = nfs_check_flags,
};
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
}
static ssize_t
-nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
- read_actor_t actor, void *target)
+nfs_file_splice_read(struct file *filp, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t count,
+ unsigned int flags)
{
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
ssize_t res;
- dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+ dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) count, (unsigned long long) *ppos);
res = nfs_revalidate_mapping(inode, filp->f_mapping);
if (!res)
- res = generic_file_sendfile(filp, ppos, count, actor, target);
+ res = generic_file_splice_read(filp, ppos, pipe, count, flags);
return res;
}
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 9/18] sendfile: convert nfsd to splice_direct_to_actor()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (7 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 8/18] sendfile: convert nfs to using splice_read() Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 10/18] splice: relay support Jens Axboe
` (8 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/nfsd/vfs.c | 47 ++++++++++++++++++++++++------------
include/linux/sunrpc/svc.h | 2 +-
net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
net/sunrpc/svc.c | 2 +-
4 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa24..96f76e2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/major.h>
-#include <linux/ext2_fs.h>
+#include <linux/pipe_fs_i.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
}
/*
- * Grab and keep cached pages assosiated with a file in the svc_rqst
- * so that they can be passed to the netowork sendmsg/sendpage routines
- * directrly. They will be released after the sending has completed.
+ * Grab and keep cached pages associated with a file in the svc_rqst
+ * so that they can be passed to the network sendmsg/sendpage routines
+ * directly. They will be released after the sending has completed.
*/
static int
-nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+ struct splice_desc *sd)
{
- unsigned long count = desc->count;
- struct svc_rqst *rqstp = desc->arg.data;
+ struct svc_rqst *rqstp = sd->data;
struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+ struct page *page = buf->page;
+ size_t size;
+ int ret;
+
+ ret = buf->ops->pin(pipe, buf);
+ if (unlikely(ret))
+ return ret;
- if (size > count)
- size = count;
+ size = sd->len;
if (rqstp->rq_res.page_len == 0) {
get_page(page);
put_page(*pp);
*pp = page;
rqstp->rq_resused++;
- rqstp->rq_res.page_base = offset;
+ rqstp->rq_res.page_base = buf->offset;
rqstp->rq_res.page_len = size;
} else if (page != pp[-1]) {
get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
} else
rqstp->rq_res.page_len += size;
- desc->count = count - size;
- desc->written += size;
return size;
}
+static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
+ struct splice_desc *sd)
+{
+ return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
+}
+
static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,15 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (ra && ra->p_set)
file->f_ra = ra->p_ra;
- if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
- rqstp->rq_resused = 1;
- host_err = file->f_op->sendfile(file, &offset, *count,
- nfsd_read_actor, rqstp);
+ if (file->f_op->splice_read && rqstp->rq_splice_ok) {
+ struct splice_desc sd = {
+ .len = 0,
+ .total_len = *count,
+ .pos = offset,
+ };
+
+ sd.data = rqstp;
+ host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
} else {
oldfs = get_fs();
set_fs(KERNEL_DS);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4a7ae8a..129d50f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -253,7 +253,7 @@ struct svc_rqst {
* determine what device number
* to report (real or virtual)
*/
- int rq_sendfile_ok; /* turned off in gss privacy
+ int rq_splice_ok; /* turned off in gss privacy
* to prevent encrypting page
* cache pages */
wait_queue_head_t rq_wait; /* synchronization */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 099a983..c094583 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -853,7 +853,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs
u32 priv_len, maj_stat;
int pad, saved_len, remaining_len, offset;
- rqstp->rq_sendfile_ok = 0;
+ rqstp->rq_splice_ok = 0;
priv_len = svc_getnl(&buf->head[0]);
if (rqstp->rq_deferred) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e673ef9..55ea6df 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -814,7 +814,7 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_base = NULL;
rqstp->rq_res.tail[0].iov_len = 0;
/* Will be turned off only in gss privacy case: */
- rqstp->rq_sendfile_ok = 1;
+ rqstp->rq_splice_ok = 1;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putnl(resv, 0);
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 10/18] splice: relay support
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (8 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 9/18] sendfile: convert nfsd to splice_direct_to_actor() Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 11/18] splice: divorce the splice structure/function definitions from the pipe header Jens Axboe
` (7 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Tom Zanussi, Jens Axboe
From: Tom Zanussi <zanussi@us.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
kernel/relay.c | 241 ++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 191 insertions(+), 50 deletions(-)
diff --git a/kernel/relay.c b/kernel/relay.c
index 4311101..3950c1b 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/cpu.h>
+#include <linux/pipe_fs_i.h>
/* list of open channels, for cpu hotplug */
static DEFINE_MUTEX(relay_channels_mutex);
@@ -121,6 +122,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size)
buf->page_array[i] = alloc_page(GFP_KERNEL);
if (unlikely(!buf->page_array[i]))
goto depopulate;
+ set_page_private(buf->page_array[i], (unsigned long)buf);
}
mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL);
if (!mem)
@@ -963,43 +965,6 @@ static int subbuf_read_actor(size_t read_start,
return ret;
}
-/*
- * subbuf_send_actor - send up to one subbuf's worth of data
- */
-static int subbuf_send_actor(size_t read_start,
- struct rchan_buf *buf,
- size_t avail,
- read_descriptor_t *desc,
- read_actor_t actor)
-{
- unsigned long pidx, poff;
- unsigned int subbuf_pages;
- int ret = 0;
-
- subbuf_pages = buf->chan->alloc_size >> PAGE_SHIFT;
- pidx = (read_start / PAGE_SIZE) % subbuf_pages;
- poff = read_start & ~PAGE_MASK;
- while (avail) {
- struct page *p = buf->page_array[pidx];
- unsigned int len;
-
- len = PAGE_SIZE - poff;
- if (len > avail)
- len = avail;
-
- len = actor(desc, p, poff, len);
- if (desc->error)
- break;
-
- avail -= len;
- ret += len;
- poff = 0;
- pidx = (pidx + 1) % subbuf_pages;
- }
-
- return ret;
-}
-
typedef int (*subbuf_actor_t) (size_t read_start,
struct rchan_buf *buf,
size_t avail,
@@ -1060,19 +1025,195 @@ static ssize_t relay_file_read(struct file *filp,
NULL, &desc);
}
-static ssize_t relay_file_sendfile(struct file *filp,
- loff_t *ppos,
- size_t count,
- read_actor_t actor,
- void *target)
+static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
- read_descriptor_t desc;
- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
- return relay_file_read_subbufs(filp, ppos, subbuf_send_actor,
- actor, &desc);
+ struct rchan_buf *rbuf;
+
+ rbuf = (struct rchan_buf *)page_private(buf->page);
+
+ rbuf->bytes_consumed += PAGE_SIZE;
+
+ if (rbuf->bytes_consumed == rbuf->chan->subbuf_size) {
+ relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
+ rbuf->bytes_consumed = 0;
+ }
+}
+
+static struct pipe_buf_operations relay_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .pin = generic_pipe_buf_pin,
+ .release = relay_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+/**
+ * subbuf_splice_actor - splice up to one subbuf's worth of data
+ */
+static int subbuf_splice_actor(struct file *in,
+ loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len,
+ unsigned int flags,
+ int *nonpad_ret)
+{
+ unsigned int pidx, poff;
+ unsigned int subbuf_pages;
+ int ret = 0;
+ int do_wakeup = 0;
+ struct rchan_buf *rbuf = in->private_data;
+ unsigned int subbuf_size = rbuf->chan->subbuf_size;
+ size_t read_start = ((size_t)*ppos) % rbuf->chan->alloc_size;
+ size_t avail = subbuf_size - read_start % subbuf_size;
+ size_t read_subbuf = read_start / subbuf_size;
+ size_t padding = rbuf->padding[read_subbuf];
+ size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
+
+ if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
+ return 0;
+
+ if (len > avail)
+ len = avail;
+
+ if (pipe->inode)
+ mutex_lock(&pipe->inode->i_mutex);
+
+ subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
+ pidx = (read_start / PAGE_SIZE) % subbuf_pages;
+ poff = read_start & ~PAGE_MASK;
+
+ for (;;) {
+ unsigned int this_len;
+ unsigned int this_end;
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
+
+ if (!pipe->readers) {
+ send_sig(SIGPIPE, current, 0);
+ if (!ret)
+ ret = -EPIPE;
+ break;
+ }
+
+ if (pipe->nrbufs < PIPE_BUFFERS) {
+ this_len = PAGE_SIZE - poff;
+ if (this_len > avail)
+ this_len = avail;
+
+ buf->page = rbuf->page_array[pidx];
+ buf->offset = poff;
+ this_end = read_start + ret + this_len;
+ if (this_end > nonpad_end) {
+ if (read_start + ret >= nonpad_end)
+ buf->len = 0;
+ else
+ buf->len = nonpad_end - (read_start + ret);
+ } else
+ buf->len = this_len;
+
+ *nonpad_ret += buf->len;
+
+ buf->ops = &relay_pipe_buf_ops;
+ pipe->nrbufs++;
+
+ avail -= this_len;
+ ret += this_len;
+ poff = 0;
+ pidx = (pidx + 1) % subbuf_pages;
+
+ if (pipe->inode)
+ do_wakeup = 1;
+
+ if (!avail)
+ break;
+
+ if (pipe->nrbufs < PIPE_BUFFERS)
+ continue;
+
+ break;
+ }
+
+ if (flags & SPLICE_F_NONBLOCK) {
+ if (!ret)
+ ret = -EAGAIN;
+ break;
+ }
+
+ if (signal_pending(current)) {
+ if (!ret)
+ ret = -ERESTARTSYS;
+ break;
+ }
+
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ do_wakeup = 0;
+ }
+
+ pipe->waiting_writers++;
+ pipe_wait(pipe);
+ pipe->waiting_writers--;
+ }
+
+ if (pipe->inode)
+ mutex_unlock(&pipe->inode->i_mutex);
+
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ }
+
+ return ret;
+}
+
+static ssize_t relay_file_splice_read(struct file *in,
+ loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len,
+ unsigned int flags)
+{
+ ssize_t spliced;
+ int ret;
+ int nonpad_ret = 0;
+
+ ret = 0;
+ spliced = 0;
+
+ while (len) {
+ ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
+ if (ret < 0)
+ break;
+ else if (!ret) {
+ break;
+ if (spliced)
+ break;
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+ }
+
+ *ppos += ret;
+ if (ret > len)
+ len = 0;
+ else
+ len -= ret;
+ spliced += nonpad_ret;
+ nonpad_ret = 0;
+ }
+
+ if (spliced)
+ return spliced;
+
+ return ret;
}
const struct file_operations relay_file_operations = {
@@ -1082,7 +1223,7 @@ const struct file_operations relay_file_operations = {
.read = relay_file_read,
.llseek = no_llseek,
.release = relay_file_release,
- .sendfile = relay_file_sendfile,
+ .splice_read = relay_file_splice_read,
};
EXPORT_SYMBOL_GPL(relay_file_operations);
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 11/18] splice: divorce the splice structure/function definitions from the pipe header
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (9 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 10/18] splice: relay support Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 12/18] pipe: allow passing around of ops private pointer Jens Axboe
` (6 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
We need to move even more stuff into the header so that folks can use
the splice_to_pipe() implementation instead of open-coding a lot of
pipe knowledge (see relay implementation), so move to our own header
file finally.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
drivers/block/loop.c | 2 +-
drivers/char/mem.c | 2 +-
fs/nfsd/vfs.c | 2 +-
fs/ocfs2/file.c | 2 +-
fs/read_write.c | 2 +-
fs/splice.c | 26 +++-------------
include/linux/pipe_fs_i.h | 41 -------------------------
include/linux/splice.h | 72 +++++++++++++++++++++++++++++++++++++++++++++
kernel/relay.c | 2 +-
9 files changed, 83 insertions(+), 68 deletions(-)
create mode 100644 include/linux/splice.h
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 562d329..8ffbac3 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -74,7 +74,7 @@
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/kthread.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include <asm/uaccess.h>
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index cc9a9d0..d2e4cfd 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -24,7 +24,7 @@
#include <linux/crash_dump.h>
#include <linux/backing-dev.h>
#include <linux/bootmem.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include <linux/pfn.h>
#include <asm/uaccess.h>
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 96f76e2..0b42580 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/major.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4f6033a..c549332 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include <linux/mount.h>
#include <linux/writeback.h>
diff --git a/fs/read_write.c b/fs/read_write.c
index 47da8a4..2527cf0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,7 +15,7 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include "read_write.h"
#include <asm/uaccess.h>
diff --git a/fs/splice.c b/fs/splice.c
index 837be82..15d3f4b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/writeback.h>
@@ -29,22 +29,6 @@
#include <linux/syscalls.h>
#include <linux/uio.h>
-struct partial_page {
- unsigned int offset;
- unsigned int len;
-};
-
-/*
- * Passed to splice_to_pipe
- */
-struct splice_pipe_desc {
- struct page **pages; /* page map */
- struct partial_page *partial; /* pages[] may not be contig */
- int nr_pages; /* number of pages in map */
- unsigned int flags; /* splice flags */
- const struct pipe_buf_operations *ops;/* ops associated with output pipe */
-};
-
/*
* Attempt to steal a page from a pipe buffer. This should perhaps go into
* a vm helper function, it's already simplified quite a bit by the
@@ -170,11 +154,11 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
};
/*
- * Pipe output worker. This sets up our pipe format with the page cache
- * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
+ * Pipe output worker. This fills a pipe with the information contained
+ * from splice_pipe_desc().
*/
-static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
+ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
{
int ret, do_wakeup, page_nr;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 1789ec7..7ba228d 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -71,45 +71,4 @@ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
-/*
- * splice is tied to pipes as a transport (at least for now), so we'll just
- * add the splice flags here.
- */
-#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */
-#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
- /* we may still block on the fd we splice */
- /* from/to, of course */
-#define SPLICE_F_MORE (0x04) /* expect more data */
-#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */
-
-/*
- * Passed to the actors
- */
-struct splice_desc {
- unsigned int len, total_len; /* current and remaining length */
- unsigned int flags; /* splice flags */
- /*
- * actor() private data
- */
- union {
- void __user *userptr; /* memory to write to */
- struct file *file; /* file to read/write */
- void *data; /* cookie */
- };
- loff_t pos; /* file position */
-};
-
-typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
- struct splice_desc *);
-typedef int (splice_direct_actor)(struct pipe_inode_info *,
- struct splice_desc *);
-
-extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
- loff_t *, size_t, unsigned int,
- splice_actor *);
-extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
- struct splice_desc *, splice_actor *);
-extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
- splice_direct_actor *);
-
#endif
diff --git a/include/linux/splice.h b/include/linux/splice.h
new file mode 100644
index 0000000..b3f1528
--- /dev/null
+++ b/include/linux/splice.h
@@ -0,0 +1,72 @@
+/*
+ * Function declerations and data structures related to the splice
+ * implementation.
+ *
+ * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
+ *
+ */
+#ifndef SPLICE_H
+#define SPLICE_H
+
+#include <linux/pipe_fs_i.h>
+
+/*
+ * splice is tied to pipes as a transport (at least for now), so we'll just
+ * add the splice flags here.
+ */
+#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */
+#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */
+ /* we may still block on the fd we splice */
+ /* from/to, of course */
+#define SPLICE_F_MORE (0x04) /* expect more data */
+#define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */
+
+/*
+ * Passed to the actors
+ */
+struct splice_desc {
+ unsigned int len, total_len; /* current and remaining length */
+ unsigned int flags; /* splice flags */
+ /*
+ * actor() private data
+ */
+ union {
+ void __user *userptr; /* memory to write to */
+ struct file *file; /* file to read/write */
+ void *data; /* cookie */
+ };
+ loff_t pos; /* file position */
+};
+
+struct partial_page {
+ unsigned int offset;
+ unsigned int len;
+};
+
+/*
+ * Passed to splice_to_pipe
+ */
+struct splice_pipe_desc {
+ struct page **pages; /* page map */
+ struct partial_page *partial; /* pages[] may not be contig */
+ int nr_pages; /* number of pages in map */
+ unsigned int flags; /* splice flags */
+ const struct pipe_buf_operations *ops;/* ops associated with output pipe */
+};
+
+typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
+ struct splice_desc *);
+typedef int (splice_direct_actor)(struct pipe_inode_info *,
+ struct splice_desc *);
+
+extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
+ loff_t *, size_t, unsigned int,
+ splice_actor *);
+extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
+ struct splice_desc *, splice_actor *);
+extern ssize_t splice_to_pipe(struct pipe_inode_info *,
+ struct splice_pipe_desc *);
+extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
+ splice_direct_actor *);
+
+#endif
diff --git a/kernel/relay.c b/kernel/relay.c
index 3950c1b..ae13040 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -21,7 +21,7 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/cpu.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
/* list of open channels, for cpu hotplug */
static DEFINE_MUTEX(relay_channels_mutex);
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 12/18] pipe: allow passing around of ops private pointer
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (10 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 11/18] splice: divorce the splice structure/function definitions from the pipe header Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 13/18] relay: use splice_to_pipe() instead of open-coding the pipe loop Jens Axboe
` (5 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
relay needs this for proper consumption handling, and the network
receive support needs it as well to lookup the sk_buff on pipe
release.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/splice.c | 1 +
include/linux/pipe_fs_i.h | 1 +
include/linux/splice.h | 1 +
3 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 15d3f4b..ff62ac9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -184,6 +184,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
buf->page = spd->pages[page_nr];
buf->offset = spd->partial[page_nr].offset;
buf->len = spd->partial[page_nr].len;
+ buf->private = spd->partial[page_nr].private;
buf->ops = spd->ops;
if (spd->flags & SPLICE_F_GIFT)
buf->flags |= PIPE_BUF_FLAG_GIFT;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 7ba228d..4409167 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -14,6 +14,7 @@ struct pipe_buffer {
unsigned int offset, len;
const struct pipe_buf_operations *ops;
unsigned int flags;
+ unsigned long private;
};
struct pipe_inode_info {
diff --git a/include/linux/splice.h b/include/linux/splice.h
index b3f1528..1a1182b 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -41,6 +41,7 @@ struct splice_desc {
struct partial_page {
unsigned int offset;
unsigned int len;
+ unsigned long private;
};
/*
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 13/18] relay: use splice_to_pipe() instead of open-coding the pipe loop
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (11 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 12/18] pipe: allow passing around of ops private pointer Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 14/18] shmem: convert to using splice instead of sendfile() Jens Axboe
` (4 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
It cleans up the relay splice implementation a lot, and gets rid of
a lot of internal pipe knowledge that should not be in there.
Plus fixes for padding and partial first page (and lots more) from
Tom Zanussi.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
kernel/relay.c | 146 +++++++++++++++++++++-----------------------------------
1 files changed, 55 insertions(+), 91 deletions(-)
diff --git a/kernel/relay.c b/kernel/relay.c
index ae13040..0c2971e 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1025,19 +1025,23 @@ static ssize_t relay_file_read(struct file *filp,
NULL, &desc);
}
+static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
+{
+ rbuf->bytes_consumed += bytes_consumed;
+
+ if (rbuf->bytes_consumed >= rbuf->chan->subbuf_size) {
+ relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
+ rbuf->bytes_consumed %= rbuf->chan->subbuf_size;
+ }
+}
+
static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct rchan_buf *rbuf;
rbuf = (struct rchan_buf *)page_private(buf->page);
-
- rbuf->bytes_consumed += PAGE_SIZE;
-
- if (rbuf->bytes_consumed == rbuf->chan->subbuf_size) {
- relay_subbufs_consumed(rbuf->chan, rbuf->cpu, 1);
- rbuf->bytes_consumed = 0;
- }
+ relay_consume_bytes(rbuf, buf->private);
}
static struct pipe_buf_operations relay_pipe_buf_ops = {
@@ -1060,118 +1064,79 @@ static int subbuf_splice_actor(struct file *in,
unsigned int flags,
int *nonpad_ret)
{
- unsigned int pidx, poff;
- unsigned int subbuf_pages;
- int ret = 0;
- int do_wakeup = 0;
+ unsigned int pidx, poff, total_len, subbuf_pages, ret;
struct rchan_buf *rbuf = in->private_data;
unsigned int subbuf_size = rbuf->chan->subbuf_size;
size_t read_start = ((size_t)*ppos) % rbuf->chan->alloc_size;
- size_t avail = subbuf_size - read_start % subbuf_size;
size_t read_subbuf = read_start / subbuf_size;
size_t padding = rbuf->padding[read_subbuf];
size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .nr_pages = 0,
+ .partial = partial,
+ .flags = flags,
+ .ops = &relay_pipe_buf_ops,
+ };
if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
return 0;
- if (len > avail)
- len = avail;
-
- if (pipe->inode)
- mutex_lock(&pipe->inode->i_mutex);
+ /*
+ * Adjust read len, if longer than what is available
+ */
+ if (len > (subbuf_size - read_start % subbuf_size))
+ len = subbuf_size - read_start % subbuf_size;
subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
pidx = (read_start / PAGE_SIZE) % subbuf_pages;
poff = read_start & ~PAGE_MASK;
- for (;;) {
- unsigned int this_len;
- unsigned int this_end;
- int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
- struct pipe_buffer *buf = pipe->bufs + newbuf;
+ for (total_len = 0; spd.nr_pages < subbuf_pages; spd.nr_pages++) {
+ unsigned int this_len, this_end, private;
+ unsigned int cur_pos = read_start + total_len;
- if (!pipe->readers) {
- send_sig(SIGPIPE, current, 0);
- if (!ret)
- ret = -EPIPE;
+ if (!len)
break;
- }
-
- if (pipe->nrbufs < PIPE_BUFFERS) {
- this_len = PAGE_SIZE - poff;
- if (this_len > avail)
- this_len = avail;
-
- buf->page = rbuf->page_array[pidx];
- buf->offset = poff;
- this_end = read_start + ret + this_len;
- if (this_end > nonpad_end) {
- if (read_start + ret >= nonpad_end)
- buf->len = 0;
- else
- buf->len = nonpad_end - (read_start + ret);
- } else
- buf->len = this_len;
-
- *nonpad_ret += buf->len;
-
- buf->ops = &relay_pipe_buf_ops;
- pipe->nrbufs++;
- avail -= this_len;
- ret += this_len;
- poff = 0;
- pidx = (pidx + 1) % subbuf_pages;
+ this_len = min_t(unsigned long, len, PAGE_SIZE - poff);
+ private = this_len;
- if (pipe->inode)
- do_wakeup = 1;
+ spd.pages[spd.nr_pages] = rbuf->page_array[pidx];
+ spd.partial[spd.nr_pages].offset = poff;
- if (!avail)
- break;
-
- if (pipe->nrbufs < PIPE_BUFFERS)
- continue;
-
- break;
+ this_end = cur_pos + this_len;
+ if (this_end >= nonpad_end) {
+ this_len = nonpad_end - cur_pos;
+ private = this_len + padding;
}
+ spd.partial[spd.nr_pages].len = this_len;
+ spd.partial[spd.nr_pages].private = private;
- if (flags & SPLICE_F_NONBLOCK) {
- if (!ret)
- ret = -EAGAIN;
- break;
- }
+ len -= this_len;
+ total_len += this_len;
+ poff = 0;
+ pidx = (pidx + 1) % subbuf_pages;
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
+ if (this_end >= nonpad_end) {
+ spd.nr_pages++;
break;
}
-
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- do_wakeup = 0;
- }
-
- pipe->waiting_writers++;
- pipe_wait(pipe);
- pipe->waiting_writers--;
}
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ if (!spd.nr_pages)
+ return 0;
- if (do_wakeup) {
- smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- }
+ ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
+ if (ret < 0 || ret < total_len)
+ return ret;
- return ret;
+ if (read_start + ret == nonpad_end)
+ ret += padding;
+
+ return ret;
}
static ssize_t relay_file_splice_read(struct file *in,
@@ -1192,7 +1157,6 @@ static ssize_t relay_file_splice_read(struct file *in,
if (ret < 0)
break;
else if (!ret) {
- break;
if (spliced)
break;
if (flags & SPLICE_F_NONBLOCK) {
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 14/18] shmem: convert to using splice instead of sendfile()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (12 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 13/18] relay: use splice_to_pipe() instead of open-coding the pipe loop Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 15/18] sendfile: remove bad_sendfile() from bad_file_ops Jens Axboe
` (3 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Hugh Dickins, Jens Axboe
From: Hugh Dickins <hugh@veritas.com>
Remove shmem_file_sendfile and resurrect shmem_readpage, as used by tmpfs
to support loop and sendfile in 2.4 and 2.5. Now tmpfs can support splice,
loop and sendfile in the simplest way, using generic_file_splice_read and
generic_file_splice_write (with the aid of shmem_prepare_write).
We could make some efficiency tweaks later, if there's a real need;
but this is stable and works well as is.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
mm/shmem.c | 42 +++++++++++++++++-------------------------
1 files changed, 17 insertions(+), 25 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index b6aae2b..0493e4d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1100,9 +1100,9 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
* Normally, filepage is NULL on entry, and either found
* uptodate immediately, or allocated and zeroed, or read
* in under swappage, which is then assigned to filepage.
- * But shmem_prepare_write passes in a locked filepage,
- * which may be found not uptodate by other callers too,
- * and may need to be copied from the swappage read in.
+ * But shmem_readpage and shmem_prepare_write pass in a locked
+ * filepage, which may be found not uptodate by other callers
+ * too, and may need to be copied from the swappage read in.
*/
repeat:
if (!filepage)
@@ -1485,9 +1485,18 @@ static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_symlink_inline_operations;
/*
- * Normally tmpfs makes no use of shmem_prepare_write, but it
- * lets a tmpfs file be used read-write below the loop driver.
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * but providing them allows a tmpfs file to be used for splice, sendfile, and
+ * below the loop driver, in the generic fashion that many filesystems support.
*/
+static int shmem_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
+ unlock_page(page);
+ return error;
+}
+
static int
shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
@@ -1711,25 +1720,6 @@ static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count
return desc.error;
}
-static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
-{
- read_descriptor_t desc;
-
- if (!count)
- return 0;
-
- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
-
- do_shmem_file_read(in_file, ppos, &desc, actor);
- if (desc.written)
- return desc.written;
- return desc.error;
-}
-
static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2386,6 +2376,7 @@ static const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.set_page_dirty = __set_page_dirty_no_writeback,
#ifdef CONFIG_TMPFS
+ .readpage = shmem_readpage,
.prepare_write = shmem_prepare_write,
.commit_write = simple_commit_write,
#endif
@@ -2399,7 +2390,8 @@ static const struct file_operations shmem_file_operations = {
.read = shmem_file_read,
.write = shmem_file_write,
.fsync = simple_sync_file,
- .sendfile = shmem_file_sendfile,
+ .splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
#endif
};
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 15/18] sendfile: remove bad_sendfile() from bad_file_ops
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (13 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 14/18] shmem: convert to using splice instead of sendfile() Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 16/18] splice: completely document external interface with kerneldoc Jens Axboe
` (2 subsequent siblings)
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
do_sendfile() prefers splice over sendfile, so it should not trigger
(directly, at least).
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/bad_inode.c | 7 -------
1 files changed, 0 insertions(+), 7 deletions(-)
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee47..521ff7c 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
return -EIO;
}
-static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
-{
- return -EIO;
-}
-
static ssize_t bad_file_sendpage(struct file *file, struct page *page,
int off, size_t len, loff_t *pos, int more)
{
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
.aio_fsync = bad_file_aio_fsync,
.fasync = bad_file_fasync,
.lock = bad_file_lock,
- .sendfile = bad_file_sendfile,
.sendpage = bad_file_sendpage,
.get_unmapped_area = bad_file_get_unmapped_area,
.check_flags = bad_file_check_flags,
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 16/18] splice: completely document external interface with kerneldoc
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (14 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 15/18] sendfile: remove bad_sendfile() from bad_file_ops Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 17/18] ext2 xip: replace sendfile with splice Jens Axboe
2007-06-12 6:58 ` [PATCH 18/18] Remove remnants of sendfile() Jens Axboe
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
Also add fs/splice.c as a kerneldoc target with a smaller blurb that
should be expanded to better explain the overview of splice.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
Documentation/DocBook/kernel-api.tmpl | 11 ++++
fs/splice.c | 101 +++++++++++++++++++++++++++------
2 files changed, 94 insertions(+), 18 deletions(-)
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 38f88b6..8c5698a 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -643,4 +643,15 @@ X!Idrivers/video/console/fonts.c
!Edrivers/spi/spi.c
</chapter>
+ <chapter id="splice">
+ <title>splice API</title>
+ <para>)
+ splice is a method for moving blocks of data around inside the
+ kernel, without continually transferring it between the kernel
+ and user space.
+ </para>
+!Iinclude/linux/splice.h
+!Ffs/splice.c
+ </chapter>
+
</book>
diff --git a/fs/splice.c b/fs/splice.c
index ff62ac9..f24e367 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -153,9 +153,16 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
-/*
- * Pipe output worker. This fills a pipe with the information contained
- * from splice_pipe_desc().
+/**
+ * splice_to_pipe - fill passed data into a pipe
+ * @pipe: pipe to fill
+ * @spd: data to fill
+ *
+ * Description:
+ * @spd contains a map of pages and len/offset tupples, a long with
+ * the struct pipe_buf_operations associated with these pages. This
+ * function will link that data to the pipe.
+ *
*/
ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd)
@@ -453,11 +460,16 @@ fill_it:
/**
* generic_file_splice_read - splice data from file to a pipe
* @in: file to splice from
+ * @ppos: position in @in
* @pipe: pipe to splice to
* @len: number of bytes to splice
* @flags: splice modifier flags
*
- * Will read pages from given file and fill them into a pipe.
+ * Description:
+ * Will read pages from given file and fill them into a pipe. Can be
+ * used as long as the address_space operations for the source implements
+ * a readpage() hook.
+ *
*/
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
@@ -646,10 +658,18 @@ out_ret:
return ret;
}
-/*
- * Pipe input worker. Most of this logic works like a regular pipe, the
- * key here is the 'actor' worker passed in that actually moves the data
- * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
+/**
+ * __splice_from_pipe - splice data from a pipe to given actor
+ * @pipe: pipe to splice from
+ * @sd: information to @actor
+ * @actor: handler that splices the data
+ *
+ * Description:
+ * This function does little more than loop over the pipe and call
+ * @actor to do the actual moving of a single struct pipe_buffer to
+ * the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ * pipe_to_user.
+ *
*/
ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
splice_actor *actor)
@@ -742,6 +762,20 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
}
EXPORT_SYMBOL(__splice_from_pipe);
+/**
+ * splice_from_pipe - splice data from a pipe to a file
+ * @pipe: pipe to splice from
+ * @out: file to splice to
+ * @ppos: position in @out
+ * @len: how many bytes to splice
+ * @flags: splice modifier flags
+ * @actor: handler that splices the data
+ *
+ * Description:
+ * See __splice_from_pipe. This function locks the input and output inodes,
+ * otherwise it's identical to __splice_from_pipe().
+ *
+ */
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags,
splice_actor *actor)
@@ -773,12 +807,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
* generic_file_splice_write_nolock - generic_file_splice_write without mutexes
* @pipe: pipe info
* @out: file to write to
+ * @ppos: position in @out
* @len: number of bytes to splice
* @flags: splice modifier flags
*
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file. The caller is responsible
- * for acquiring i_mutex on both inodes.
+ * Description:
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file. The caller is responsible
+ * for acquiring i_mutex on both inodes.
*
*/
ssize_t
@@ -827,11 +863,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
* generic_file_splice_write - splice data from a pipe to a file
* @pipe: pipe info
* @out: file to write to
+ * @ppos: position in @out
* @len: number of bytes to splice
* @flags: splice modifier flags
*
- * Will either move or copy pages (determined by @flags options) from
- * the given pipe inode to the given file.
+ * Description:
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file.
*
*/
ssize_t
@@ -879,13 +917,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
/**
* generic_splice_sendpage - splice data from a pipe to a socket
- * @inode: pipe inode
+ * @pipe: pipe to splice from
* @out: socket to write to
+ * @ppos: position in @out
* @len: number of bytes to splice
* @flags: splice modifier flags
*
- * Will send @len bytes from the pipe to a network socket. No data copying
- * is involved.
+ * Description:
+ * Will send @len bytes from the pipe to a network socket. No data copying
+ * is involved.
*
*/
ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -939,8 +979,18 @@ static long do_splice_to(struct file *in, loff_t *ppos,
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
-/*
- * Splices from an input file to an actor, using a 'direct' pipe.
+/**
+ * splice_direct_to_actor - splices data directly between two non-pipes
+ * @in: file to splice from
+ * @sd: actor information on where to splice to
+ * @actor: handles the data splicing
+ *
+ * Description:
+ * This is a special case helper to splice directly between two
+ * points, without requiring an explicit pipe. Internally an allocated
+ * pipe is cached in the process, and reused during the life time of
+ * that process.
+ *
*/
ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
splice_direct_actor *actor)
@@ -1070,6 +1120,21 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
}
+/**
+ * do_splice_direct - splices data directly between two files
+ * @in: file to splice from
+ * @ppos: input file offset
+ * @out: file to splice to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * For use by do_sendfile(). splice can easily emulate sendfile, but
+ * doing it in the application would incur an extra system call
+ * (splice in + splice out, as compared to just sendfile()). So this helper
+ * can splice directly through a process-private pipe.
+ *
+ */
long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
size_t len, unsigned int flags)
{
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 17/18] ext2 xip: replace sendfile with splice
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (15 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 16/18] splice: completely document external interface with kerneldoc Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
2007-06-12 6:58 ` [PATCH 18/18] Remove remnants of sendfile() Jens Axboe
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Carsten Otte, Jens Axboe
From: Carsten Otte <cotte@de.ibm.com>
This patch removes xip_file_sendfile, and replaces it with
xip_file_splice_read. It passes the ltp testcase on a ext2 filesystem
that is mounted -o xip,rw.
I am not exactly happy with this patch, because it introduces waaaay too
much code duplication from fs/splice.c. I want to dig into making
generic_file_splice_read able to deal with xip files aswell. That would
allow to do the same for splice write.
If I cannot find a nicer solution, I'd vote for removing sendfile from
filemap_xip without replacement: a xip filesystem is intended to be used
for executables and libraries. This content is very rarely subject to
splice.
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/ext2/file.c | 2 +-
include/linux/fs.h | 4 +-
mm/filemap_xip.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 192 insertions(+), 20 deletions(-)
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 072a190..5f4a17c 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -70,7 +70,7 @@ const struct file_operations ext2_xip_file_operations = {
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .sendfile = xip_file_sendfile,
+ .splice_read = xip_file_splice_read,
};
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 61ebf32..a5bf63e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1764,13 +1764,11 @@ extern int nonseekable_open(struct inode * inode, struct file * filp);
#ifdef CONFIG_FS_XIP
extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
loff_t *ppos);
-extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor,
- void *target);
extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
size_t len, loff_t *ppos);
extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+extern ssize_t xip_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
#else
static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
{
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index fa360e5..f1f3134 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -14,6 +14,7 @@
#include <linux/uio.h>
#include <linux/rmap.h>
#include <linux/sched.h>
+#include <linux/splice.h>
#include <asm/tlbflush.h>
#include "filemap.h"
@@ -92,7 +93,11 @@ do_xip_mapping_read(struct address_space *mapping,
if (unlikely(IS_ERR(page))) {
if (PTR_ERR(page) == -ENODATA) {
/* sparse */
- page = ZERO_PAGE(0);
+ page = xip_sparse_page();
+ if (!page) {
+ desc->error = -ENOMEM;
+ goto out;
+ }
} else {
desc->error = PTR_ERR(page);
goto out;
@@ -159,27 +164,196 @@ xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
}
EXPORT_SYMBOL_GPL(xip_file_read);
-ssize_t
-xip_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
+static int xip_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
- read_descriptor_t desc;
+ return 1;
+}
- if (!count)
+static void xip_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ page_cache_release(buf->page);
+}
+
+static const struct pipe_buf_operations xip_pipe_buf_ops = {
+ .can_merge = 0,
+ .pin = generic_pipe_buf_pin,
+ .get = generic_pipe_buf_get,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .release = xip_pipe_buf_release,
+ .steal = xip_pipe_buf_steal,
+};
+
+static int
+__xip_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct address_space *mapping = in->f_mapping;
+ unsigned int loff, nr_pages;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct page *page;
+ pgoff_t index, end_index;
+ loff_t isize;
+ size_t total_len;
+ int error, page_nr;
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &xip_pipe_buf_ops,
+ };
+
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ loff = *ppos & ~PAGE_CACHE_MASK;
+ nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ if (nr_pages > PIPE_BUFFERS)
+ nr_pages = PIPE_BUFFERS;
+
+ error = 0;
+ total_len = 0;
+
+ spd.nr_pages = 0;
+ while (spd.nr_pages < nr_pages) {
+ page = mapping->a_ops->get_xip_page(mapping,
+ index*(PAGE_SIZE/512), 0);
+ if (!page) {
+ error = -EIO;
+ break;
+ }
+ if (unlikely(IS_ERR(page))) {
+ if (PTR_ERR(page) == -ENODATA) {
+ /* sparse */
+ page = xip_sparse_page();
+ if (!page) {
+ error = -ENOMEM;
+ break;
+ }
+ } else {
+ error = PTR_ERR(page);
+ break;
+ }
+ }
+ page_cache_get(page);
+ pages[spd.nr_pages++] = page;
+ index++;
+ }
+
+ /*
+ * Now loop over the map and fill in the partial map
+ */
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ nr_pages = spd.nr_pages;
+ spd.nr_pages = 0;
+ for (page_nr = 0; page_nr < nr_pages; page_nr++) {
+ unsigned int this_len;
+
+ if (!len)
+ break;
+
+ /*
+ * this_len is the max we'll use from this page
+ */
+ this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+ page = pages[page_nr];
+ isize = i_size_read(mapping->host);
+ end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+ if (unlikely(!isize || index > end_index))
+ break;
+
+ /*
+ * if this is the last page, see if we need to shrink
+ * the length and stop
+ */
+ if (end_index == index) {
+ unsigned int plen;
+
+ /*
+ * max good bytes in this page
+ */
+ plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+ if (plen <= loff)
+ break;
+
+ /*
+ * force quit after adding this page
+ */
+ this_len = min(this_len, plen - loff);
+ len = this_len;
+ }
+
+ partial[page_nr].offset = loff;
+ partial[page_nr].len = this_len;
+ len -= this_len;
+ loff = 0;
+ spd.nr_pages++;
+ index++;
+ }
+
+ if (spd.nr_pages)
+ return splice_to_pipe(pipe, &spd);
+
+ return error;
+}
+
+/**
+ * xip_file_splice_read - splice data from file to a pipe
+ * @in: file to splice from
+ * @pipe: pipe to splice to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * derived from generic_file_splice_read in fs/splice_read.c
+ */
+ssize_t xip_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ ssize_t spliced;
+ int ret;
+ loff_t isize, left;
+
+ BUG_ON(!in->f_mapping->a_ops->get_xip_page);
+
+ isize = i_size_read(in->f_mapping->host);
+ if (unlikely(*ppos >= isize))
return 0;
- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
+ left = isize - *ppos;
+ if (unlikely(left < len))
+ len = left;
- do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
- ppos, &desc, actor);
- if (desc.written)
- return desc.written;
- return desc.error;
+ ret = 0;
+ spliced = 0;
+ while (len) {
+ ret = __xip_file_splice_read(in, ppos, pipe, len, flags);
+
+ if (ret < 0)
+ break;
+ else if (!ret) {
+ if (spliced)
+ break;
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+ }
+
+ *ppos += ret;
+ len -= ret;
+ spliced += ret;
+ }
+
+ if (spliced)
+ return spliced;
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(xip_file_sendfile);
+EXPORT_SYMBOL(xip_file_splice_read);
/*
* __xip_unmap is invoked from xip_unmap and
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread* [PATCH 18/18] Remove remnants of sendfile()
2007-06-12 6:57 [PATCH 0/18] Convert sendfile to splice Jens Axboe
` (16 preceding siblings ...)
2007-06-12 6:58 ` [PATCH 17/18] ext2 xip: replace sendfile with splice Jens Axboe
@ 2007-06-12 6:58 ` Jens Axboe
17 siblings, 0 replies; 23+ messages in thread
From: Jens Axboe @ 2007-06-12 6:58 UTC (permalink / raw)
To: linux-kernel; +Cc: Jens Axboe
There are now zero users of .sendfile() in the kernel, so kill
it from the file_operations structure and in do_sendfile().
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
fs/read_write.c | 26 +++++++++++---------------
include/linux/fs.h | 3 +--
2 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 2527cf0..507ddff 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -724,8 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
in_inode = in_file->f_path.dentry->d_inode;
if (!in_inode)
goto fput_in;
- if (!in_file->f_op || (!in_file->f_op->sendfile &&
- !in_file->f_op->splice_read))
+ if (!in_file->f_op || !in_file->f_op->splice_read)
goto fput_in;
retval = -ESPIPE;
if (!ppos)
@@ -778,21 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
count = max - pos;
}
- if (in_file->f_op->splice_read) {
- fl = 0;
+ fl = 0;
#if 0
- /*
- * We need to debate whether we can enable this or not. The
- * man page documents EAGAIN return for the output at least,
- * and the application is arguably buggy if it doesn't expect
- * EAGAIN on a non-blocking file descriptor.
- */
- if (in_file->f_flags & O_NONBLOCK)
- fl = SPLICE_F_NONBLOCK;
+ /*
+ * We need to debate whether we can enable this or not. The
+ * man page documents EAGAIN return for the output at least,
+ * and the application is arguably buggy if it doesn't expect
+ * EAGAIN on a non-blocking file descriptor.
+ */
+ if (in_file->f_flags & O_NONBLOCK)
+ fl = SPLICE_F_NONBLOCK;
#endif
- retval = do_splice_direct(in_file, ppos, out_file, count, fl);
- } else
- retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+ retval = do_splice_direct(in_file, ppos, out_file, count, fl);
if (retval > 0) {
add_rchar(current, retval);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a5bf63e..5ef52bc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1054,7 +1054,7 @@ struct block_device_operations {
};
/*
- * "descriptor" for what we're up to with a read for sendfile().
+ * "descriptor" for what we're up to with a read.
* This allows us to use the same read code yet
* have multiple different users of the data that
* we read from a file.
@@ -1105,7 +1105,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
--
1.5.2.1.174.gcd03
^ permalink raw reply related [flat|nested] 23+ messages in thread