From: Josef Bacik <josef@redhat.com>
To: Oren Laadan <orenl@cs.columbia.edu>
Cc: Andrew Morton <akpm@linux-foundation.org>,
containers@lists.linux-foundation.org,
linux-kernel@vger.kernel.org, Serge Hallyn <serue@us.ibm.com>,
Matt Helsley <matthltc@us.ibm.com>,
Pavel Emelyanov <xemul@openvz.org>,
linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v21 019/100] Make file_pos_read/write() public and export kernel_write()
Date: Thu, 6 May 2010 08:26:19 -0400 [thread overview]
Message-ID: <20100506122618.GA2327@localhost.localdomain> (raw)
In-Reply-To: <1272723382-19470-20-git-send-email-orenl@cs.columbia.edu>
On Sat, May 01, 2010 at 10:15:01AM -0400, Oren Laadan wrote:
> These three are used in a subsequent patch to allow the kernel c/r
> code to call vfs_read/write() to read and write data to and from the
> checkpoint image.
>
> This patch makes the following changes:
>
> 1) Move kernel_write() from fs/splice.c to fs/exec.c to be near
> kernel_read()
>
> 2) Make kernel_read/write() iterate if they face partial reads or
> writes, and retry if they face -EAGAIN.
>
> 3) Adjust prototypes of kernel_read/write() to use size_t and ssize_t
>
> 4) Move file_pos_read/write() to include/linux/fs.h
>
> Changelog [ckpt-v21]
> - Introduce kernel_write(), fix kernel_read()
>
> Cc: linux-fsdevel@vger.kernel.org
> Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
> Acked-by: Serge E. Hallyn <serue@us.ibm.com>
> ---
> fs/exec.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++----
> fs/read_write.c | 10 -------
> fs/splice.c | 17 +------------
> include/linux/fs.h | 13 +++++++++-
> 4 files changed, 77 insertions(+), 32 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index 49cdaa1..7bacb6a 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -693,23 +693,82 @@ exit:
> }
> EXPORT_SYMBOL(open_exec);
>
> -int kernel_read(struct file *file, loff_t offset,
> - char *addr, unsigned long count)
> +static ssize_t _kernel_read(struct file *file, loff_t offset,
> + char __user *ubuf, size_t count)
> {
> - mm_segment_t old_fs;
> + ssize_t nread;
> + size_t nleft;
> loff_t pos = offset;
> - int result;
> +
> + for (nleft = count; nleft; nleft -= nread) {
> + nread = vfs_read(file, ubuf, nleft, &pos);
> + if (nread <= 0) {
> + if (nread == -EAGAIN) {
> + nread = 0;
> + continue;
> + } else if (nread == 0)
> + break;
> + else
> + return nread;
> + }
> + ubuf += nread;
> + }
> + return count - nleft;
> +}
> +
> +ssize_t kernel_read(struct file *file, loff_t offset,
> + char *addr, size_t count)
> +{
> + mm_segment_t old_fs;
> + ssize_t result;
>
> old_fs = get_fs();
> set_fs(get_ds());
> /* The cast to a user pointer is valid due to the set_fs() */
> - result = vfs_read(file, (void __user *)addr, count, &pos);
> + result = _kernel_read(file, offset, (void __user *)addr, count);
> set_fs(old_fs);
> return result;
> }
>
> EXPORT_SYMBOL(kernel_read);
>
> +static ssize_t _kernel_write(struct file *file, loff_t offset,
> + const char __user *ubuf, size_t count)
> +{
> + ssize_t nwrite;
> + size_t nleft;
> + loff_t pos = offset;
> +
> + for (nleft = count; nleft; nleft -= nwrite) {
> + nwrite = vfs_write(file, ubuf, nleft, &pos);
> + if (nwrite < 0) {
> + if (nwrite == -EAGAIN) {
> + nwrite = 0;
> + continue;
> + } else
> + return nwrite;
> + }
> + ubuf += nwrite;
> + }
> + return count - nleft;
> +}
I'm not entirely sure if this can happen, but if vfs_write doesn't write
anything, but doesn't have an error, we could end up in an infinite loop. Like
I said I'm not sure if thats even possible, but its definitely one of those
things that if it is possible some random security guy is going to figure out
how to exploit it at some point down the line.
> +
> +ssize_t kernel_write(struct file *file, loff_t offset,
> + const char *addr, size_t count)
> +{
> + mm_segment_t old_fs;
> + ssize_t result;
> +
> + old_fs = get_fs();
> + set_fs(get_ds());
> + /* The cast to a user pointer is valid due to the set_fs() */
> + result = _kernel_write(file, offset, (void __user *)addr, count);
> + set_fs(old_fs);
> + return result;
> +}
> +
> +EXPORT_SYMBOL(kernel_write);
> +
> static int exec_mmap(struct mm_struct *mm)
> {
> struct task_struct *tsk;
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 113386d..67b7d83 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -361,16 +361,6 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
>
> EXPORT_SYMBOL(vfs_write);
>
> -static inline loff_t file_pos_read(struct file *file)
> -{
> - return file->f_pos;
> -}
> -
> -static inline void file_pos_write(struct file *file, loff_t pos)
> -{
> - file->f_pos = pos;
> -}
> -
> SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
> {
> struct file *file;
> diff --git a/fs/splice.c b/fs/splice.c
> index 9313b61..188e17d 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -538,21 +538,6 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
> return res;
> }
>
> -static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
> - loff_t pos)
> -{
> - mm_segment_t old_fs;
> - ssize_t res;
> -
> - old_fs = get_fs();
> - set_fs(get_ds());
> - /* The cast to a user pointer is valid due to the set_fs() */
> - res = vfs_write(file, (const char __user *)buf, count, &pos);
> - set_fs(old_fs);
> -
> - return res;
> -}
> -
> ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
> struct pipe_inode_info *pipe, size_t len,
> unsigned int flags)
> @@ -1011,7 +996,7 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
> return ret;
>
> data = buf->ops->map(pipe, buf, 0);
> - ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
> + ret = kernel_write(sd->u.file, sd->pos, data + buf->offset, sd->len);
> buf->ops->unmap(pipe, buf, data);
>
> return ret;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 39d57bc..9e8b171 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1548,6 +1548,16 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
> struct iovec *fast_pointer,
> struct iovec **ret_pointer);
>
> +static inline loff_t file_pos_read(struct file *file)
> +{
> + return file->f_pos;
> +}
> +
> +static inline void file_pos_write(struct file *file, loff_t pos)
> +{
> + file->f_pos = pos;
> +}
> +
> extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
> extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
> extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
> @@ -2127,7 +2137,8 @@ extern struct file *do_filp_open(int dfd, const char *pathname,
> int open_flag, int mode, int acc_mode);
> extern int may_open(struct path *, int, int);
>
> -extern int kernel_read(struct file *, loff_t, char *, unsigned long);
> +extern ssize_t kernel_read(struct file *, loff_t, char *, size_t);
> +extern ssize_t kernel_write(struct file *, loff_t, const char *, size_t);
> extern struct file * open_exec(const char *);
>
> /* fs/dcache.c -- generic fs support functions */
> --
I'd say fix that little nit I had above and you can add
Reviewed-by: Josef Bacik <josef@redhat.com>
Thanks,
Josef
next prev parent reply other threads:[~2010-05-06 12:26 UTC|newest]
Thread overview: 126+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-05-01 14:14 [PATCH v21 00/100] Kernel based checkpoint/restart Oren Laadan
2010-05-01 14:14 ` [PATCH v21 001/100] eclone (1/11): Factor out code to allocate pidmap page Oren Laadan
2010-05-01 22:10 ` David Miller
2010-05-02 0:14 ` Josh Boyer
2010-05-02 0:25 ` Matt Helsley
2010-05-03 8:48 ` Brian K. White
2010-05-03 21:02 ` Dave Hansen
2010-05-03 21:12 ` David Miller
2010-05-01 14:14 ` [PATCH v21 002/100] eclone (2/11): Have alloc_pidmap() return actual error code Oren Laadan
2010-05-01 14:14 ` [PATCH v21 003/100] eclone (3/11): Define set_pidmap() function Oren Laadan
2010-05-01 14:14 ` [PATCH v21 004/100] eclone (4/11): Add target_pids parameter to alloc_pid() Oren Laadan
2010-05-01 14:14 ` [PATCH v21 005/100] eclone (5/11): Add target_pids parameter to copy_process() Oren Laadan
2010-05-01 14:14 ` [PATCH v21 006/100] eclone (6/11): Check invalid clone flags Oren Laadan
2010-05-01 14:14 ` [PATCH v21 007/100] eclone (7/11): Define do_fork_with_pids() Oren Laadan
2010-05-01 14:14 ` [PATCH v21 008/100] eclone (8/11): Implement sys_eclone for x86 (32,64) Oren Laadan
2010-05-01 14:14 ` [PATCH v21 009/100] eclone (9/11): Implement sys_eclone for s390 Oren Laadan
2010-05-01 14:14 ` [PATCH v21 010/100] eclone (10/11): Implement sys_eclone for powerpc Oren Laadan
2010-05-01 14:14 ` [PATCH v21 011/100] eclone (11/11): Document sys_eclone Oren Laadan
2010-05-05 21:14 ` Randy Dunlap
2010-05-05 22:25 ` Sukadev Bhattiprolu
2010-05-01 14:14 ` [PATCH v21 012/100] c/r: extend arch_setup_additional_pages() Oren Laadan
2010-05-01 14:14 ` [PATCH v21 013/100] c/r: break out new_user_ns() Oren Laadan
2010-05-01 14:14 ` [PATCH v21 014/100] c/r: split core function out of some set*{u,g}id functions Oren Laadan
2010-05-01 14:14 ` [PATCH v21 015/100] cgroup freezer: Update stale locking comments Oren Laadan
2010-05-06 19:40 ` Rafael J. Wysocki
2010-05-06 20:31 ` Matt Helsley
2010-05-06 22:34 ` Matt Helsley
2010-05-06 21:25 ` Oren Laadan
2010-05-10 21:01 ` Rafael J. Wysocki
2010-05-10 21:07 ` Matt Helsley
2010-05-10 21:12 ` Rafael J. Wysocki
2010-05-01 14:14 ` [PATCH v21 016/100] cgroup freezer: Add CHECKPOINTING state to safeguard container checkpoint Oren Laadan
2010-05-01 14:14 ` [PATCH v21 017/100] cgroup freezer: interface to freeze a cgroup from within the kernel Oren Laadan
2010-05-01 14:15 ` [PATCH v21 018/100] Namespaces submenu Oren Laadan
2010-05-01 14:15 ` [PATCH v21 019/100] Make file_pos_read/write() public and export kernel_write() Oren Laadan
2010-05-06 12:26 ` Josef Bacik [this message]
2010-05-01 14:15 ` [PATCH v21 020/100] c/r: documentation Oren Laadan
2010-05-06 20:27 ` Randy Dunlap
2010-05-07 6:54 ` Oren Laadan
2010-05-01 14:15 ` [PATCH v21 021/100] c/r: create syscalls: sys_checkpoint, sys_restart Oren Laadan
2010-05-01 14:15 ` [PATCH v21 022/100] c/r: basic infrastructure for checkpoint/restart Oren Laadan
2010-05-01 14:15 ` [PATCH v21 023/100] c/r: x86_32 support " Oren Laadan
2010-05-01 14:15 ` [PATCH v21 024/100] c/r: x86-64: checkpoint/restart implementation Oren Laadan
2010-05-01 14:15 ` [PATCH v21 025/100] c/r: external checkpoint of a task other than ourself Oren Laadan
2010-05-01 14:15 ` [PATCH v21 026/100] c/r: export functionality used in next patch for restart-blocks Oren Laadan
2010-05-01 14:15 ` [PATCH v21 027/100] c/r: restart-blocks Oren Laadan
2010-05-01 14:15 ` [PATCH v21 028/100] c/r: checkpoint multiple processes Oren Laadan
2010-05-01 14:15 ` [PATCH v21 029/100] c/r: restart " Oren Laadan
2010-05-01 14:15 ` [PATCH v21 030/100] c/r: introduce PF_RESTARTING, and skip notification on exit Oren Laadan
2010-05-01 14:15 ` [PATCH v21 031/100] c/r: support for zombie processes Oren Laadan
2010-05-01 14:15 ` [PATCH v21 032/100] c/r: Save and restore the [compat_]robust_list member of the task struct Oren Laadan
2010-05-03 16:10 ` Darren Hart
2010-05-03 18:02 ` Matt Helsley
2010-05-01 14:15 ` [PATCH v21 033/100] c/r: infrastructure for shared objects Oren Laadan
2010-05-01 14:15 ` [PATCH v21 034/100] c/r: detect resource leaks for whole-container checkpoint Oren Laadan
2010-05-01 14:15 ` [PATCH v21 035/100] deferqueue: generic queue to defer work Oren Laadan
2010-05-01 14:15 ` [PATCH v21 036/100] c/r: introduce vfs_fcntl() Oren Laadan
2010-05-01 14:15 ` [PATCH v21 037/100] c/r: introduce new 'file_operations': ->checkpoint, ->collect() Oren Laadan
2010-05-01 14:15 ` [PATCH v21 038/100] c/r: checkpoint and restart open file descriptors Oren Laadan
2010-05-01 14:15 ` [PATCH v21 039/100] c/r: introduce method '->checkpoint()' in struct vm_operations_struct Oren Laadan
2010-05-01 14:15 ` [PATCH v21 040/100] Introduce FOLL_DIRTY to follow_page() for "dirty" pages Oren Laadan
2010-05-01 14:15 ` [PATCH v21 041/100] c/r: dump memory address space (private memory) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 042/100] c/r: add generic '->checkpoint' f_op to ext fses Oren Laadan
2010-05-01 14:15 ` [PATCH v21 043/100] c/r: add generic '->checkpoint()' f_op to simple devices Oren Laadan
2010-05-01 14:15 ` [PATCH v21 044/100] c/r: add checkpoint operation for opened files of generic filesystems Oren Laadan
2010-05-01 14:15 ` [PATCH v21 045/100] c/r: export shmem_getpage() to support shared memory Oren Laadan
2010-05-01 14:15 ` [PATCH v21 046/100] c/r: dump anonymous- and file-mapped- " Oren Laadan
2010-05-01 14:15 ` [PATCH v21 047/100] splice: export pipe/file-to-pipe/file functionality Oren Laadan
2010-05-01 14:15 ` [PATCH v21 048/100] c/r: support for open pipes Oren Laadan
2010-05-01 14:15 ` [PATCH v21 049/100] c/r: checkpoint and restore FIFOs Oren Laadan
2010-05-01 14:15 ` [PATCH v21 050/100] c/r: refuse to checkpoint if monitoring directories with dnotify Oren Laadan
2010-05-01 14:15 ` [PATCH v21 051/100] c/r: make ckpt_may_checkpoint_task() check each namespace individually Oren Laadan
2010-05-01 14:15 ` [PATCH v21 052/100] c/r: support for UTS namespace Oren Laadan
2010-05-01 14:15 ` [PATCH v21 053/100] c/r (ipc): allow allocation of a desired ipc identifier Oren Laadan
2010-05-07 16:32 ` Manfred Spraul
2010-05-07 17:08 ` Oren Laadan
2010-05-01 14:15 ` [PATCH v21 054/100] c/r: save and restore sysvipc namespace basics Oren Laadan
2010-05-01 14:15 ` [PATCH v21 055/100] c/r: support share-memory sysv-ipc Oren Laadan
2010-05-01 14:15 ` [PATCH v21 056/100] c/r: support message-queues sysv-ipc Oren Laadan
2010-05-01 14:15 ` [PATCH v21 057/100] c/r: support semaphore sysv-ipc Oren Laadan
2010-05-01 14:15 ` [PATCH v21 058/100] c/r: (s390): expose a constant for the number of words (CRs) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 059/100] c/r: add CKPT_COPY() macro Oren Laadan
2010-05-01 14:15 ` [PATCH v21 060/100] c/r: define s390-specific checkpoint-restart code Oren Laadan
2010-05-01 14:15 ` [PATCH v21 061/100] c/r: capabilities: define checkpoint and restore fns Oren Laadan
2010-05-01 14:15 ` [PATCH v21 062/100] c/r: checkpoint and restore task credentials Oren Laadan
2010-05-01 14:15 ` [PATCH v21 063/100] c/r: restore file->f_cred Oren Laadan
2010-05-01 14:15 ` [PATCH v21 064/100] c/r: checkpoint and restore (shared) task's sighand_struct Oren Laadan
2010-05-01 14:15 ` [PATCH v21 065/100] c/r: [signal 1/4] blocked and template for shared signals Oren Laadan
2010-05-01 14:15 ` [PATCH v21 066/100] c/r: [signal 2/4] checkpoint/restart of rlimit Oren Laadan
2010-05-01 14:15 ` [PATCH v21 067/100] c/r: [signal 3/4] pending signals (private, shared) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 068/100] c/r: [signal 4/4] support for real/virt/prof itimers Oren Laadan
2010-05-01 14:15 ` [PATCH v21 069/100] Expose may_setuid() in user.h and add may_setgid() (v2) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 070/100] c/r: correctly restore pgid Oren Laadan
2010-05-01 14:15 ` [PATCH v21 071/100] Add common socket helpers to unify the security hooks Oren Laadan
2010-05-01 14:15 ` [PATCH v21 072/100] c/r: introduce checkpoint/restore methods to struct proto_ops Oren Laadan
2010-05-01 14:15 ` [PATCH v21 073/100] c/r: Add AF_UNIX support (v12) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 074/100] c/r: add support for listening INET sockets (v2) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 075/100] c/r: add support for connected INET sockets (v5) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 076/100] c/r: [pty 1/2] allow allocation of desired pty slave Oren Laadan
2010-05-01 14:15 ` [PATCH v21 077/100] c/r: [pty 2/2] support for pseudo terminals Oren Laadan
2010-05-01 14:16 ` [PATCH v21 078/100] c/r: support for controlling terminal and job control Oren Laadan
2010-05-01 14:16 ` [PATCH v21 079/100] c/r: checkpoint/restart epoll sets Oren Laadan
2010-05-01 14:16 ` [PATCH v21 080/100] c/r: checkpoint/restart eventfd Oren Laadan
2010-05-01 14:16 ` [PATCH v21 081/100] c/r: restore task fs_root and pwd (v3) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 082/100] c/r: preliminary support mounts namespace Oren Laadan
2010-05-01 14:16 ` [PATCH v21 083/100] c/r: nested pid namespaces (v3) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 084/100] powerpc: reserve checkpoint arch identifiers Oren Laadan
2010-05-01 14:16 ` [PATCH v21 085/100] powerpc: provide APIs for validating and updating DABR Oren Laadan
2010-05-01 14:16 ` [PATCH v21 086/100] powerpc: checkpoint/restart implementation Oren Laadan
2010-05-01 14:16 ` [PATCH v21 087/100] powerpc: wire up checkpoint and restart syscalls Oren Laadan
2010-05-01 14:16 ` [PATCH v21 088/100] powerpc: enable checkpoint support in Kconfig Oren Laadan
2010-05-01 14:16 ` [PATCH v21 089/100] c/r: add lsm name and lsm_info (policy header) to container info Oren Laadan
2010-05-01 14:16 ` [PATCH v21 090/100] c/r: add generic LSM c/r support (v7) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 091/100] c/r: add smack support to lsm c/r (v4) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 092/100] c/r: add selinux support (v6) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 093/100] c/r: Add checkpoint and collect hooks to net_device_ops Oren Laadan
2010-05-01 14:16 ` [PATCH v21 094/100] c/r: Basic support for network namespaces and devices (v6) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 095/100] c/r: Add rtnl_dellink() helper Oren Laadan
2010-05-01 14:16 ` [PATCH v21 096/100] c/r: Add checkpoint support for veth devices (v2) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 097/100] c/r: Add loopback checkpoint support (v2) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 098/100] c/r: Add a checkpoint handler to the 'sit' device Oren Laadan
2010-05-01 14:16 ` [PATCH v21 099/100] c/r: Add checkpoint support to macvlan driver Oren Laadan
2010-05-01 14:16 ` [PATCH v21 100/100] c/r: add an entry for checkpoint/restart in MAINTAINERS Oren Laadan
2010-05-01 15:17 ` [PATCH v21 00/100] Kernel based checkpoint/restart Oren Laadan
2010-05-04 14:43 ` [PATCH v21 001/100] eclone (1/11): Factor out code to allocate pidmap page David Howells
2010-05-05 15:13 ` Oren Laadan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100506122618.GA2327@localhost.localdomain \
--to=josef@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=containers@lists.linux-foundation.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=matthltc@us.ibm.com \
--cc=orenl@cs.columbia.edu \
--cc=serue@us.ibm.com \
--cc=xemul@openvz.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).