linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Josef Bacik <josef@redhat.com>
To: Oren Laadan <orenl@cs.columbia.edu>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	containers@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, Serge Hallyn <serue@us.ibm.com>,
	Matt Helsley <matthltc@us.ibm.com>,
	Pavel Emelyanov <xemul@openvz.org>,
	linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v21 019/100] Make file_pos_read/write() public and export kernel_write()
Date: Thu, 6 May 2010 08:26:19 -0400	[thread overview]
Message-ID: <20100506122618.GA2327@localhost.localdomain> (raw)
In-Reply-To: <1272723382-19470-20-git-send-email-orenl@cs.columbia.edu>

On Sat, May 01, 2010 at 10:15:01AM -0400, Oren Laadan wrote:
> These three are used in a subsequent patch to allow the kernel c/r
> code to call vfs_read/write() to read and write data to and from the
> checkpoint image.
> 
> This patch makes the following changes:
> 
> 1) Move kernel_write() from fs/splice.c to fs/exec.c to be near
> kernel_read()
> 
> 2) Make kernel_read/write() iterate if they face partial reads or
> writes, and retry if they face -EAGAIN.
> 
> 3) Adjust prototypes of kernel_read/write() to use size_t and ssize_t
> 
> 4) Move file_pos_read/write() to include/linux/fs.h
> 
> Changelog [ckpt-v21]
>   - Introduce kernel_write(), fix kernel_read()
> 
> Cc: linux-fsdevel@vger.kernel.org
> Signed-off-by: Oren Laadan <orenl@cs.columbia.edu>
> Acked-by: Serge E. Hallyn <serue@us.ibm.com>
> ---
>  fs/exec.c          |   69 ++++++++++++++++++++++++++++++++++++++++++++++++----
>  fs/read_write.c    |   10 -------
>  fs/splice.c        |   17 +------------
>  include/linux/fs.h |   13 +++++++++-
>  4 files changed, 77 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 49cdaa1..7bacb6a 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -693,23 +693,82 @@ exit:
>  }
>  EXPORT_SYMBOL(open_exec);
>  
> -int kernel_read(struct file *file, loff_t offset,
> -		char *addr, unsigned long count)
> +static ssize_t _kernel_read(struct file *file, loff_t offset,
> +			    char __user *ubuf, size_t count)
>  {
> -	mm_segment_t old_fs;
> +	ssize_t nread;
> +	size_t nleft;
>  	loff_t pos = offset;
> -	int result;
> +
> +	for (nleft = count; nleft; nleft -= nread) {
> +		nread = vfs_read(file, ubuf, nleft, &pos);
> +		if (nread <= 0) {
> +			if (nread == -EAGAIN) {
> +				nread = 0;
> +				continue;
> +			} else if (nread == 0)
> +				break;
> +			else
> +				return nread;
> +		}
> +		ubuf += nread;
> +	}
> +	return count - nleft;
> +}
> +
> +ssize_t kernel_read(struct file *file, loff_t offset,
> +		    char *addr, size_t count)
> +{
> +	mm_segment_t old_fs;
> +	ssize_t result;
>  
>  	old_fs = get_fs();
>  	set_fs(get_ds());
>  	/* The cast to a user pointer is valid due to the set_fs() */
> -	result = vfs_read(file, (void __user *)addr, count, &pos);
> +	result = _kernel_read(file, offset, (void __user *)addr, count);
>  	set_fs(old_fs);
>  	return result;
>  }
>  
>  EXPORT_SYMBOL(kernel_read);
>  
> +static ssize_t _kernel_write(struct file *file, loff_t offset,
> +			     const char __user *ubuf, size_t count)
> +{
> +	ssize_t nwrite;
> +	size_t nleft;
> +	loff_t pos = offset;
> +
> +	for (nleft = count; nleft; nleft -= nwrite) {
> +		nwrite = vfs_write(file, ubuf, nleft, &pos);
> +		if (nwrite < 0) {
> +			if (nwrite == -EAGAIN) {
> +				nwrite = 0;
> +				continue;
> +			} else
> +				return nwrite;
> +		}
> +		ubuf += nwrite;
> +	}
> +	return count - nleft;
> +}

I'm not entirely sure if this can happen, but if vfs_write doesn't write
anything, but doesn't have an error, we could end up in an infinite loop.  Like
I said I'm not sure if thats even possible, but its definitely one of those
things that if it is possible some random security guy is going to figure out
how to exploit it at some point down the line.

> +
> +ssize_t kernel_write(struct file *file, loff_t offset,
> +		     const char *addr, size_t count)
> +{
> +	mm_segment_t old_fs;
> +	ssize_t result;
> +
> +	old_fs = get_fs();
> +	set_fs(get_ds());
> +	/* The cast to a user pointer is valid due to the set_fs() */
> +	result = _kernel_write(file, offset, (void __user *)addr, count);
> +	set_fs(old_fs);
> +	return result;
> +}
> +
> +EXPORT_SYMBOL(kernel_write);
> +
>  static int exec_mmap(struct mm_struct *mm)
>  {
>  	struct task_struct *tsk;
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 113386d..67b7d83 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -361,16 +361,6 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
>  
>  EXPORT_SYMBOL(vfs_write);
>  
> -static inline loff_t file_pos_read(struct file *file)
> -{
> -	return file->f_pos;
> -}
> -
> -static inline void file_pos_write(struct file *file, loff_t pos)
> -{
> -	file->f_pos = pos;
> -}
> -
>  SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
>  {
>  	struct file *file;
> diff --git a/fs/splice.c b/fs/splice.c
> index 9313b61..188e17d 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -538,21 +538,6 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
>  	return res;
>  }
>  
> -static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
> -			    loff_t pos)
> -{
> -	mm_segment_t old_fs;
> -	ssize_t res;
> -
> -	old_fs = get_fs();
> -	set_fs(get_ds());
> -	/* The cast to a user pointer is valid due to the set_fs() */
> -	res = vfs_write(file, (const char __user *)buf, count, &pos);
> -	set_fs(old_fs);
> -
> -	return res;
> -}
> -
>  ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
>  				 struct pipe_inode_info *pipe, size_t len,
>  				 unsigned int flags)
> @@ -1011,7 +996,7 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
>  		return ret;
>  
>  	data = buf->ops->map(pipe, buf, 0);
> -	ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
> +	ret = kernel_write(sd->u.file, sd->pos, data + buf->offset, sd->len);
>  	buf->ops->unmap(pipe, buf, data);
>  
>  	return ret;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 39d57bc..9e8b171 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1548,6 +1548,16 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
>  				struct iovec *fast_pointer,
>  				struct iovec **ret_pointer);
>  
> +static inline loff_t file_pos_read(struct file *file)
> +{
> +	return file->f_pos;
> +}
> +
> +static inline void file_pos_write(struct file *file, loff_t pos)
> +{
> +	file->f_pos = pos;
> +}
> +
>  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
>  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
>  extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
> @@ -2127,7 +2137,8 @@ extern struct file *do_filp_open(int dfd, const char *pathname,
>  		int open_flag, int mode, int acc_mode);
>  extern int may_open(struct path *, int, int);
>  
> -extern int kernel_read(struct file *, loff_t, char *, unsigned long);
> +extern ssize_t kernel_read(struct file *, loff_t, char *, size_t);
> +extern ssize_t kernel_write(struct file *, loff_t, const char *, size_t);
>  extern struct file * open_exec(const char *);
>   
>  /* fs/dcache.c -- generic fs support functions */
> -- 

I'd say fix that little nit I had above and you can add

Reviewed-by: Josef Bacik <josef@redhat.com>

Thanks,

Josef

  reply	other threads:[~2010-05-06 12:26 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1272723382-19470-1-git-send-email-orenl@cs.columbia.edu>
2010-05-01 14:15 ` [PATCH v21 019/100] Make file_pos_read/write() public and export kernel_write() Oren Laadan
2010-05-06 12:26   ` Josef Bacik [this message]
2010-05-01 14:15 ` [PATCH v21 020/100] c/r: documentation Oren Laadan
2010-05-06 20:27   ` Randy Dunlap
2010-05-07  6:54     ` Oren Laadan
2010-05-01 14:15 ` [PATCH v21 022/100] c/r: basic infrastructure for checkpoint/restart Oren Laadan
2010-05-01 14:15 ` [PATCH v21 036/100] c/r: introduce vfs_fcntl() Oren Laadan
2010-05-01 14:15 ` [PATCH v21 037/100] c/r: introduce new 'file_operations': ->checkpoint, ->collect() Oren Laadan
2010-05-01 14:15 ` [PATCH v21 038/100] c/r: checkpoint and restart open file descriptors Oren Laadan
2010-05-01 14:15 ` [PATCH v21 039/100] c/r: introduce method '->checkpoint()' in struct vm_operations_struct Oren Laadan
2010-05-01 14:15 ` [PATCH v21 041/100] c/r: dump memory address space (private memory) Oren Laadan
2010-05-01 14:15 ` [PATCH v21 042/100] c/r: add generic '->checkpoint' f_op to ext fses Oren Laadan
2010-05-01 14:15 ` [PATCH v21 043/100] c/r: add generic '->checkpoint()' f_op to simple devices Oren Laadan
2010-05-01 14:15 ` [PATCH v21 044/100] c/r: add checkpoint operation for opened files of generic filesystems Oren Laadan
2010-05-01 14:15 ` [PATCH v21 046/100] c/r: dump anonymous- and file-mapped- shared memory Oren Laadan
2010-05-01 14:15 ` [PATCH v21 047/100] splice: export pipe/file-to-pipe/file functionality Oren Laadan
     [not found] ` <1272723382-19470-1-git-send-email-orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-05-01 14:15   ` [PATCH v21 048/100] c/r: support for open pipes Oren Laadan
2010-05-01 14:15 ` [PATCH v21 049/100] c/r: checkpoint and restore FIFOs Oren Laadan
2010-05-01 14:15 ` [PATCH v21 050/100] c/r: refuse to checkpoint if monitoring directories with dnotify Oren Laadan
2010-05-01 14:15 ` [PATCH v21 063/100] c/r: restore file->f_cred Oren Laadan
2010-05-01 14:16 ` [PATCH v21 079/100] c/r: checkpoint/restart epoll sets Oren Laadan
2010-05-01 14:16 ` [PATCH v21 080/100] c/r: checkpoint/restart eventfd Oren Laadan
2010-05-01 14:16 ` [PATCH v21 081/100] c/r: restore task fs_root and pwd (v3) Oren Laadan
2010-05-01 14:16 ` [PATCH v21 082/100] c/r: preliminary support mounts namespace Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100506122618.GA2327@localhost.localdomain \
    --to=josef@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=containers@lists.linux-foundation.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matthltc@us.ibm.com \
    --cc=orenl@cs.columbia.edu \
    --cc=serue@us.ibm.com \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).