* [PATCH v3] fuse: O_DIRECT support for files
@ 2012-02-06 7:12 Anand Avati
2012-02-06 14:36 ` Miklos Szeredi
0 siblings, 1 reply; 2+ messages in thread
From: Anand Avati @ 2012-02-06 7:12 UTC (permalink / raw)
To: miklos; +Cc: fuse-devel, linux-fsdevel, josef, chenk
Implement ->direct_IO() method in aops. The ->direct_IO() method combines
the existing fuse_direct_read/fuse_direct_write methods to implement
O_DIRECT functionality.
Reaching ->direct_IO() in the read path via generic_file_aio_read() ensures
proper synchronization with the page cache through its existing framework.
Reaching ->direct_IO() in the write path via fuse_file_aio_write is made
to come via generic_file_direct_write() which makes it play nice with
the page cache w.r.t other mmap pages etc.
On files marked 'direct_io' by the filesystem server, IO always follows
the fuse_direct_read/write path. fcntl(O_DIRECT) has no effect on such
files, and it always succeeds.
On files not marked 'direct_io' by the filesystem server, the IO path
depends on whether the application has set the O_DIRECT flag. The flag
can be passed at open() time or set later via fcntl().
Note that asynchronous O_DIRECT iocb jobs are always completed
synchronously (this has been the case with FUSE even before this patch).
Signed-off-by: Anand Avati <avati@redhat.com>
---
Tested with concurrent dd readers and writers, with oflag=direct set on a
few of the writes and iflag=direct set on a few of the reads.
fs/fuse/dir.c | 3 -
fs/fuse/file.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 107 insertions(+), 20 deletions(-)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2066328..7e5dbd0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (fc->no_create)
return -ENOSYS;
- if (flags & O_DIRECT)
- return -EINVAL;
-
forget = fuse_alloc_forget();
if (!forget)
return -ENOMEM;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4a199fd..0f426b5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- /* VFS checks this, but only _after_ ->open() */
- if (file->f_flags & O_DIRECT)
- return -EINVAL;
-
err = generic_file_open(inode, file);
if (err)
return err;
@@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
size_t count = 0;
+ size_t ocount = 0;
ssize_t written = 0;
+ ssize_t written_buffered = 0;
struct inode *inode = mapping->host;
ssize_t err;
struct iov_iter i;
+ loff_t endbyte = 0;
WARN_ON(iocb->ki_pos != pos);
- err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+ ocount = 0;
+ err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
if (err)
return err;
+ count = ocount;
+
mutex_lock(&inode->i_mutex);
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,11 +964,36 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
file_update_time(file);
- iov_iter_init(&i, iov, nr_segs, count, 0);
- written = fuse_perform_write(file, mapping, &i, pos);
- if (written >= 0)
- iocb->ki_pos = pos + written;
+ if (file->f_flags & O_DIRECT) {
+ written = generic_file_direct_write(iocb, iov, &nr_segs,
+ pos, &iocb->ki_pos,
+ count, ocount);
+ if (written < 0 || written == count)
+ goto out;
+
+ pos += written;
+ count -= written;
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+ written_buffered = fuse_perform_write(file, mapping, &i, pos);
+ if (written_buffered < 0) {
+ err = written_buffered;
+ goto out;
+ }
+ endbyte = pos + written_buffered - 1;
+ err = filemap_write_and_wait_range(file->f_mapping, pos,
+ endbyte);
+ if (err)
+ goto out;
+ written += written_buffered;
+ iocb->ki_pos = pos + written_buffered;
+ } else {
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+ written = fuse_perform_write(file, mapping, &i, pos);
+ if (written >= 0)
+ iocb->ki_pos = pos + written;
+ }
out:
current->backing_dev_info = NULL;
mutex_unlock(&inode->i_mutex);
@@ -1101,30 +1128,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
return res;
}
-static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
{
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t res;
- if (is_bad_inode(inode))
- return -EIO;
-
- /* Don't allow parallel writes to the same file */
- mutex_lock(&inode->i_mutex);
res = generic_write_checks(file, ppos, &count, 0);
if (!res) {
res = fuse_direct_io(file, buf, count, ppos, 1);
if (res > 0)
fuse_write_update_size(inode, *ppos);
}
- mutex_unlock(&inode->i_mutex);
fuse_invalidate_attr(inode);
return res;
}
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ ssize_t res;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ /* Don't allow parallel writes to the same file */
+ mutex_lock(&inode->i_mutex);
+ res = __fuse_direct_write(file, buf, count, ppos);
+ mutex_unlock(&inode->i_mutex);
+
+ return res;
+}
+
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
__free_page(req->pages[0]);
@@ -2077,6 +2115,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
return 0;
}
+static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos, int rw)
+{
+ const struct iovec *vector = iov;
+ ssize_t ret = 0;
+
+ while (nr_segs > 0) {
+ void __user *base;
+ size_t len;
+ ssize_t nr;
+
+ base = vector->iov_base;
+ len = vector->iov_len;
+ vector++;
+ nr_segs--;
+
+ if (rw == WRITE)
+ nr = __fuse_direct_write(filp, base, len, ppos);
+ else
+ nr = fuse_direct_read(filp, base, len, ppos);
+
+ if (nr < 0) {
+ if (!ret)
+ ret = nr;
+ break;
+ }
+ ret += nr;
+ if (nr != len)
+ break;
+ }
+
+ return ret;
+}
+
+
+static ssize_t
+fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs)
+{
+ ssize_t ret = 0;
+ struct file *file = NULL;
+ loff_t pos = 0;
+
+ file = iocb->ki_filp;
+ pos = offset;
+
+ ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw);
+
+ return ret;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read = do_sync_read,
@@ -2120,6 +2209,7 @@ static const struct address_space_operations fuse_file_aops = {
.readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap,
+ .direct_IO = fuse_direct_IO,
};
void fuse_init_file_inode(struct inode *inode)
--
1.7.4.4
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v3] fuse: O_DIRECT support for files
2012-02-06 7:12 [PATCH v3] fuse: O_DIRECT support for files Anand Avati
@ 2012-02-06 14:36 ` Miklos Szeredi
0 siblings, 0 replies; 2+ messages in thread
From: Miklos Szeredi @ 2012-02-06 14:36 UTC (permalink / raw)
To: Anand Avati; +Cc: fuse-devel, linux-fsdevel, josef, chenk
Anand Avati <avati@redhat.com> writes:
> Implement ->direct_IO() method in aops. The ->direct_IO() method combines
> the existing fuse_direct_read/fuse_direct_write methods to implement
> O_DIRECT functionality.
>
> Reaching ->direct_IO() in the read path via generic_file_aio_read ensures
> proper synchronization with page cache with its existing framework.
>
> Reaching ->direct_IO() in the write path via fuse_file_aio_write is made
> to come via generic_file_direct_write() which makes it play nice with
> the page cache w.r.t other mmap pages etc.
>
> On files marked 'direct_io' by the filesystem server, IO always follows
> the fuse_direct_read/write path. There is no effect of fcntl(O_DIRECT)
> and it always succeeds.
>
> On files not marked with 'direct_io' by the filesystem server, the IO
> path depends on O_DIRECT flag by the application. This can be passed
> at the time of open() as well as via fcntl().
>
> Note that asynchronous O_DIRECT iocb jobs are completed synchronously
> always (this has been the case with FUSE even before this patch)
>
> Signed-off-by: Anand Avati <avati@redhat.com>
> ---
>
> Tested with concurrent read and write DDs with oflag=direct and iflag=direct set
> in a few writes and a few reads
>
> fs/fuse/dir.c | 3 -
> fs/fuse/file.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++--------
> 2 files changed, 107 insertions(+), 20 deletions(-)
>
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 2066328..7e5dbd0 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
> if (fc->no_create)
> return -ENOSYS;
>
> - if (flags & O_DIRECT)
> - return -EINVAL;
> -
> forget = fuse_alloc_forget();
> if (!forget)
> return -ENOMEM;
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 4a199fd..0f426b5 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
> struct fuse_conn *fc = get_fuse_conn(inode);
> int err;
>
> - /* VFS checks this, but only _after_ ->open() */
> - if (file->f_flags & O_DIRECT)
> - return -EINVAL;
> -
> err = generic_file_open(inode, file);
> if (err)
> return err;
> @@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
> struct file *file = iocb->ki_filp;
> struct address_space *mapping = file->f_mapping;
> size_t count = 0;
> + size_t ocount = 0;
> ssize_t written = 0;
> + ssize_t written_buffered = 0;
> struct inode *inode = mapping->host;
> ssize_t err;
> struct iov_iter i;
> + loff_t endbyte = 0;
>
> WARN_ON(iocb->ki_pos != pos);
>
> - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
> + ocount = 0;
> + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
> if (err)
> return err;
>
> + count = ocount;
> +
> mutex_lock(&inode->i_mutex);
> vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
>
> @@ -962,11 +964,36 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
>
> file_update_time(file);
>
> - iov_iter_init(&i, iov, nr_segs, count, 0);
> - written = fuse_perform_write(file, mapping, &i, pos);
> - if (written >= 0)
> - iocb->ki_pos = pos + written;
> + if (file->f_flags & O_DIRECT) {
> + written = generic_file_direct_write(iocb, iov, &nr_segs,
> + pos, &iocb->ki_pos,
> + count, ocount);
> + if (written < 0 || written == count)
> + goto out;
> +
> + pos += written;
> + count -= written;
> +
> + iov_iter_init(&i, iov, nr_segs, count, 0);
This codepath is still untested (hint: that last argument in
iov_iter_init() is wrong). Yeah, it's not easy to test, but you can do
something like passing a fraction of count to
generic_file_direct_write() so one half is written through the direct IO
and the other half through the buffered IO.
Thanks,
Miklos
> + written_buffered = fuse_perform_write(file, mapping, &i, pos);
> + if (written_buffered < 0) {
> + err = written_buffered;
> + goto out;
> + }
> + endbyte = pos + written_buffered - 1;
>
> + err = filemap_write_and_wait_range(file->f_mapping, pos,
> + endbyte);
> + if (err)
> + goto out;
> + written += written_buffered;
> + iocb->ki_pos = pos + written_buffered;
> + } else {
> + iov_iter_init(&i, iov, nr_segs, count, 0);
> + written = fuse_perform_write(file, mapping, &i, pos);
> + if (written >= 0)
> + iocb->ki_pos = pos + written;
> + }
> out:
> current->backing_dev_info = NULL;
> mutex_unlock(&inode->i_mutex);
> @@ -1101,30 +1128,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
> return res;
> }
>
> -static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
> - size_t count, loff_t *ppos)
> +static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> {
> struct inode *inode = file->f_path.dentry->d_inode;
> ssize_t res;
>
> - if (is_bad_inode(inode))
> - return -EIO;
> -
> - /* Don't allow parallel writes to the same file */
> - mutex_lock(&inode->i_mutex);
> res = generic_write_checks(file, ppos, &count, 0);
> if (!res) {
> res = fuse_direct_io(file, buf, count, ppos, 1);
> if (res > 0)
> fuse_write_update_size(inode, *ppos);
> }
> - mutex_unlock(&inode->i_mutex);
>
> fuse_invalidate_attr(inode);
>
> return res;
> }
>
> +static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct inode *inode = file->f_path.dentry->d_inode;
> + ssize_t res;
> +
> + if (is_bad_inode(inode))
> + return -EIO;
> +
> + /* Don't allow parallel writes to the same file */
> + mutex_lock(&inode->i_mutex);
> + res = __fuse_direct_write(file, buf, count, ppos);
> + mutex_unlock(&inode->i_mutex);
> +
> + return res;
> +}
> +
> static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
> {
> __free_page(req->pages[0]);
> @@ -2077,6 +2115,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
> return 0;
> }
>
> +static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
> + unsigned long nr_segs, loff_t *ppos, int rw)
> +{
> + const struct iovec *vector = iov;
> + ssize_t ret = 0;
> +
> + while (nr_segs > 0) {
> + void __user *base;
> + size_t len;
> + ssize_t nr;
> +
> + base = vector->iov_base;
> + len = vector->iov_len;
> + vector++;
> + nr_segs--;
> +
> + if (rw == WRITE)
> + nr = __fuse_direct_write(filp, base, len, ppos);
> + else
> + nr = fuse_direct_read(filp, base, len, ppos);
> +
> + if (nr < 0) {
> + if (!ret)
> + ret = nr;
> + break;
> + }
> + ret += nr;
> + if (nr != len)
> + break;
> + }
> +
> + return ret;
> +}
> +
> +
> +static ssize_t
> +fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
> + loff_t offset, unsigned long nr_segs)
> +{
> + ssize_t ret = 0;
> + struct file *file = NULL;
> + loff_t pos = 0;
> +
> + file = iocb->ki_filp;
> + pos = offset;
> +
> + ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw);
> +
> + return ret;
> +}
> +
> static const struct file_operations fuse_file_operations = {
> .llseek = fuse_file_llseek,
> .read = do_sync_read,
> @@ -2120,6 +2209,7 @@ static const struct address_space_operations fuse_file_aops = {
> .readpages = fuse_readpages,
> .set_page_dirty = __set_page_dirty_nobuffers,
> .bmap = fuse_bmap,
> + .direct_IO = fuse_direct_IO,
> };
>
> void fuse_init_file_inode(struct inode *inode)
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2012-02-06 14:36 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-06 7:12 [PATCH v3] fuse: O_DIRECT support for files Anand Avati
2012-02-06 14:36 ` Miklos Szeredi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).