* [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2
[not found] <cover.1415220890.git.milosz@adfin.com>
@ 2014-11-05 21:14 ` Milosz Tanski
2014-11-06 23:25 ` Jeff Moyer
2014-11-05 21:14 ` [PATCH v5 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
2014-11-05 21:14 ` [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
2 siblings, 1 reply; 13+ messages in thread
From: Milosz Tanski @ 2014-11-05 21:14 UTC (permalink / raw)
To: linux-kernel
Cc: Christoph Hellwig, linux-fsdevel, linux-aio, Mel Gorman,
Volker Lendecke, Tejun Heo, Jeff Moyer, Theodore Ts'o,
Al Viro, linux-api, Michael Kerrisk, linux-arch, linux-mm
New syscalls that take a flags argument. This change does not add any specific
flags.
Signed-off-by: Milosz Tanski <milosz@adfin.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
fs/read_write.c | 176 ++++++++++++++++++++++++++++++--------
include/linux/compat.h | 6 ++
include/linux/syscalls.h | 6 ++
include/uapi/asm-generic/unistd.h | 6 +-
mm/filemap.c | 5 +-
5 files changed, 158 insertions(+), 41 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 94b2d34..907735c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -866,6 +866,8 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
+ if (flags & ~0)
+ return -EINVAL;
return do_readv_writev(READ, file, vec, vlen, pos, flags);
}
@@ -879,21 +881,23 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
+ if (flags & ~0)
+ return -EINVAL;
return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
}
EXPORT_SYMBOL(vfs_writev);
-SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen)
+static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos = file_pos_read(f.file);
- ret = vfs_readv(f.file, vec, vlen, &pos, 0);
+ ret = vfs_readv(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput_pos(f);
@@ -905,15 +909,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
-SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen)
+static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos = file_pos_read(f.file);
- ret = vfs_writev(f.file, vec, vlen, &pos, 0);
+ ret = vfs_writev(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput_pos(f);
@@ -931,10 +935,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}
-SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, loff_t pos, int flags)
{
- loff_t pos = pos_from_hilo(pos_h, pos_l);
struct fd f;
ssize_t ret = -EBADF;
@@ -945,7 +948,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
if (f.file) {
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PREAD)
- ret = vfs_readv(f.file, vec, vlen, &pos, 0);
+ ret = vfs_readv(f.file, vec, vlen, &pos, flags);
fdput(f);
}
@@ -955,10 +958,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
-SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, loff_t pos, int flags)
{
- loff_t pos = pos_from_hilo(pos_h, pos_l);
struct fd f;
ssize_t ret = -EBADF;
@@ -969,7 +971,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
if (f.file) {
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PWRITE)
- ret = vfs_writev(f.file, vec, vlen, &pos, 0);
+ ret = vfs_writev(f.file, vec, vlen, &pos, flags);
fdput(f);
}
@@ -979,11 +981,63 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
+SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen)
+{
+ return do_readv(fd, vec, vlen, 0);
+}
+
+SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen)
+{
+ return do_writev(fd, vec, vlen, 0);
+}
+
+SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ return do_preadv(fd, vec, vlen, pos, 0);
+}
+
+SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
+ int, flags)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ if (pos == -1)
+ return do_readv(fd, vec, vlen, flags);
+
+ return do_preadv(fd, vec, vlen, pos, flags);
+}
+
+SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ return do_pwritev(fd, vec, vlen, pos, 0);
+}
+
+SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
+ int, flags)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ if (pos == -1)
+ return do_writev(fd, vec, vlen, flags);
+
+ return do_pwritev(fd, vec, vlen, pos, flags);
+}
+
#ifdef CONFIG_COMPAT
static ssize_t compat_do_readv_writev(int type, struct file *file,
const struct compat_iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos)
+ unsigned long nr_segs, loff_t *pos, int flags)
{
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
@@ -1017,7 +1071,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
if (iter_fn)
ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
- pos, iter_fn, 0);
+ pos, iter_fn, flags);
else if (fnv)
ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
pos, fnv);
@@ -1041,7 +1095,7 @@ out:
static size_t compat_readv(struct file *file,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos)
+ unsigned long vlen, loff_t *pos, int flags)
{
ssize_t ret = -EBADF;
@@ -1052,7 +1106,7 @@ static size_t compat_readv(struct file *file,
if (!(file->f_mode & FMODE_CAN_READ))
goto out;
- ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
+ ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
out:
if (ret > 0)
@@ -1061,9 +1115,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
- compat_ulong_t, vlen)
+static size_t __compat_sys_readv(compat_ulong_t fd,
+ const struct compat_iovec __user *vec,
+ compat_ulong_t vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
@@ -1072,28 +1126,34 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
if (!f.file)
return -EBADF;
pos = f.file->f_pos;
- ret = compat_readv(f.file, vec, vlen, &pos);
+ ret = compat_readv(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
f.file->f_pos = pos;
fdput_pos(f);
return ret;
+
+}
+
+COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
+ const struct compat_iovec __user *,vec,
+ compat_ulong_t, vlen)
+{
+ return __compat_sys_readv(fd, vec, vlen, 0);
}
static long __compat_sys_preadv64(unsigned long fd,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos)
+ unsigned long vlen, loff_t pos, int flags)
{
struct fd f;
ssize_t ret;
- if (pos < 0)
- return -EINVAL;
f = fdget(fd);
if (!f.file)
return -EBADF;
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PREAD)
- ret = compat_readv(f.file, vec, vlen, &pos);
+ ret = compat_readv(f.file, vec, vlen, &pos, flags);
fdput(f);
return ret;
}
@@ -1103,7 +1163,10 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos)
{
- return __compat_sys_preadv64(fd, vec, vlen, pos);
+ if (pos < 0)
+ return -EINVAL;
+
+ return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
}
#endif
@@ -1113,12 +1176,28 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return __compat_sys_preadv64(fd, vec, vlen, pos);
+ if (pos < 0)
+ return -EINVAL;
+
+ return __compat_sys_preadv64(fd, vec, vlen, pos, 0);
+}
+
+COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
+ const struct compat_iovec __user *,vec,
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
+ int, flags)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+
+ if (pos == -1)
+ return __compat_sys_readv(fd, vec, vlen, flags);
+
+ return __compat_sys_preadv64(fd, vec, vlen, pos, flags);
}
static size_t compat_writev(struct file *file,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos)
+ unsigned long vlen, loff_t *pos, int flags)
{
ssize_t ret = -EBADF;
@@ -1129,7 +1208,7 @@ static size_t compat_writev(struct file *file,
if (!(file->f_mode & FMODE_CAN_WRITE))
goto out;
- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, flags);
out:
if (ret > 0)
@@ -1138,9 +1217,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
- const struct compat_iovec __user *, vec,
- compat_ulong_t, vlen)
+static size_t __compat_sys_writev(compat_ulong_t fd,
+ const struct compat_iovec __user* vec,
+ compat_ulong_t vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
@@ -1149,28 +1228,36 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
if (!f.file)
return -EBADF;
pos = f.file->f_pos;
- ret = compat_writev(f.file, vec, vlen, &pos);
+ ret = compat_writev(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
f.file->f_pos = pos;
fdput_pos(f);
return ret;
}
+COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
+ const struct compat_iovec __user *, vec,
+ compat_ulong_t, vlen)
+{
+ return __compat_sys_writev(fd, vec, vlen, 0);
+}
+
static long __compat_sys_pwritev64(unsigned long fd,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos)
+ unsigned long vlen, loff_t pos, int flags)
{
struct fd f;
ssize_t ret;
if (pos < 0)
return -EINVAL;
+
f = fdget(fd);
if (!f.file)
return -EBADF;
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PWRITE)
- ret = compat_writev(f.file, vec, vlen, &pos);
+ ret = compat_writev(f.file, vec, vlen, &pos, flags);
fdput(f);
return ret;
}
@@ -1180,7 +1267,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos)
{
- return __compat_sys_pwritev64(fd, vec, vlen, pos);
+ return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
}
#endif
@@ -1190,8 +1277,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return __compat_sys_pwritev64(fd, vec, vlen, pos);
+ return __compat_sys_pwritev64(fd, vec, vlen, pos, 0);
}
+
+COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
+ const struct compat_iovec __user *,vec,
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+
+ if (pos == -1)
+ return __compat_sys_writev(fd, vec, vlen, flags);
+
+ return __compat_sys_pwritev64(fd, vec, vlen, pos, flags);
+}
+
#endif
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e649426..63a94e2 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -340,6 +340,12 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd,
const struct compat_iovec __user *vec,
compat_ulong_t vlen, u32 pos_low, u32 pos_high);
+asmlinkage ssize_t compat_sys_preadv2(compat_ulong_t fd,
+ const struct compat_iovec __user *vec,
+ compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
+asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
+ const struct compat_iovec __user *vec,
+ compat_ulong_t vlen, u32 pos_low, u32 pos_high, int flags);
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
asmlinkage long compat_sys_preadv64(unsigned long fd,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bda9b81..cedc22e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -571,8 +571,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf,
size_t count, loff_t pos);
asmlinkage long sys_preadv(unsigned long fd, const struct iovec __user *vec,
unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
+asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
+ int flags);
asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec,
unsigned long vlen, unsigned long pos_l, unsigned long pos_h);
+asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, unsigned long pos_l, unsigned long pos_h,
+ int flags);
asmlinkage long sys_getcwd(char __user *buf, unsigned long size);
asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode);
asmlinkage long sys_chdir(const char __user *filename);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 22749c1..9406018 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -213,6 +213,10 @@ __SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
#define __NR_pwritev 70
__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
+#define __NR_preadv2 281
+__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
+#define __NR_pwritev2 282
+__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
/* fs/sendfile.c */
#define __NR3264_sendfile 71
@@ -709,7 +713,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
__SYSCALL(__NR_bpf, sys_bpf)
#undef __NR_syscalls
-#define __NR_syscalls 281
+#define __NR_syscalls 283
/*
* All syscalls below here should go away really,
diff --git a/mm/filemap.c b/mm/filemap.c
index 14b4642..530c263 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1457,6 +1457,7 @@ static void shrink_readahead_size_eio(struct file *filp,
* @ppos: current file position
* @iter: data destination
* @written: already copied
+ * @flags: optional flags
*
* This is a generic file read routine, and uses the
* mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1465,7 +1466,7 @@ static void shrink_readahead_size_eio(struct file *filp,
* of the logic when it comes to error handling etc.
*/
static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
- struct iov_iter *iter, ssize_t written)
+ struct iov_iter *iter, ssize_t written, int flags)
{
struct address_space *mapping = filp->f_mapping;
struct inode *inode = mapping->host;
@@ -1735,7 +1736,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
}
- retval = do_generic_file_read(file, ppos, iter, retval);
+ retval = do_generic_file_read(file, ppos, iter, retval, iocb->ki_rwflags);
out:
return retval;
}
--
1.9.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 4/7] vfs: RWF_NONBLOCK flag for preadv2
[not found] <cover.1415220890.git.milosz@adfin.com>
2014-11-05 21:14 ` [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
@ 2014-11-05 21:14 ` Milosz Tanski
2014-11-10 16:07 ` Sage Weil
2014-11-05 21:14 ` [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
2 siblings, 1 reply; 13+ messages in thread
From: Milosz Tanski @ 2014-11-05 21:14 UTC (permalink / raw)
To: linux-kernel
Cc: Christoph Hellwig, linux-fsdevel, linux-aio, Mel Gorman,
Volker Lendecke, Tejun Heo, Jeff Moyer, Theodore Ts'o,
Al Viro, linux-api, Michael Kerrisk, linux-arch, ceph-devel,
linux-cifs, samba-technical, linux-nfs, linux-xfs, ocfs2-devel,
linux-mm
generic_file_read_iter() supports a new flag RWF_NONBLOCK which says that we
only want to read the data if it's already in the page cache.
Additionally, there are a few filesystems where we have to specifically
bail out early when RWF_NONBLOCK is set, because the op would block.
Christoph Hellwig contributed this code.
Signed-off-by: Milosz Tanski <milosz@adfin.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
---
fs/ceph/file.c | 2 ++
fs/cifs/file.c | 6 ++++++
fs/nfs/file.c | 5 ++++-
fs/ocfs2/file.c | 6 ++++++
fs/pipe.c | 3 ++-
fs/read_write.c | 38 +++++++++++++++++++++++++-------------
fs/xfs/xfs_file.c | 4 ++++
include/linux/fs.h | 3 +++
mm/filemap.c | 18 ++++++++++++++++++
mm/shmem.c | 4 ++++
10 files changed, 74 insertions(+), 15 deletions(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d7e0da8..b798b5c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -822,6 +822,8 @@ again:
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
(fi->flags & CEPH_F_SYNC)) {
+ if (iocb->ki_rwflags & O_NONBLOCK)
+ return -EAGAIN;
dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3e4d00a..c485afa 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3005,6 +3005,9 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
struct cifs_readdata *rdata, *tmp;
struct list_head rdata_list;
+ if (iocb->ki_rwflags & RWF_NONBLOCK)
+ return -EAGAIN;
+
len = iov_iter_count(to);
if (!len)
return 0;
@@ -3123,6 +3126,9 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
return generic_file_read_iter(iocb, to);
+ if (iocb->ki_rwflags & RWF_NONBLOCK)
+ return -EAGAIN;
+
/*
* We need to hold the sem to be sure nobody modifies lock list
* with a brlock that prevents reading.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2ab6f00..aa9046f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -171,8 +171,11 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
- if (iocb->ki_filp->f_flags & O_DIRECT)
+ if (iocb->ki_filp->f_flags & O_DIRECT) {
+ if (iocb->ki_rwflags & O_NONBLOCK)
+ return -EAGAIN;
return nfs_file_direct_read(iocb, to, iocb->ki_pos);
+ }
dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 324dc93..bb66ca4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2472,6 +2472,12 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
filp->f_path.dentry->d_name.name,
to->nr_segs); /* GRRRRR */
+ /*
+ * No non-blocking reads for ocfs2 for now. Might be doable with
+ * non-blocking cluster lock helpers.
+ */
+ if (iocb->ki_rwflags & RWF_NONBLOCK)
+ return -EAGAIN;
if (!inode) {
ret = -EINVAL;
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e5..212bf68 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -302,7 +302,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
*/
if (ret)
break;
- if (filp->f_flags & O_NONBLOCK) {
+ if ((filp->f_flags & O_NONBLOCK) ||
+ (iocb->ki_rwflags & RWF_NONBLOCK)) {
ret = -EAGAIN;
break;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 907735c..cba7d4c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -835,14 +835,19 @@ static ssize_t do_readv_writev(int type, struct file *file,
file_start_write(file);
}
- if (iter_fn)
+ if (iter_fn) {
ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
pos, iter_fn, flags);
- else if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
- else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ } else {
+ if (type == READ && (flags & RWF_NONBLOCK))
+ return -EAGAIN;
+
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ }
if (type != READ)
file_end_write(file);
@@ -866,8 +871,10 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
- if (flags & ~0)
+ if (flags & ~RWF_NONBLOCK)
return -EINVAL;
+ if ((file->f_flags & O_DIRECT) && (flags & RWF_NONBLOCK))
+ return -EAGAIN;
return do_readv_writev(READ, file, vec, vlen, pos, flags);
}
@@ -1069,14 +1076,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
file_start_write(file);
}
- if (iter_fn)
+ if (iter_fn) {
ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
pos, iter_fn, flags);
- else if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
- else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ } else {
+ if (type == READ && (flags & RWF_NONBLOCK))
+ return -EAGAIN;
+
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ }
if (type != READ)
file_end_write(file);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index eb596b4..b1f6334 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -246,6 +246,10 @@ xfs_file_read_iter(
XFS_STATS_INC(xs_read_calls);
+ /* XXX: need a non-blocking iolock helper, shouldn't be too hard */
+ if (iocb->ki_rwflags & RWF_NONBLOCK)
+ return -EAGAIN;
+
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= XFS_IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ed5711..eaebd99 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1459,6 +1459,9 @@ struct block_device_operations;
#define HAVE_COMPAT_IOCTL 1
#define HAVE_UNLOCKED_IOCTL 1
+/* These flags are used for the readv/writev syscalls with flags. */
+#define RWF_NONBLOCK 0x00000001
+
struct iov_iter;
struct file_operations {
diff --git a/mm/filemap.c b/mm/filemap.c
index 530c263..09d3af3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1494,6 +1494,8 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
find_page:
page = find_get_page(mapping, index);
if (!page) {
+ if (flags & RWF_NONBLOCK)
+ goto would_block;
page_cache_sync_readahead(mapping,
ra, filp,
index, last_index - index);
@@ -1585,6 +1587,11 @@ page_ok:
continue;
page_not_up_to_date:
+ if (flags & RWF_NONBLOCK) {
+ page_cache_release(page);
+ goto would_block;
+ }
+
/* Get exclusive access to the page ... */
error = lock_page_killable(page);
if (unlikely(error))
@@ -1604,6 +1611,12 @@ page_not_up_to_date_locked:
goto page_ok;
}
+ if (flags & RWF_NONBLOCK) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto would_block;
+ }
+
readpage:
/*
* A previous I/O error may have been due to temporary
@@ -1674,6 +1687,8 @@ no_cached_page:
goto readpage;
}
+would_block:
+ error = -EAGAIN;
out:
ra->prev_pos = prev_index;
ra->prev_pos <<= PAGE_CACHE_SHIFT;
@@ -1707,6 +1722,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
size_t count = iov_iter_count(iter);
loff_t size;
+ if (iocb->ki_rwflags & RWF_NONBLOCK)
+ return -EAGAIN;
+
if (!count)
goto out; /* skip atime */
size = i_size_read(inode);
diff --git a/mm/shmem.c b/mm/shmem.c
index cd6fc75..5c30f04 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1531,6 +1531,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
ssize_t retval = 0;
loff_t *ppos = &iocb->ki_pos;
+ /* XXX: should be easily supportable */
+ if (iocb->ki_rwflags & RWF_NONBLOCK)
+ return -EAGAIN;
+
/*
* Might this read be for a stacking filesystem? Then when reading
* holes of a sparse file, we actually need to allocate those pages,
--
1.9.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics
[not found] <cover.1415220890.git.milosz@adfin.com>
2014-11-05 21:14 ` [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
2014-11-05 21:14 ` [PATCH v5 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
@ 2014-11-05 21:14 ` Milosz Tanski
2014-11-06 23:46 ` Jeff Moyer
2014-11-10 16:07 ` [PATCH v5 7/7] fs: " Sage Weil
2 siblings, 2 replies; 13+ messages in thread
From: Milosz Tanski @ 2014-11-05 21:14 UTC (permalink / raw)
To: linux-kernel
Cc: Christoph Hellwig, Christoph Hellwig, linux-fsdevel, linux-aio,
Mel Gorman, Volker Lendecke, Tejun Heo, Jeff Moyer,
Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
linux-arch, ceph-devel, fuse-devel, linux-nfs, ocfs2-devel,
linux-mm
From: Christoph Hellwig <hch@lst.de>
With the new read/write with flags syscalls we can support a flag
to enable O_DSYNC semantics on a per-operation basis. This is
useful to implement protocols like SMB, NFS or SCSI that have such
per-operation flags.
Example program below:
cat > pwritev2.c << EOF
#define LO_HI_LONG(val) \
(off_t) val, \
(off_t) ((((uint64_t) (val)) >> (sizeof (long) * 4)) >> (sizeof (long) * 4))
/*
 * Thin userspace wrapper around the raw pwritev2 system call.
 *
 * Forwards the descriptor, the iovec array and its element count, the file
 * offset (expanded by LO_HI_LONG into the pair of arguments the syscall
 * takes) and the per-call flags, and hands back whatever syscall() returns.
 */
static ssize_t
pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)
{
	long rc;

	rc = syscall(__NR_pwritev2, fd, iov, iovcnt, LO_HI_LONG(offset), flags);
	return rc;
}
/*
 * Demo driver for pwritev2() with RWF_DSYNC.
 *
 * Creates (or truncates) the file named by argv[1], then writes a single
 * 1024-byte buffer filled with 0xfe at offset 0 using one iovec.
 *
 * Returns 0 after printing the byte count on success, and 1 when no path
 * was given or when open()/pwritev2() fails, so callers and scripts can
 * tell a failed run from a successful one.
 */
int main(int argc, char **argv)
{
	char buf[1024];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	int fd;
	int ret;

	/* argv[1] is only meaningful when a path was actually supplied. */
	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n",
			argc > 0 ? argv[0] : "pwritev2");
		return 1;
	}

	fd = open(argv[1], O_WRONLY|O_CREAT|O_TRUNC, 0666);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(buf, 0xfe, sizeof(buf));
	ret = pwritev2(fd, &iov, 1, 0, RWF_DSYNC);
	if (ret < 0) {
		perror("pwritev2");
		return 1;
	}

	printf("ret = %d\n", ret);
	return 0;
}
EOF
Signed-off-by: Christoph Hellwig <hch@lst.de>
[milosz@adfin.com: added flag check to compat_do_readv_writev()]
Signed-off-by: Milosz Tanski <milosz@adfin.com>
---
fs/ceph/file.c | 4 +++-
fs/fuse/file.c | 2 ++
fs/nfs/file.c | 10 ++++++----
fs/ocfs2/file.c | 6 ++++--
fs/read_write.c | 20 +++++++++++++++-----
include/linux/fs.h | 3 ++-
mm/filemap.c | 4 +++-
7 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b798b5c..2d4e15a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -983,7 +983,9 @@ retry_snap:
ceph_put_cap_refs(ci, got);
if (written >= 0 &&
- ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
+ ((file->f_flags & O_SYNC) ||
+ IS_SYNC(file->f_mapping->host) ||
+ (iocb->ki_rwflags & RWF_DSYNC) ||
ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
err = vfs_fsync_range(file, pos, pos + written - 1, 1);
if (err < 0)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index caa8d95..bb4fb23 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1248,6 +1248,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
} else {
+ if (iocb->ki_rwflags & RWF_DSYNC)
+ return -EINVAL;
written = fuse_perform_write(file, mapping, from, pos);
if (written >= 0)
iocb->ki_pos = pos + written;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index aa9046f..c59b0b7 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -652,13 +652,15 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
.remap_pages = generic_file_remap_pages,
};
-static int nfs_need_sync_write(struct file *filp, struct inode *inode)
+static int nfs_need_sync_write(struct kiocb *iocb, struct inode *inode)
{
struct nfs_open_context *ctx;
- if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
+ if (IS_SYNC(inode) ||
+ (iocb->ki_filp->f_flags & O_DSYNC) ||
+ (iocb->ki_rwflags & RWF_DSYNC))
return 1;
- ctx = nfs_file_open_context(filp);
+ ctx = nfs_file_open_context(iocb->ki_filp);
if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
nfs_ctx_key_to_expire(ctx))
return 1;
@@ -705,7 +707,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
written = result;
/* Return error values for O_DSYNC and IS_SYNC() */
- if (result >= 0 && nfs_need_sync_write(file, inode)) {
+ if (result >= 0 && nfs_need_sync_write(iocb, inode)) {
int err = vfs_fsync(file, 0);
if (err < 0)
result = err;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index bb66ca4..8f9a86b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2374,8 +2374,10 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
- if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
- ((file->f_flags & O_DIRECT) && !direct_io)) {
+ if (((file->f_flags & O_DSYNC) && !direct_io) ||
+ IS_SYNC(inode) ||
+ ((file->f_flags & O_DIRECT) && !direct_io) ||
+ (iocb->ki_rwflags & RWF_DSYNC)) {
ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
*ppos + count - 1);
if (ret < 0)
diff --git a/fs/read_write.c b/fs/read_write.c
index cba7d4c..3443265 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -839,8 +839,13 @@ static ssize_t do_readv_writev(int type, struct file *file,
ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
pos, iter_fn, flags);
} else {
- if (type == READ && (flags & RWF_NONBLOCK))
- return -EAGAIN;
+ if (type == READ) {
+ if (flags & RWF_NONBLOCK)
+ return -EAGAIN;
+ } else {
+ if (flags & RWF_DSYNC)
+ return -EINVAL;
+ }
if (fnv)
ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
@@ -888,7 +893,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
- if (flags & ~0)
+ if (flags & ~RWF_DSYNC)
return -EINVAL;
return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
@@ -1080,8 +1085,13 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
pos, iter_fn, flags);
} else {
- if (type == READ && (flags & RWF_NONBLOCK))
- return -EAGAIN;
+ if (type == READ) {
+ if (flags & RWF_NONBLOCK)
+ return -EAGAIN;
+ } else {
+ if (flags & RWF_DSYNC)
+ return -EINVAL;
+ }
if (fnv)
ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7d0e116..7786b88 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1460,7 +1460,8 @@ struct block_device_operations;
#define HAVE_UNLOCKED_IOCTL 1
/* These flags are used for the readv/writev syscalls with flags. */
-#define RWF_NONBLOCK 0x00000001
+#define RWF_NONBLOCK 0x00000001
+#define RWF_DSYNC 0x00000002
struct iov_iter;
diff --git a/mm/filemap.c b/mm/filemap.c
index 6107058..4fbef99 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2669,7 +2669,9 @@ int generic_write_sync(struct kiocb *iocb, loff_t count)
struct file *file = iocb->ki_filp;
if (count > 0 &&
- ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))) {
+ ((file->f_flags & O_DSYNC) ||
+ (iocb->ki_rwflags & RWF_DSYNC) ||
+ IS_SYNC(file->f_mapping->host))) {
bool fdatasync = !(file->f_flags & __O_SYNC);
ssize_t ret = 0;
--
1.9.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 13+ messages in thread
* Re: [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2
2014-11-05 21:14 ` [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
@ 2014-11-06 23:25 ` Jeff Moyer
2014-11-07 16:28 ` Milosz Tanski
0 siblings, 1 reply; 13+ messages in thread
From: Jeff Moyer @ 2014-11-06 23:25 UTC (permalink / raw)
To: Milosz Tanski
Cc: linux-kernel, Christoph Hellwig, linux-fsdevel, linux-aio,
Mel Gorman, Volker Lendecke, Tejun Heo, Theodore Ts'o,
Al Viro, linux-api, Michael Kerrisk, linux-arch, linux-mm
Milosz Tanski <milosz@adfin.com> writes:
> New syscalls that take a flags argument. This change does not add any specific
> flags.
>
> Signed-off-by: Milosz Tanski <milosz@adfin.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/read_write.c | 176 ++++++++++++++++++++++++++++++--------
> include/linux/compat.h | 6 ++
> include/linux/syscalls.h | 6 ++
> include/uapi/asm-generic/unistd.h | 6 +-
> mm/filemap.c | 5 +-
> 5 files changed, 158 insertions(+), 41 deletions(-)
>
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 94b2d34..907735c 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -866,6 +866,8 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
> return -EBADF;
> if (!(file->f_mode & FMODE_CAN_READ))
> return -EINVAL;
> + if (flags & ~0)
> + return -EINVAL;
>
> return do_readv_writev(READ, file, vec, vlen, pos, flags);
> }
> @@ -879,21 +881,23 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
> return -EBADF;
> if (!(file->f_mode & FMODE_CAN_WRITE))
> return -EINVAL;
> + if (flags & ~0)
> + return -EINVAL;
>
> return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
> }
Hi, Milosz,
You've checked for invalid flags for the normal system calls, but not
for the compat variants. Can you add that in, please?
Thanks!
Jeff
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics
2014-11-05 21:14 ` [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
@ 2014-11-06 23:46 ` Jeff Moyer
2014-11-07 4:22 ` [PATCH v5 7/7] " Anton Altaparmakov
2014-11-10 16:07 ` [PATCH v5 7/7] fs: " Sage Weil
1 sibling, 1 reply; 13+ messages in thread
From: Jeff Moyer @ 2014-11-06 23:46 UTC (permalink / raw)
To: Milosz Tanski
Cc: linux-kernel, Christoph Hellwig, Christoph Hellwig, linux-fsdevel,
linux-aio, Mel Gorman, Volker Lendecke, Tejun Heo,
Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
linux-arch, ceph-devel, fuse-devel, linux-nfs, ocfs2-devel,
linux-mm
Milosz Tanski <milosz@adfin.com> writes:
> - if (type == READ && (flags & RWF_NONBLOCK))
> - return -EAGAIN;
> + if (type == READ) {
> + if (flags & RWF_NONBLOCK)
> + return -EAGAIN;
> + } else {
> + if (flags & RWF_DSYNC)
> + return -EINVAL;
> + }
Minor nit, but I'd rather read something that looks like this:
if (type == READ && (flags & RWF_NONBLOCK))
return -EAGAIN;
else if (type == WRITE && (flags & RWF_DSYNC))
return -EINVAL;
I won't lose sleep over it, though.
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 7/7] add a flag for per-operation O_DSYNC semantics
2014-11-06 23:46 ` Jeff Moyer
@ 2014-11-07 4:22 ` Anton Altaparmakov
2014-11-07 5:52 ` [fuse-devel] " Anand Avati
0 siblings, 1 reply; 13+ messages in thread
From: Anton Altaparmakov @ 2014-11-07 4:22 UTC (permalink / raw)
To: Jeff Moyer
Cc: Milosz Tanski, linux-kernel, Christoph Hellwig, Christoph Hellwig,
linux-fsdevel, linux-aio, Mel Gorman, Volker Lendecke, Tejun Heo,
Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
linux-arch, ceph-devel, fuse-devel, linux-nfs, ocfs2-devel,
linux-mm
Hi Jeff,
> On 7 Nov 2014, at 01:46, Jeff Moyer <jmoyer@redhat.com> wrote:
>
> Milosz Tanski <milosz@adfin.com> writes:
>
>> - if (type == READ && (flags & RWF_NONBLOCK))
>> - return -EAGAIN;
>> + if (type == READ) {
>> + if (flags & RWF_NONBLOCK)
>> + return -EAGAIN;
>> + } else {
>> + if (flags & RWF_DSYNC)
>> + return -EINVAL;
>> + }
>
> Minor nit, but I'd rather read something that looks like this:
>
> if (type == READ && (flags & RWF_NONBLOCK))
> return -EAGAIN;
> else if (type == WRITE && (flags & RWF_DSYNC))
> return -EINVAL;
But your version is less logically efficient for the case where "type == READ" is true and "flags & RWF_NONBLOCK" is false because your version then has to do the "if (type == WRITE" check before discovering it does not need to take that branch either, whilst the original version does not have to do such a test at all.
Best regards,
Anton
> I won't lose sleep over it, though.
>
> Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
--
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
University of Cambridge Information Services, Roger Needham Building
7 JJ Thomson Avenue, Cambridge, CB3 0RB, UK
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [fuse-devel] [PATCH v5 7/7] add a flag for per-operation O_DSYNC semantics
2014-11-07 4:22 ` [PATCH v5 7/7] " Anton Altaparmakov
@ 2014-11-07 5:52 ` Anand Avati
2014-11-07 6:43 ` Anton Altaparmakov
0 siblings, 1 reply; 13+ messages in thread
From: Anand Avati @ 2014-11-07 5:52 UTC (permalink / raw)
To: Anton Altaparmakov
Cc: Jeff Moyer, linux-arch, linux-aio, linux-nfs, Volker Lendecke,
Theodore Ts'o, linux-mm, fuse-devel@lists.sourceforge.net,
linux-api, Linux Kernel Mailing List, Al Viro, Christoph Hellwig,
Tejun Heo, Milosz Tanski, linux-fsdevel, Michael Kerrisk,
ceph-devel, Christoph Hellwig, ocfs2-devel, Mel Gorman
[-- Attachment #1: Type: text/plain, Size: 928 bytes --]
On Thu, Nov 6, 2014 at 8:22 PM, Anton Altaparmakov <aia21@cam.ac.uk> wrote:
> > On 7 Nov 2014, at 01:46, Jeff Moyer <jmoyer@redhat.com> wrote:
> > Minor nit, but I'd rather read something that looks like this:
> >
> > if (type == READ && (flags & RWF_NONBLOCK))
> > return -EAGAIN;
> > else if (type == WRITE && (flags & RWF_DSYNC))
> > return -EINVAL;
>
> But your version is less logically efficient for the case where "type ==
> READ" is true and "flags & RWF_NONBLOCK" is false because your version then
> has to do the "if (type == WRITE" check before discovering it does not need
> to take that branch either, whilst the original version does not have to do
> such a test at all.
>
Seriously? Just focus on the code readability/maintainability which makes
the code most easily understood/obvious to a new pair of eyes, and leave
such micro-optimizations to the compiler..
Thanks
[-- Attachment #2: Type: text/html, Size: 1425 bytes --]
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [fuse-devel] [PATCH v5 7/7] add a flag for per-operation O_DSYNC semantics
2014-11-07 5:52 ` [fuse-devel] " Anand Avati
@ 2014-11-07 6:43 ` Anton Altaparmakov
2014-11-07 14:21 ` Roger Willcocks
0 siblings, 1 reply; 13+ messages in thread
From: Anton Altaparmakov @ 2014-11-07 6:43 UTC (permalink / raw)
To: Anand Avati
Cc: Jeff Moyer, linux-arch, linux-aio, linux-nfs, Volker Lendecke,
Theodore Ts'o, linux-mm, fuse-devel@lists.sourceforge.net,
linux-api, Linux Kernel Mailing List, Al Viro, Christoph Hellwig,
Tejun Heo, Milosz Tanski, linux-fsdevel, Michael Kerrisk,
ceph-devel, Christoph Hellwig, ocfs2-devel, Mel Gorman
Hi,
> On 7 Nov 2014, at 07:52, Anand Avati <avati@gluster.org> wrote:
> On Thu, Nov 6, 2014 at 8:22 PM, Anton Altaparmakov <aia21@cam.ac.uk> wrote:
> > On 7 Nov 2014, at 01:46, Jeff Moyer <jmoyer@redhat.com> wrote:
> > Minor nit, but I'd rather read something that looks like this:
> >
> > if (type == READ && (flags & RWF_NONBLOCK))
> > return -EAGAIN;
> > else if (type == WRITE && (flags & RWF_DSYNC))
> > return -EINVAL;
>
> But your version is less logically efficient for the case where "type == READ" is true and "flags & RWF_NONBLOCK" is false because your version then has to do the "if (type == WRITE" check before discovering it does not need to take that branch either, whilst the original version does not have to do such a test at all.
>
> Seriously?
Of course seriously.
> Just focus on the code readability/maintainability which makes the code most easily understood/obvious to a new pair of eyes, and leave such micro-optimizations to the compiler..
The original version is more readable (IMO) and this is not a micro-optimization. It is people like you who are responsible for the fact that we need faster and faster computers to cope with the inefficient/poor code being written more and more...
And I really wouldn't hedge my bets on gcc optimizing something like that. The amount of crap assembly produced from gcc that I have seen over the years suggests that it is quite likely it will make a hash of it instead...
Best regards,
Anton
> Thanks
--
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
University of Cambridge Information Services, Roger Needham Building
7 JJ Thomson Avenue, Cambridge, CB3 0RB, UK
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [fuse-devel] [PATCH v5 7/7] add a flag for per-operation O_DSYNC semantics
2014-11-07 6:43 ` Anton Altaparmakov
@ 2014-11-07 14:21 ` Roger Willcocks
2014-11-07 19:58 ` Milosz Tanski
0 siblings, 1 reply; 13+ messages in thread
From: Roger Willcocks @ 2014-11-07 14:21 UTC (permalink / raw)
To: Anton Altaparmakov
Cc: Anand Avati, linux-arch, linux-aio, linux-nfs, Volker Lendecke,
Theodore Ts'o, Mel Gorman, fuse-devel@lists.sourceforge.net,
linux-api, Linux Kernel Mailing List, Michael Kerrisk,
Christoph Hellwig, linux-mm, Jeff Moyer, Al Viro, Tejun Heo,
linux-fsdevel, ceph-devel, Christoph Hellwig, ocfs2-devel,
Milosz Tanski
On Fri, 2014-11-07 at 08:43 +0200, Anton Altaparmakov wrote:
> Hi,
>
> > On 7 Nov 2014, at 07:52, Anand Avati <avati@gluster.org> wrote:
> > On Thu, Nov 6, 2014 at 8:22 PM, Anton Altaparmakov <aia21@cam.ac.uk> wrote:
> > > On 7 Nov 2014, at 01:46, Jeff Moyer <jmoyer@redhat.com> wrote:
> > > Minor nit, but I'd rather read something that looks like this:
> > >
> > > if (type == READ && (flags & RWF_NONBLOCK))
> > > return -EAGAIN;
> > > else if (type == WRITE && (flags & RWF_DSYNC))
> > > return -EINVAL;
> >
> > But your version is less logically efficient for the case where "type == READ" is true and "flags & RWF_NONBLOCK" is false because your version then has to do the "if (type == WRITE" check before discovering it does not need to take that branch either, whilst the original version does not have to do such a test at all.
> >
> > Seriously?
>
> Of course seriously.
>
> > Just focus on the code readability/maintainability which makes the code most easily understood/obvious to a new pair of eyes, and leave such micro-optimizations to the compiler..
>
> The original version is more readable (IMO) and this is not a micro-optimization. It is people like you who are responsible for the fact that we need faster and faster computers to cope with the inefficient/poor code being written more and more...
>
Your original version needs me to know that type can only be either READ
or WRITE (and not, for instance, READONLY or READWRITE or some other
random special case) and it rings alarm bells when I first see it. If
you want to keep the micro optimization, you need an assertion to
acknowledge the potential bug and a comment to make the code obvious:
+ assert(type == READ || type == WRITE);
+ if (type == READ) {
+ if (flags & RWF_NONBLOCK)
+ return -EAGAIN;
+ } else { /* WRITE */
+ if (flags & RWF_DSYNC)
+ return -EINVAL;
+ }
but since what's really happening here is two separate and independent
error checks, Jeff's version is still better, even if it does take an
extra couple of nanoseconds.
Actually I'd probably write:
if (type == READ && (flags & RWF_NONBLOCK))
return -EAGAIN;
if (type == WRITE && (flags & RWF_DSYNC))
return -EINVAL;
(no 'else' since the code will never be reached if the first test is
true).
--
Roger Willcocks <roger@filmlight.ltd.uk>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2
2014-11-06 23:25 ` Jeff Moyer
@ 2014-11-07 16:28 ` Milosz Tanski
0 siblings, 0 replies; 13+ messages in thread
From: Milosz Tanski @ 2014-11-07 16:28 UTC (permalink / raw)
To: Jeff Moyer
Cc: LKML, Christoph Hellwig, linux-fsdevel@vger.kernel.org,
linux-aio@kvack.org, Mel Gorman, Volker Lendecke, Tejun Heo,
Theodore Ts'o, Al Viro, Linux API, Michael Kerrisk,
linux-arch, linux-mm
On Thu, Nov 6, 2014 at 6:25 PM, Jeff Moyer <jmoyer@redhat.com> wrote:
> Milosz Tanski <milosz@adfin.com> writes:
>
>> New syscalls that take a flags argument. This change does not add any specific
>> flags.
>>
>> Signed-off-by: Milosz Tanski <milosz@adfin.com>
>> Reviewed-by: Christoph Hellwig <hch@lst.de>
>> ---
>> fs/read_write.c | 176 ++++++++++++++++++++++++++++++--------
>> include/linux/compat.h | 6 ++
>> include/linux/syscalls.h | 6 ++
>> include/uapi/asm-generic/unistd.h | 6 +-
>> mm/filemap.c | 5 +-
>> 5 files changed, 158 insertions(+), 41 deletions(-)
>>
>> diff --git a/fs/read_write.c b/fs/read_write.c
>> index 94b2d34..907735c 100644
>> --- a/fs/read_write.c
>> +++ b/fs/read_write.c
>> @@ -866,6 +866,8 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
>> return -EBADF;
>> if (!(file->f_mode & FMODE_CAN_READ))
>> return -EINVAL;
>> + if (flags & ~0)
>> + return -EINVAL;
>>
>> return do_readv_writev(READ, file, vec, vlen, pos, flags);
>> }
>> @@ -879,21 +881,23 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
>> return -EBADF;
>> if (!(file->f_mode & FMODE_CAN_WRITE))
>> return -EINVAL;
>> + if (flags & ~0)
>> + return -EINVAL;
>>
>> return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
>> }
>
> Hi, Milosz,
>
> You've checked for invalid flags for the normal system calls, but not
> for the compat variants. Can you add that in, please?
>
> Thanks!
> Jeff
That's a good catch, Jeff. I'll fix this, and it'll be in the next
version of the patch series.
- M
--
Milosz Tanski
CTO
16 East 34th Street, 15th floor
New York, NY 10016
p: 646-253-9055
e: milosz@adfin.com
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [fuse-devel] [PATCH v5 7/7] add a flag for per-operation O_DSYNC semantics
2014-11-07 14:21 ` Roger Willcocks
@ 2014-11-07 19:58 ` Milosz Tanski
0 siblings, 0 replies; 13+ messages in thread
From: Milosz Tanski @ 2014-11-07 19:58 UTC (permalink / raw)
To: Roger Willcocks
Cc: Anton Altaparmakov, Anand Avati, linux-arch, linux-aio@kvack.org,
linux-nfs, Volker Lendecke, Theodore Ts'o, Mel Gorman,
fuse-devel@lists.sourceforge.net, Linux API,
Linux Kernel Mailing List, Michael Kerrisk, Christoph Hellwig,
linux-mm, Jeff Moyer, Al Viro, Tejun Heo,
linux-fsdevel@vger.kernel.org, ceph-devel, Christoph Hellwig,
ocfs2-devel
On Fri, Nov 7, 2014 at 9:21 AM, Roger Willcocks <roger@filmlight.ltd.uk> wrote:
>
> On Fri, 2014-11-07 at 08:43 +0200, Anton Altaparmakov wrote:
>> Hi,
>>
>> > On 7 Nov 2014, at 07:52, Anand Avati <avati@gluster.org> wrote:
>> > On Thu, Nov 6, 2014 at 8:22 PM, Anton Altaparmakov <aia21@cam.ac.uk> wrote:
>> > > On 7 Nov 2014, at 01:46, Jeff Moyer <jmoyer@redhat.com> wrote:
>> > > Minor nit, but I'd rather read something that looks like this:
>> > >
>> > > if (type == READ && (flags & RWF_NONBLOCK))
>> > > return -EAGAIN;
>> > > else if (type == WRITE && (flags & RWF_DSYNC))
>> > > return -EINVAL;
>> >
>> > But your version is less logically efficient for the case where "type == READ" is true and "flags & RWF_NONBLOCK" is false because your version then has to do the "if (type == WRITE" check before discovering it does not need to take that branch either, whilst the original version does not have to do such a test at all.
>> >
>> > Seriously?
>>
>> Of course seriously.
>>
>> > Just focus on the code readability/maintainability which makes the code most easily understood/obvious to a new pair of eyes, and leave such micro-optimizations to the compiler..
>>
>> The original version is more readable (IMO) and this is not a micro-optimization. It is people like you who are responsible for the fact that we need faster and faster computers to cope with the inefficient/poor code being written more and more...
>>
>
> Your original version needs me to know that type can only be either READ
> or WRITE (and not, for instance, READONLY or READWRITE or some other
> random special case) and it rings alarm bells when I first see it. If
> you want to keep the micro optimization, you need an assertion to
> acknowledge the potential bug and a comment to make the code obvious:
>
> + assert(type == READ || type == WRITE);
> + if (type == READ) {
> + if (flags & RWF_NONBLOCK)
> + return -EAGAIN;
> + } else { /* WRITE */
> + if (flags & RWF_DSYNC)
> + return -EINVAL;
> + }
>
> but since what's really happening here is two separate and independent
> error checks, Jeff's version is still better, even if it does take an
> extra couple of nanoseconds.
>
> Actually I'd probably write:
>
> if (type == READ && (flags & RWF_NONBLOCK))
> return -EAGAIN;
>
> if (type == WRITE && (flags & RWF_DSYNC))
> return -EINVAL;
>
> (no 'else' since the code will never be reached if the first test is
> true).
>
>
> --
> Roger Willcocks <roger@filmlight.ltd.uk>
>
This is what I changed it to (and will be sending that out for the
next version).
--
Milosz Tanski
CTO
16 East 34th Street, 15th floor
New York, NY 10016
p: 646-253-9055
e: milosz@adfin.com
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 4/7] vfs: RWF_NONBLOCK flag for preadv2
2014-11-05 21:14 ` [PATCH v5 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
@ 2014-11-10 16:07 ` Sage Weil
0 siblings, 0 replies; 13+ messages in thread
From: Sage Weil @ 2014-11-10 16:07 UTC (permalink / raw)
To: Milosz Tanski
Cc: linux-kernel, Christoph Hellwig, linux-fsdevel, linux-aio,
Mel Gorman, Volker Lendecke, Tejun Heo, Jeff Moyer,
Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
linux-arch, ceph-devel, linux-cifs, samba-technical, linux-nfs,
linux-xfs, ocfs2-devel, linux-mm
On Wed, 5 Nov 2014, Milosz Tanski wrote:
> generic_file_read_iter() supports a new flag RWF_NONBLOCK which says that we
> only want to read the data if it's already in the page cache.
>
> Additionally, there are a few filesystems where we have to specifically
> bail out early if RWF_NONBLOCK is set, because the op would block. Christoph
> Hellwig contributed this code.
>
> Signed-off-by: Milosz Tanski <milosz@adfin.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Ceph bits
Acked-by: Sage Weil <sage@redhat.com>
> ---
> fs/ceph/file.c | 2 ++
> fs/cifs/file.c | 6 ++++++
> fs/nfs/file.c | 5 ++++-
> fs/ocfs2/file.c | 6 ++++++
> fs/pipe.c | 3 ++-
> fs/read_write.c | 38 +++++++++++++++++++++++++-------------
> fs/xfs/xfs_file.c | 4 ++++
> include/linux/fs.h | 3 +++
> mm/filemap.c | 18 ++++++++++++++++++
> mm/shmem.c | 4 ++++
> 10 files changed, 74 insertions(+), 15 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index d7e0da8..b798b5c 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -822,6 +822,8 @@ again:
> if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
> (iocb->ki_filp->f_flags & O_DIRECT) ||
> (fi->flags & CEPH_F_SYNC)) {
> + if (iocb->ki_rwflags & O_NONBLOCK)
> + return -EAGAIN;
>
> dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
> inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 3e4d00a..c485afa 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -3005,6 +3005,9 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
> struct cifs_readdata *rdata, *tmp;
> struct list_head rdata_list;
>
> + if (iocb->ki_rwflags & RWF_NONBLOCK)
> + return -EAGAIN;
> +
> len = iov_iter_count(to);
> if (!len)
> return 0;
> @@ -3123,6 +3126,9 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
> ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> return generic_file_read_iter(iocb, to);
>
> + if (iocb->ki_rwflags & RWF_NONBLOCK)
> + return -EAGAIN;
> +
> /*
> * We need to hold the sem to be sure nobody modifies lock list
> * with a brlock that prevents reading.
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 2ab6f00..aa9046f 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -171,8 +171,11 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
> struct inode *inode = file_inode(iocb->ki_filp);
> ssize_t result;
>
> - if (iocb->ki_filp->f_flags & O_DIRECT)
> + if (iocb->ki_filp->f_flags & O_DIRECT) {
> + if (iocb->ki_rwflags & O_NONBLOCK)
> + return -EAGAIN;
> return nfs_file_direct_read(iocb, to, iocb->ki_pos);
> + }
>
> dprintk("NFS: read(%pD2, %zu@%lu)\n",
> iocb->ki_filp,
> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
> index 324dc93..bb66ca4 100644
> --- a/fs/ocfs2/file.c
> +++ b/fs/ocfs2/file.c
> @@ -2472,6 +2472,12 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
> filp->f_path.dentry->d_name.name,
> to->nr_segs); /* GRRRRR */
>
> + /*
> + * No non-blocking reads for ocfs2 for now. Might be doable with
> + * non-blocking cluster lock helpers.
> + */
> + if (iocb->ki_rwflags & RWF_NONBLOCK)
> + return -EAGAIN;
>
> if (!inode) {
> ret = -EINVAL;
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 21981e5..212bf68 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -302,7 +302,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> */
> if (ret)
> break;
> - if (filp->f_flags & O_NONBLOCK) {
> + if ((filp->f_flags & O_NONBLOCK) ||
> + (iocb->ki_rwflags & RWF_NONBLOCK)) {
> ret = -EAGAIN;
> break;
> }
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 907735c..cba7d4c 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -835,14 +835,19 @@ static ssize_t do_readv_writev(int type, struct file *file,
> file_start_write(file);
> }
>
> - if (iter_fn)
> + if (iter_fn) {
> ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
> pos, iter_fn, flags);
> - else if (fnv)
> - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
> - pos, fnv);
> - else
> - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
> + } else {
> + if (type == READ && (flags & RWF_NONBLOCK))
> + return -EAGAIN;
> +
> + if (fnv)
> + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
> + pos, fnv);
> + else
> + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
> + }
>
> if (type != READ)
> file_end_write(file);
> @@ -866,8 +871,10 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
> return -EBADF;
> if (!(file->f_mode & FMODE_CAN_READ))
> return -EINVAL;
> - if (flags & ~0)
> + if (flags & ~RWF_NONBLOCK)
> return -EINVAL;
> + if ((file->f_flags & O_DIRECT) && (flags & RWF_NONBLOCK))
> + return -EAGAIN;
>
> return do_readv_writev(READ, file, vec, vlen, pos, flags);
> }
> @@ -1069,14 +1076,19 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
> file_start_write(file);
> }
>
> - if (iter_fn)
> + if (iter_fn) {
> ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
> pos, iter_fn, flags);
> - else if (fnv)
> - ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
> - pos, fnv);
> - else
> - ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
> + } else {
> + if (type == READ && (flags & RWF_NONBLOCK))
> + return -EAGAIN;
> +
> + if (fnv)
> + ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
> + pos, fnv);
> + else
> + ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
> + }
>
> if (type != READ)
> file_end_write(file);
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index eb596b4..b1f6334 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -246,6 +246,10 @@ xfs_file_read_iter(
>
> XFS_STATS_INC(xs_read_calls);
>
> + /* XXX: need a non-blocking iolock helper, shouldn't be too hard */
> + if (iocb->ki_rwflags & RWF_NONBLOCK)
> + return -EAGAIN;
> +
> if (unlikely(file->f_flags & O_DIRECT))
> ioflags |= XFS_IO_ISDIRECT;
> if (file->f_mode & FMODE_NOCMTIME)
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9ed5711..eaebd99 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1459,6 +1459,9 @@ struct block_device_operations;
> #define HAVE_COMPAT_IOCTL 1
> #define HAVE_UNLOCKED_IOCTL 1
>
> +/* These flags are used for the readv/writev syscalls with flags. */
> +#define RWF_NONBLOCK 0x00000001
> +
> struct iov_iter;
>
> struct file_operations {
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 530c263..09d3af3 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1494,6 +1494,8 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
> find_page:
> page = find_get_page(mapping, index);
> if (!page) {
> + if (flags & RWF_NONBLOCK)
> + goto would_block;
> page_cache_sync_readahead(mapping,
> ra, filp,
> index, last_index - index);
> @@ -1585,6 +1587,11 @@ page_ok:
> continue;
>
> page_not_up_to_date:
> + if (flags & RWF_NONBLOCK) {
> + page_cache_release(page);
> + goto would_block;
> + }
> +
> /* Get exclusive access to the page ... */
> error = lock_page_killable(page);
> if (unlikely(error))
> @@ -1604,6 +1611,12 @@ page_not_up_to_date_locked:
> goto page_ok;
> }
>
> + if (flags & RWF_NONBLOCK) {
> + unlock_page(page);
> + page_cache_release(page);
> + goto would_block;
> + }
> +
> readpage:
> /*
> * A previous I/O error may have been due to temporary
> @@ -1674,6 +1687,8 @@ no_cached_page:
> goto readpage;
> }
>
> +would_block:
> + error = -EAGAIN;
> out:
> ra->prev_pos = prev_index;
> ra->prev_pos <<= PAGE_CACHE_SHIFT;
> @@ -1707,6 +1722,9 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> size_t count = iov_iter_count(iter);
> loff_t size;
>
> + if (iocb->ki_rwflags & RWF_NONBLOCK)
> + return -EAGAIN;
> +
> if (!count)
> goto out; /* skip atime */
> size = i_size_read(inode);
> diff --git a/mm/shmem.c b/mm/shmem.c
> index cd6fc75..5c30f04 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1531,6 +1531,10 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> ssize_t retval = 0;
> loff_t *ppos = &iocb->ki_pos;
>
> + /* XXX: should be easily supportable */
> + if (iocb->ki_rwflags & RWF_NONBLOCK)
> + return -EAGAIN;
> +
> /*
> * Might this read be for a stacking filesystem? Then when reading
> * holes of a sparse file, we actually need to allocate those pages,
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics
2014-11-05 21:14 ` [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
2014-11-06 23:46 ` Jeff Moyer
@ 2014-11-10 16:07 ` Sage Weil
1 sibling, 0 replies; 13+ messages in thread
From: Sage Weil @ 2014-11-10 16:07 UTC (permalink / raw)
To: Milosz Tanski
Cc: linux-kernel, Christoph Hellwig, Christoph Hellwig, linux-fsdevel,
linux-aio, Mel Gorman, Volker Lendecke, Tejun Heo, Jeff Moyer,
Theodore Ts'o, Al Viro, linux-api, Michael Kerrisk,
linux-arch, ceph-devel, fuse-devel, linux-nfs, ocfs2-devel,
linux-mm
On Wed, 5 Nov 2014, Milosz Tanski wrote:
> From: Christoph Hellwig <hch@lst.de>
>
> With the new read/write with flags syscalls we can support a flag
> to enable O_DSYNC semantics on a per-operation basis. This is
> useful to implement protocols like SMB, NFS or SCSI that have such
> per-operation flags.
>
> Example program below:
>
> cat > pwritev2.c << EOF
>
> #define LO_HI_LONG(val) \
> (off_t) val, \
> (off_t) ((((uint64_t) (val)) >> (sizeof (long) * 4)) >> (sizeof (long) * 4))
>
> static ssize_t
> pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags)
> {
> return syscall(__NR_pwritev2, fd, iov, iovcnt, LO_HI_LONG(offset),
> flags);
> }
>
> int main(int argc, char **argv)
> {
> int fd = open(argv[1], O_WRONLY|O_CREAT|O_TRUNC, 0666);
> char buf[1024];
> struct iovec iov = { .iov_base = buf, .iov_len = 1024 };
> int ret;
>
> if (fd < 0) {
> perror("open");
> return 0;
> }
>
> memset(buf, 0xfe, sizeof(buf));
>
> ret = pwritev2(fd, &iov, 1, 0, RWF_DSYNC);
> if (ret < 0)
> perror("pwritev2");
> else
> printf("ret = %d\n", ret);
>
> return 0;
> }
> EOF
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> [milosz@adfin.com: added flag check to compat_do_readv_writev()]
> Signed-off-by: Milosz Tanski <milosz@adfin.com>
Ceph bits
Acked-by: Sage Weil <sage@redhat.com>
> ---
> fs/ceph/file.c | 4 +++-
> fs/fuse/file.c | 2 ++
> fs/nfs/file.c | 10 ++++++----
> fs/ocfs2/file.c | 6 ++++--
> fs/read_write.c | 20 +++++++++++++++-----
> include/linux/fs.h | 3 ++-
> mm/filemap.c | 4 +++-
> 7 files changed, 35 insertions(+), 14 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index b798b5c..2d4e15a 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -983,7 +983,9 @@ retry_snap:
> ceph_put_cap_refs(ci, got);
>
> if (written >= 0 &&
> - ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) ||
> + ((file->f_flags & O_SYNC) ||
> + IS_SYNC(file->f_mapping->host) ||
> + (iocb->ki_rwflags & RWF_DSYNC) ||
> ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
> err = vfs_fsync_range(file, pos, pos + written - 1, 1);
> if (err < 0)
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index caa8d95..bb4fb23 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1248,6 +1248,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> written += written_buffered;
> iocb->ki_pos = pos + written_buffered;
> } else {
> + if (iocb->ki_rwflags & RWF_DSYNC)
> + return -EINVAL;
> written = fuse_perform_write(file, mapping, from, pos);
> if (written >= 0)
> iocb->ki_pos = pos + written;
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index aa9046f..c59b0b7 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -652,13 +652,15 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
> .remap_pages = generic_file_remap_pages,
> };
>
> -static int nfs_need_sync_write(struct file *filp, struct inode *inode)
> +static int nfs_need_sync_write(struct kiocb *iocb, struct inode *inode)
> {
> struct nfs_open_context *ctx;
>
> - if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
> + if (IS_SYNC(inode) ||
> + (iocb->ki_filp->f_flags & O_DSYNC) ||
> + (iocb->ki_rwflags & RWF_DSYNC))
> return 1;
> - ctx = nfs_file_open_context(filp);
> + ctx = nfs_file_open_context(iocb->ki_filp);
> if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
> nfs_ctx_key_to_expire(ctx))
> return 1;
> @@ -705,7 +707,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
> written = result;
>
> /* Return error values for O_DSYNC and IS_SYNC() */
> - if (result >= 0 && nfs_need_sync_write(file, inode)) {
> + if (result >= 0 && nfs_need_sync_write(iocb, inode)) {
> int err = vfs_fsync(file, 0);
> if (err < 0)
> result = err;
> diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
> index bb66ca4..8f9a86b 100644
> --- a/fs/ocfs2/file.c
> +++ b/fs/ocfs2/file.c
> @@ -2374,8 +2374,10 @@ out_dio:
> /* buffered aio wouldn't have proper lock coverage today */
> BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
>
> - if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
> - ((file->f_flags & O_DIRECT) && !direct_io)) {
> + if (((file->f_flags & O_DSYNC) && !direct_io) ||
> + IS_SYNC(inode) ||
> + ((file->f_flags & O_DIRECT) && !direct_io) ||
> + (iocb->ki_rwflags & RWF_DSYNC)) {
> ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
> *ppos + count - 1);
> if (ret < 0)
> diff --git a/fs/read_write.c b/fs/read_write.c
> index cba7d4c..3443265 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -839,8 +839,13 @@ static ssize_t do_readv_writev(int type, struct file *file,
> ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
> pos, iter_fn, flags);
> } else {
> - if (type == READ && (flags & RWF_NONBLOCK))
> - return -EAGAIN;
> + if (type == READ) {
> + if (flags & RWF_NONBLOCK)
> + return -EAGAIN;
> + } else {
> + if (flags & RWF_DSYNC)
> + return -EINVAL;
> + }
>
> if (fnv)
> ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
> @@ -888,7 +893,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
> return -EBADF;
> if (!(file->f_mode & FMODE_CAN_WRITE))
> return -EINVAL;
> - if (flags & ~0)
> + if (flags & ~RWF_DSYNC)
> return -EINVAL;
>
> return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
> @@ -1080,8 +1085,13 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
> ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
> pos, iter_fn, flags);
> } else {
> - if (type == READ && (flags & RWF_NONBLOCK))
> - return -EAGAIN;
> + if (type == READ) {
> + if (flags & RWF_NONBLOCK)
> + return -EAGAIN;
> + } else {
> + if (flags & RWF_DSYNC)
> + return -EINVAL;
> + }
>
> if (fnv)
> ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 7d0e116..7786b88 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1460,7 +1460,8 @@ struct block_device_operations;
> #define HAVE_UNLOCKED_IOCTL 1
>
> /* These flags are used for the readv/writev syscalls with flags. */
> -#define RWF_NONBLOCK 0x00000001
> +#define RWF_NONBLOCK 0x00000001
> +#define RWF_DSYNC 0x00000002
>
> struct iov_iter;
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 6107058..4fbef99 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2669,7 +2669,9 @@ int generic_write_sync(struct kiocb *iocb, loff_t count)
> struct file *file = iocb->ki_filp;
>
> if (count > 0 &&
> - ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))) {
> + ((file->f_flags & O_DSYNC) ||
> + (iocb->ki_rwflags & RWF_DSYNC) ||
> + IS_SYNC(file->f_mapping->host))) {
> bool fdatasync = !(file->f_flags & __O_SYNC);
> ssize_t ret = 0;
>
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2014-11-10 16:08 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <cover.1415220890.git.milosz@adfin.com>
2014-11-05 21:14 ` [PATCH v5 2/7] vfs: Define new syscalls preadv2,pwritev2 Milosz Tanski
2014-11-06 23:25 ` Jeff Moyer
2014-11-07 16:28 ` Milosz Tanski
2014-11-05 21:14 ` [PATCH v5 4/7] vfs: RWF_NONBLOCK flag for preadv2 Milosz Tanski
2014-11-10 16:07 ` Sage Weil
2014-11-05 21:14 ` [PATCH v5 7/7] fs: add a flag for per-operation O_DSYNC semantics Milosz Tanski
2014-11-06 23:46 ` Jeff Moyer
2014-11-07 4:22 ` [PATCH v5 7/7] " Anton Altaparmakov
2014-11-07 5:52 ` [fuse-devel] " Anand Avati
2014-11-07 6:43 ` Anton Altaparmakov
2014-11-07 14:21 ` Roger Willcocks
2014-11-07 19:58 ` Milosz Tanski
2014-11-10 16:07 ` [PATCH v5 7/7] fs: " Sage Weil
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).