- * [PATCH 1/9] compat.h: fix a spelling error in <linux/compat.h>
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 2/9] iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c Christoph Hellwig
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
There is no compat_sys_readv64v2 syscall, only a compat_sys_preadv64v2
one.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/compat.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/compat.h b/include/linux/compat.h
index b354ce58966e2d..654c1ec36671a4 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -812,7 +812,7 @@ asmlinkage ssize_t compat_sys_pwritev2(compat_ulong_t fd,
 		const struct compat_iovec __user *vec,
 		compat_ulong_t vlen, u32 pos_low, u32 pos_high, rwf_t flags);
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
-asmlinkage long  compat_sys_readv64v2(unsigned long fd,
+asmlinkage long  compat_sys_preadv64v2(unsigned long fd,
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, loff_t pos, rwf_t flags);
 #endif
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [PATCH 2/9] iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 1/9] compat.h: fix a spelling error in <linux/compat.h> Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec Christoph Hellwig
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module, David Laight,
	David Laight
From: David Laight <David.Laight@ACULAB.COM>
This lets the compiler inline it into import_iovec() generating
much better code.
Signed-off-by: David Laight <david.laight@aculab.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/read_write.c | 179 ------------------------------------------------
 lib/iov_iter.c  | 176 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+), 179 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0dd..e5e891a88442ef 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -752,185 +752,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 	return ret;
 }
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- *     into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- *     either @fast_pointer, a newly allocated kernel array, or NULL,
- *     depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-			      unsigned long nr_segs, unsigned long fast_segs,
-			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer)
-{
-	unsigned long seg;
-	ssize_t ret;
-	struct iovec *iov = fast_pointer;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0) {
-		ret = 0;
-		goto out;
-	}
-
-	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
-	 */
-	if (nr_segs > UIO_MAXIOV) {
-		ret = -EINVAL;
-		goto out;
-	}
-	if (nr_segs > fast_segs) {
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
-	}
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/*
-	 * According to the Single Unix Specification we should return EINVAL
-	 * if an element length is < 0 when cast to ssize_t or if the
-	 * total length would overflow the ssize_t return value of the
-	 * system call.
-	 *
-	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
-	 * overflow case.
-	 */
-	ret = 0;
-	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
-		ssize_t len = (ssize_t)iov[seg].iov_len;
-
-		/* see if we we're about to use an invalid len or if
-		 * it's about to overflow ssize_t */
-		if (len < 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-		if (type >= 0
-		    && unlikely(!access_ok(buf, len))) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - ret) {
-			len = MAX_RW_COUNT - ret;
-			iov[seg].iov_len = len;
-		}
-		ret += len;
-	}
-out:
-	*ret_pointer = iov;
-	return ret;
-}
-
-#ifdef CONFIG_COMPAT
-ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector, unsigned long nr_segs,
-		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer)
-{
-	compat_ssize_t tot_len;
-	struct iovec *iov = *ret_pointer = fast_pointer;
-	ssize_t ret = 0;
-	int seg;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (nr_segs > fast_segs) {
-		ret = -ENOMEM;
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL)
-			goto out;
-	}
-	*ret_pointer = iov;
-
-	ret = -EFAULT;
-	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.
-	 *
-	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
-	 * no overflow possibility.
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		compat_uptr_t buf;
-		compat_ssize_t len;
-
-		if (__get_user(len, &uvector->iov_len) ||
-		   __get_user(buf, &uvector->iov_base)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
-			goto out;
-		if (type >= 0 &&
-		    !access_ok(compat_ptr(buf), len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - tot_len)
-			len = MAX_RW_COUNT - tot_len;
-		tot_len += len;
-		iov->iov_base = compat_ptr(buf);
-		iov->iov_len = (compat_size_t) len;
-		uvector++;
-		iov++;
-	}
-	ret = tot_len;
-
-out:
-	return ret;
-}
-#endif
-
 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
 		loff_t *pos, rwf_t flags)
 {
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f1232..ccea9db3f72be8 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1650,6 +1650,109 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ *     into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ *     either @fast_pointer, a newly allocated kernel array, or NULL,
+ *     depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ *   or a negative error code on error.
+ */
+ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_pointer, struct iovec **ret_pointer)
+{
+	unsigned long seg;
+	ssize_t ret;
+	struct iovec *iov = fast_pointer;
+
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
+	if (nr_segs == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * First get the "struct iovec" from user memory and
+	 * verify all the pointers
+	 */
+	if (nr_segs > UIO_MAXIOV) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (nr_segs > fast_segs) {
+		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * According to the Single Unix Specification we should return EINVAL
+	 * if an element length is < 0 when cast to ssize_t or if the
+	 * total length would overflow the ssize_t return value of the
+	 * system call.
+	 *
+	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+	 * overflow case.
+	 */
+	ret = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		void __user *buf = iov[seg].iov_base;
+		ssize_t len = (ssize_t)iov[seg].iov_len;
+
+		/* see if we we're about to use an invalid len or if
+		 * it's about to overflow ssize_t */
+		if (len < 0) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (type >= 0
+		    && unlikely(!access_ok(buf, len))) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - ret) {
+			len = MAX_RW_COUNT - ret;
+			iov[seg].iov_len = len;
+		}
+		ret += len;
+	}
+out:
+	*ret_pointer = iov;
+	return ret;
+}
+
 /**
  * import_iovec() - Copy an array of &struct iovec from userspace
  *     into the kernel, check that it is valid, and initialize a new
@@ -1695,6 +1798,79 @@ EXPORT_SYMBOL(import_iovec);
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 
+ssize_t compat_rw_copy_check_uvector(int type,
+		const struct compat_iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_pointer, struct iovec **ret_pointer)
+{
+	compat_ssize_t tot_len;
+	struct iovec *iov = *ret_pointer = fast_pointer;
+	ssize_t ret = 0;
+	int seg;
+
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
+	if (nr_segs == 0)
+		goto out;
+
+	ret = -EINVAL;
+	if (nr_segs > UIO_MAXIOV)
+		goto out;
+	if (nr_segs > fast_segs) {
+		ret = -ENOMEM;
+		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL)
+			goto out;
+	}
+	*ret_pointer = iov;
+
+	ret = -EFAULT;
+	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
+		goto out;
+
+	/*
+	 * Single unix specification:
+	 * We should -EINVAL if an element length is not >= 0 and fitting an
+	 * ssize_t.
+	 *
+	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
+	 * no overflow possibility.
+	 */
+	tot_len = 0;
+	ret = -EINVAL;
+	for (seg = 0; seg < nr_segs; seg++) {
+		compat_uptr_t buf;
+		compat_ssize_t len;
+
+		if (__get_user(len, &uvector->iov_len) ||
+		   __get_user(buf, &uvector->iov_base)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
+			goto out;
+		if (type >= 0 &&
+		    !access_ok(compat_ptr(buf), len)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - tot_len)
+			len = MAX_RW_COUNT - tot_len;
+		tot_len += len;
+		iov->iov_base = compat_ptr(buf);
+		iov->iov_len = (compat_size_t) len;
+		uvector++;
+		iov++;
+	}
+	ret = tot_len;
+
+out:
+	return ret;
+}
+
 ssize_t compat_import_iovec(int type,
 		const struct compat_iovec __user * uvector,
 		unsigned nr_segs, unsigned fast_segs,
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 1/9] compat.h: fix a spelling error in <linux/compat.h> Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 2/9] iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23 14:16   ` Al Viro
  2020-09-23  6:05 ` [PATCH 4/9] iov_iter: transparently handle compat iovecs in import_iovec Christoph Hellwig
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Split rw_copy_check_uvector into two new helpers with more sensible
calling conventions:
 - iovec_from_user copies a iovec from userspace either into the provided
   stack buffer if it fits, or allocates a new buffer for it.  Returns
   the actually used iovec.  It also verifies that iov_len does fit a
   signed type, and handles compat iovecs if the compat flag is set.
 - __import_iovec consolidates the native and compat versions of
   import_iovec. It calls iovec_from_user, then validates each iovec
   actually points to user addresses, and ensures the total length
   doesn't overflow.
This has two major implications:
 - the access_process_vm case loses the total lenght checking, which
   wasn't required anyway, given that each call receives two iovecs
   for the local and remote side of the operation, and it verifies
   the total length on the local side already.
 - instead of a single loop there now are two loops over the iovecs.
   Given that the iovecs are cache hot this doesn't make a major
   difference
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/compat.h |   6 -
 include/linux/fs.h     |  13 --
 include/linux/uio.h    |  12 +-
 lib/iov_iter.c         | 300 ++++++++++++++++-------------------------
 mm/process_vm_access.c |  34 +++--
 5 files changed, 138 insertions(+), 227 deletions(-)
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 654c1ec36671a4..b930de791ff16b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 struct epoll_event;	/* fortunately, this one is fixed-layout */
 
-extern ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector,
-		unsigned long nr_segs,
-		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer);
-
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a082c..e69b45b6cc7b5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
 
-/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
- */
-#define CHECK_IOVEC_ONLY -1
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 	return file->f_op->mmap(file, vma);
 }
 
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-			      unsigned long nr_segs, unsigned long fast_segs,
-			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer);
-
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..92c11fe41c6228 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 		struct iov_iter *i);
 
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
-		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i);
+struct iovec *iovec_from_user(const struct iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_iov, bool compat);
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i);
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i, bool compat);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ccea9db3f72be8..d5d8afe31fca16 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/splice.h>
+#include <linux/compat.h>
 #include <net/checksum.h>
 #include <linux/scatterlist.h>
 #include <linux/instrumented.h>
@@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- *     into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- *     either @fast_pointer, a newly allocated kernel array, or NULL,
- *     depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
-		unsigned long nr_segs, unsigned long fast_segs,
-		struct iovec *fast_pointer, struct iovec **ret_pointer)
+static int copy_compat_iovec_from_user(struct iovec *iov,
+		const struct iovec __user *uvec, unsigned long nr_segs)
+{
+	const struct compat_iovec __user *uiov =
+		(const struct compat_iovec __user *)uvec;
+	int ret = -EFAULT, i;
+
+	if (!user_access_begin(uvec, nr_segs * sizeof(*uvec)))
+		return -EFAULT;
+
+	for (i = 0; i < nr_segs; i++) {
+		compat_uptr_t buf;
+		compat_ssize_t len;
+
+		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
+		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
+
+		/* check for compat_size_t not fitting in compat_ssize_t .. */
+		if (len < 0) {
+			ret = -EINVAL;
+			goto uaccess_end;
+		}
+		iov[i].iov_base = compat_ptr(buf);
+		iov[i].iov_len = len;
+	}
+
+	ret = 0;
+uaccess_end:
+	user_access_end();
+	return ret;
+}
+		
+static int copy_iovec_from_user(struct iovec *iov,
+		const struct iovec __user *uvec, unsigned long nr_segs)
 {
 	unsigned long seg;
-	ssize_t ret;
-	struct iovec *iov = fast_pointer;
 
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0) {
-		ret = 0;
-		goto out;
+	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
+		return -EFAULT;
+	for (seg = 0; seg < nr_segs; seg++) {
+		if ((ssize_t)iov[seg].iov_len < 0)
+			return -EINVAL;
 	}
 
+	return 0;
+}
+
+struct iovec *iovec_from_user(const struct iovec __user *uvec,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_iov, bool compat)
+{
+	struct iovec *iov = fast_iov;
+	int ret;
+
 	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
+	 * SuS says "The readv() function *may* fail if the iovcnt argument was
+	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
 	 */
-	if (nr_segs > UIO_MAXIOV) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (nr_segs == 0)
+		return iov;
+	if (nr_segs > UIO_MAXIOV)
+		return ERR_PTR(-EINVAL);
 	if (nr_segs > fast_segs) {
 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!iov)
+			return ERR_PTR(-ENOMEM);
 	}
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-		ret = -EFAULT;
-		goto out;
+
+	if (compat)
+		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
+	else
+		ret = copy_iovec_from_user(iov, uvec, nr_segs);
+	if (ret) {
+		if (iov != fast_iov)
+			kfree(iov);
+		return ERR_PTR(ret);
+	}
+
+	return iov;
+}
+
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i, bool compat)
+{
+	ssize_t total_len = 0;
+	unsigned long seg;
+	struct iovec *iov;
+
+	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
+	if (IS_ERR(iov)) {
+		*iovp = NULL;
+		return PTR_ERR(iov);
 	}
 
 	/*
-	 * According to the Single Unix Specification we should return EINVAL
-	 * if an element length is < 0 when cast to ssize_t or if the
-	 * total length would overflow the ssize_t return value of the
-	 * system call.
+	 * According to the Single Unix Specification we should return EINVAL if
+	 * an element length is < 0 when cast to ssize_t or if the total length
+	 * would overflow the ssize_t return value of the system call.
 	 *
 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 	 * overflow case.
 	 */
-	ret = 0;
 	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
 		ssize_t len = (ssize_t)iov[seg].iov_len;
 
-		/* see if we we're about to use an invalid len or if
-		 * it's about to overflow ssize_t */
-		if (len < 0) {
-			ret = -EINVAL;
-			goto out;
+		if (!access_ok(iov[seg].iov_base, len)) {
+			if (iov != *iovp)
+				kfree(iov);
+			*iovp = NULL;
+			return -EFAULT;
 		}
-		if (type >= 0
-		    && unlikely(!access_ok(buf, len))) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - ret) {
-			len = MAX_RW_COUNT - ret;
+
+		if (len > MAX_RW_COUNT - total_len) {
+			len = MAX_RW_COUNT - total_len;
 			iov[seg].iov_len = len;
 		}
-		ret += len;
+		total_len += len;
 	}
-out:
-	*ret_pointer = iov;
-	return ret;
+
+	iov_iter_init(i, type, iov, nr_segs, total_len);
+	if (iov == *iovp)
+		*iovp = NULL;
+	else
+		*iovp = iov;
+	return total_len;
 }
 
 /**
@@ -1759,10 +1786,10 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
  *     &struct iov_iter iterator to access it.
  *
  * @type: One of %READ or %WRITE.
- * @uvector: Pointer to the userspace array.
+ * @uvec: Pointer to the userspace array.
  * @nr_segs: Number of elements in userspace array.
  * @fast_segs: Number of elements in @iov.
- * @iov: (input and output parameter) Pointer to pointer to (usually small
+ * @iovp: (input and output parameter) Pointer to pointer to (usually small
  *     on-stack) kernel array.
  * @i: Pointer to iterator that will be initialized on success.
  *
@@ -1775,120 +1802,21 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
  *
  * Return: Negative error code on error, bytes imported on success
  */
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i)
+		 struct iovec **iovp, struct iov_iter *i)
 {
-	ssize_t n;
-	struct iovec *p;
-	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
-				  *iov, &p);
-	if (n < 0) {
-		if (p != *iov)
-			kfree(p);
-		*iov = NULL;
-		return n;
-	}
-	iov_iter_init(i, type, p, nr_segs, n);
-	*iov = p == *iov ? NULL : p;
-	return n;
+	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false);
 }
 EXPORT_SYMBOL(import_iovec);
 
 #ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
-ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector,
-		unsigned long nr_segs, unsigned long fast_segs,
-		struct iovec *fast_pointer, struct iovec **ret_pointer)
-{
-	compat_ssize_t tot_len;
-	struct iovec *iov = *ret_pointer = fast_pointer;
-	ssize_t ret = 0;
-	int seg;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (nr_segs > fast_segs) {
-		ret = -ENOMEM;
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL)
-			goto out;
-	}
-	*ret_pointer = iov;
-
-	ret = -EFAULT;
-	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.
-	 *
-	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
-	 * no overflow possibility.
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		compat_uptr_t buf;
-		compat_ssize_t len;
-
-		if (__get_user(len, &uvector->iov_len) ||
-		   __get_user(buf, &uvector->iov_base)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
-			goto out;
-		if (type >= 0 &&
-		    !access_ok(compat_ptr(buf), len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - tot_len)
-			len = MAX_RW_COUNT - tot_len;
-		tot_len += len;
-		iov->iov_base = compat_ptr(buf);
-		iov->iov_len = (compat_size_t) len;
-		uvector++;
-		iov++;
-	}
-	ret = tot_len;
-
-out:
-	return ret;
-}
-
-ssize_t compat_import_iovec(int type,
-		const struct compat_iovec __user * uvector,
-		unsigned nr_segs, unsigned fast_segs,
-		struct iovec **iov, struct iov_iter *i)
+ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec,
+		unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		struct iov_iter *i)
 {
-	ssize_t n;
-	struct iovec *p;
-	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
-				  *iov, &p);
-	if (n < 0) {
-		if (p != *iov)
-			kfree(p);
-		*iov = NULL;
-		return n;
-	}
-	iov_iter_init(i, type, p, nr_segs, n);
-	*iov = p == *iov ? NULL : p;
-	return n;
+	return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs,
+			     fast_segs, iovp, i, true);
 }
 EXPORT_SYMBOL(compat_import_iovec);
 #endif
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 29c052099affdc..5e728c20c2bead 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -276,20 +276,17 @@ static ssize_t process_vm_rw(pid_t pid,
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
-		goto free_iovecs;
-
-	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
-				   iovstack_r, &iov_r);
-	if (rc <= 0)
-		goto free_iovecs;
-
+		goto free_iov_l;
+	iov_r = iovec_from_user(rvec, riovcnt, UIO_FASTIOV, iovstack_r, false);
+	if (IS_ERR(iov_r)) {
+		rc = PTR_ERR(iov_r);
+		goto free_iov_l;
+	}
 	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-
-free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
+free_iov_l:
 	kfree(iov_l);
-
 	return rc;
 }
 
@@ -333,18 +330,17 @@ compat_process_vm_rw(compat_pid_t pid,
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
-		goto free_iovecs;
-	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
-					  UIO_FASTIOV, iovstack_r,
-					  &iov_r);
-	if (rc <= 0)
-		goto free_iovecs;
-
+		goto free_iov_l;
+	iov_r = iovec_from_user((const struct iovec __user *)rvec, riovcnt,
+				UIO_FASTIOV, iovstack_r, true);
+	if (IS_ERR(iov_r)) {
+		rc = PTR_ERR(iov_r);
+		goto free_iov_l;
+	}
 	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-
-free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
+free_iov_l:
 	kfree(iov_l);
 	return rc;
 }
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * Re: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec
  2020-09-23  6:05 ` [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec Christoph Hellwig
@ 2020-09-23 14:16   ` Al Viro
  2020-09-23 14:38     ` David Laight
  2020-09-23 14:40     ` Al Viro
  0 siblings, 2 replies; 25+ messages in thread
From: Al Viro @ 2020-09-23 14:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module, Linus Torvalds
On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> +		unsigned long nr_segs, unsigned long fast_segs,
Hmm...  For fast_segs unsigned long had always been ridiculous
(4G struct iovec on caller stack frame?), but that got me wondering about
nr_segs and I wish I'd thought of that when introducing import_iovec().
The thing is, import_iovec() takes unsigned int there.  Which is fine
(hell, the maximal value that can be accepted in 1024), except that
we do pass unsigned long syscall argument to it in some places.
E.g. vfs_readv() quietly truncates vlen to 32 bits, and vlen can
come unchanged through sys_readv() -> do_readv() -> vfs_readv().
With unsigned long passed by syscall glue.
AFAICS, passing 4G+1 as the third argument to readv(2) on 64bit box
will be quietly treated as 1 these days.  Which would be fine, except
that before "switch {compat_,}do_readv_writev() to {compat_,}import_iovec()"
it used to fail with -EINVAL.
Userland, BTW, describes readv(2) iovcnt as int; process_vm_readv(),
OTOH, has these counts unsigned long from the userland POV...
I suppose we ought to switch import_iovec() to unsigned long for nr_segs ;-/
Strictly speaking that had been a userland ABI change, even though nothing
except regression tests checking for expected errors would've been likely
to notice.  And it looks like no regression tests covered that one...
Linus, does that qualify for your "if no userland has noticed the change,
it's not a breakage"?
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * RE: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec
  2020-09-23 14:16   ` Al Viro
@ 2020-09-23 14:38     ` David Laight
  2020-09-23 14:49       ` Al Viro
  2020-09-23 14:40     ` Al Viro
  1 sibling, 1 reply; 25+ messages in thread
From: David Laight @ 2020-09-23 14:38 UTC (permalink / raw)
  To: 'Al Viro', Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-mips@vger.kernel.org,
	linux-parisc@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, sparclinux@vger.kernel.org,
	linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	io-uring@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-mm@kvack.org, netdev@vger.kernel.org,
	keyrings@vger.kernel.org, linux-security-module@vger.kernel.org,
	Linus Torvalds
From: Al Viro
> Sent: 23 September 2020 15:17
> 
> On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> 
> > +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> > +		unsigned long nr_segs, unsigned long fast_segs,
> 
> Hmm...  For fast_segs unsigned long had always been ridiculous
> (4G struct iovec on caller stack frame?), but that got me wondering about
> nr_segs and I wish I'd thought of that when introducing import_iovec().
> 
> The thing is, import_iovec() takes unsigned int there.  Which is fine
> (hell, the maximal value that can be accepted in 1024), except that
> we do pass unsigned long syscall argument to it in some places.
It will make diddly-squit difference.
The parameters end up in registers on most calling conventions.
Plausibly you get an extra 'REX' byte on x86 for the 64bit value.
What you want to avoid is explicit sign/zero extension and value
masking after arithmetic.
On x86-64 the 'horrid' type is actually 'signed int'.
It often needs sign extending to 64bits (eg when being
used as an array subscript).
	David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec
  2020-09-23 14:38     ` David Laight
@ 2020-09-23 14:49       ` Al Viro
  0 siblings, 0 replies; 25+ messages in thread
From: Al Viro @ 2020-09-23 14:49 UTC (permalink / raw)
  To: David Laight
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, Arnd Bergmann,
	David Howells, linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-mips@vger.kernel.org,
	linux-parisc@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, sparclinux@vger.kernel.org,
	linux-block@vger.kernel.org, linux-scsi@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	io-uring@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-mm@kvack.org, netdev@vger.kernel.org,
	keyrings@vger.kernel.org, linux-security-module@vger.kernel.org,
	Linus Torvalds
On Wed, Sep 23, 2020 at 02:38:24PM +0000, David Laight wrote:
> From: Al Viro
> > Sent: 23 September 2020 15:17
> > 
> > On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> > 
> > > +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> > > +		unsigned long nr_segs, unsigned long fast_segs,
> > 
> > Hmm...  For fast_segs unsigned long had always been ridiculous
> > (4G struct iovec on caller stack frame?), but that got me wondering about
> > nr_segs and I wish I'd thought of that when introducing import_iovec().
> > 
> > The thing is, import_iovec() takes unsigned int there.  Which is fine
> > (hell, the maximal value that can be accepted in 1024), except that
> > we do pass unsigned long syscall argument to it in some places.
> 
> It will make diddly-squit difference.
> The parameters end up in registers on most calling conventions.
> Plausibly you get an extra 'REX' byte on x86 for the 64bit value.
> What you want to avoid is explicit sign/zero extension and value
> masking after arithmetic.
Don't tell me what I want; your telepathic abilities are consistently sucky.
I am *NOT* talking about microoptimization here.  I have described
the behaviour change of syscall caused by commit 5 years ago.  Which is
generally considered a problem.  Then I asked whether that behaviour
change would fall under the "if nobody noticed, it's not a userland ABI
breakage" exception.
Could you show me the point where I have expressed concerns about
the quality of amd64 code generated for that thing, before or after
the change in question?
^ permalink raw reply	[flat|nested] 25+ messages in thread 
 
- * Re: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec
  2020-09-23 14:16   ` Al Viro
  2020-09-23 14:38     ` David Laight
@ 2020-09-23 14:40     ` Al Viro
  1 sibling, 0 replies; 25+ messages in thread
From: Al Viro @ 2020-09-23 14:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module, Linus Torvalds
On Wed, Sep 23, 2020 at 03:16:54PM +0100, Al Viro wrote:
> On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> 
> > +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> > +		unsigned long nr_segs, unsigned long fast_segs,
> 
> Hmm...  For fast_segs unsigned long had always been ridiculous
> (4G struct iovec on caller stack frame?), but that got me wondering about
> nr_segs and I wish I'd thought of that when introducing import_iovec().
> 
> The thing is, import_iovec() takes unsigned int there.  Which is fine
> (hell, the maximal value that can be accepted in 1024), except that
> we do pass unsigned long syscall argument to it in some places.
> 
> E.g. vfs_readv() quietly truncates vlen to 32 bits, and vlen can
> come unchanged through sys_readv() -> do_readv() -> vfs_readv().
> With unsigned long passed by syscall glue.
> 
> AFAICS, passing 4G+1 as the third argument to readv(2) on 64bit box
> will be quietly treated as 1 these days.  Which would be fine, except
> that before "switch {compat_,}do_readv_writev() to {compat_,}import_iovec()"
> it used to fail with -EINVAL.
> 
> Userland, BTW, describes readv(2) iovcnt as int; process_vm_readv(),
> OTOH, has these counts unsigned long from the userland POV...
> 
> I suppose we ought to switch import_iovec() to unsigned long for nr_segs ;-/
> Strictly speaking that had been a userland ABI change, even though nothing
> except regression tests checking for expected errors would've been likely
> to notice.  And it looks like no regression tests covered that one...
> 
> Linus, does that qualify for your "if no userland has noticed the change,
> it's not a breakage"?
Egads...  We have sys_readv() with unsigned long for file descriptor, since
1.3.31 when it had been introduced.  And originally it did comparison with
NR_OPEN right in sys_readv().  Then in 2.1.60 it had been switched to
fget(), which used to take unsigned long at that point.  And in 2.1.90pre1
it went unsigned int, so non-zero upper 32 bits in readv(2) first argument
ceased to cause EBADF...
Of course, libc had it as int fd all along.
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
 
- * [PATCH 4/9] iov_iter: transparently handle compat iovecs in import_iovec
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
                   ` (2 preceding siblings ...)
  2020-09-23  6:05 ` [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 5/9] fs: remove various compat readv/writev helpers Christoph Hellwig
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Use in compat_syscall to import either native or the compat iovecs, and
remove the now superflous compat_import_iovec.
This removes the need for special compat logic in most callers, and
the remaining ones can still be simplified by using __import_iovec
with a bool compat parameter.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/scsi_ioctl.c     | 12 ++----------
 drivers/scsi/sg.c      |  9 +--------
 fs/aio.c               |  8 ++------
 fs/io_uring.c          | 20 ++++++++------------
 fs/read_write.c        |  6 ++++--
 fs/splice.c            |  2 +-
 include/linux/uio.h    |  8 --------
 lib/iov_iter.c         | 14 ++------------
 mm/process_vm_access.c |  3 ++-
 net/compat.c           |  4 ++--
 security/keys/compat.c |  5 ++---
 11 files changed, 26 insertions(+), 65 deletions(-)
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ef722f04f88a93..e08df86866ee5d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -333,16 +333,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		struct iov_iter i;
 		struct iovec *iov = NULL;
 
-#ifdef CONFIG_COMPAT
-		if (in_compat_syscall())
-			ret = compat_import_iovec(rq_data_dir(rq),
-				   hdr->dxferp, hdr->iovec_count,
-				   0, &iov, &i);
-		else
-#endif
-			ret = import_iovec(rq_data_dir(rq),
-				   hdr->dxferp, hdr->iovec_count,
-				   0, &iov, &i);
+		ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
+				   hdr->iovec_count, 0, &iov, &i);
 		if (ret < 0)
 			goto out_free_cdb;
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 20472aaaf630a4..bfa8d77322d732 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1820,14 +1820,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 		struct iovec *iov = NULL;
 		struct iov_iter i;
 
-#ifdef CONFIG_COMPAT
-		if (in_compat_syscall())
-			res = compat_import_iovec(rw, hp->dxferp, iov_count,
-						  0, &iov, &i);
-		else
-#endif
-			res = import_iovec(rw, hp->dxferp, iov_count,
-					   0, &iov, &i);
+		res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
 		if (res < 0)
 			return res;
 
diff --git a/fs/aio.c b/fs/aio.c
index d5ec303855669d..c45c20d875388c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1489,12 +1489,8 @@ static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
 		*iovec = NULL;
 		return ret;
 	}
-#ifdef CONFIG_COMPAT
-	if (compat)
-		return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
-				iter);
-#endif
-	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
+
+	return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat);
 }
 
 static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3790c7fe9fee22..ba84ecea7cb1a4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2837,13 +2837,8 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
 		return ret;
 	}
 
-#ifdef CONFIG_COMPAT
-	if (req->ctx->compat)
-		return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
-						iovec, iter);
-#endif
-
-	return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
+	return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
+			      req->ctx->compat);
 }
 
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
@@ -4179,8 +4174,9 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
 				sr->len);
 		iomsg->iov = NULL;
 	} else {
-		ret = import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
-					&iomsg->iov, &iomsg->msg.msg_iter);
+		ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+				     &iomsg->iov, &iomsg->msg.msg_iter,
+				     false);
 		if (ret > 0)
 			ret = 0;
 	}
@@ -4220,9 +4216,9 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 		sr->len = iomsg->iov[0].iov_len;
 		iomsg->iov = NULL;
 	} else {
-		ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
-						&iomsg->iov,
-						&iomsg->msg.msg_iter);
+		ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
+				   UIO_FASTIOV, &iomsg->iov,
+				   &iomsg->msg.msg_iter, true);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/fs/read_write.c b/fs/read_write.c
index e5e891a88442ef..0a68037580b455 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1078,7 +1078,8 @@ static size_t compat_readv(struct file *file,
 	struct iov_iter iter;
 	ssize_t ret;
 
-	ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
+	ret = import_iovec(READ, (const struct iovec __user *)vec, vlen,
+			   UIO_FASTIOV, &iov, &iter);
 	if (ret >= 0) {
 		ret = do_iter_read(file, &iter, pos, flags);
 		kfree(iov);
@@ -1186,7 +1187,8 @@ static size_t compat_writev(struct file *file,
 	struct iov_iter iter;
 	ssize_t ret;
 
-	ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
+	ret = import_iovec(WRITE, (const struct iovec __user *)vec, vlen,
+			   UIO_FASTIOV, &iov, &iter);
 	if (ret >= 0) {
 		file_start_write(file);
 		ret = do_iter_write(file, &iter, pos, flags);
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c4db07ff..132d42b9871f9b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1387,7 +1387,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
 	if (error)
 		return error;
 
-	error = compat_import_iovec(type, iov32, nr_segs,
+	error = import_iovec(type, (struct iovec __user *)iov32, nr_segs,
 			     ARRAY_SIZE(iovstack), &iov, &iter);
 	if (error >= 0) {
 		error = do_vmsplice(f.file, &iter, flags);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 92c11fe41c6228..daedc61ad3706e 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -275,14 +275,6 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec,
 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
 		 struct iov_iter *i, bool compat);
-
-#ifdef CONFIG_COMPAT
-struct compat_iovec;
-ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector,
-		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i);
-#endif
-
 int import_single_range(int type, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d5d8afe31fca16..8c51e1b03814a3 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1806,21 +1806,11 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs,
 		 struct iovec **iovp, struct iov_iter *i)
 {
-	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false);
+	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
+			      in_compat_syscall());
 }
 EXPORT_SYMBOL(import_iovec);
 
-#ifdef CONFIG_COMPAT
-ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec,
-		unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
-		struct iov_iter *i)
-{
-	return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs,
-			     fast_segs, iovp, i, true);
-}
-EXPORT_SYMBOL(compat_import_iovec);
-#endif
-
 int import_single_range(int rw, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i)
 {
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 5e728c20c2bead..3f2156aab44263 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -326,7 +326,8 @@ compat_process_vm_rw(compat_pid_t pid,
 	if (flags != 0)
 		return -EINVAL;
 
-	rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+	rc = import_iovec(dir, (const struct iovec __user *)lvec, liovcnt,
+			  UIO_FASTIOV, &iov_l, &iter);
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
diff --git a/net/compat.c b/net/compat.c
index 95ce707a30a31d..ddd15af3a2837b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -98,8 +98,8 @@ int get_compat_msghdr(struct msghdr *kmsg,
 	if (err)
 		return err;
 
-	err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
-				   len, UIO_FASTIOV, iov, &kmsg->msg_iter);
+	err = import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr), len,
+			   UIO_FASTIOV, iov, &kmsg->msg_iter);
 	return err < 0 ? err : 0;
 }
 
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 6ee9d8f6a4a5bb..7ae531db031cf8 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -33,9 +33,8 @@ static long compat_keyctl_instantiate_key_iov(
 	if (!_payload_iov)
 		ioc = 0;
 
-	ret = compat_import_iovec(WRITE, _payload_iov, ioc,
-				  ARRAY_SIZE(iovstack), &iov,
-				  &from);
+	ret = import_iovec(WRITE, (const struct iovec __user *)_payload_iov,
+			   ioc, ARRAY_SIZE(iovstack), &iov, &from);
 	if (ret < 0)
 		return ret;
 
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
                   ` (3 preceding siblings ...)
  2020-09-23  6:05 ` [PATCH 4/9] iov_iter: transparently handle compat iovecs in import_iovec Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23 14:25   ` Al Viro
  2020-09-23  6:05 ` [PATCH 6/9] fs: remove the compat readv/writev syscalls Christoph Hellwig
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Now that import_iovec handles compat iovecs as well, all the duplicated
code in the compat readv/writev helpers is not needed.  Remove them
and switch the compat syscall handlers to use the native helpers.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/read_write.c | 179 ++++++++----------------------------------------
 1 file changed, 30 insertions(+), 149 deletions(-)
diff --git a/fs/read_write.c b/fs/read_write.c
index 0a68037580b455..eab427b7cc0a3f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1068,226 +1068,107 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
 	return do_pwritev(fd, vec, vlen, pos, flags);
 }
 
+/*
+ * Various compat syscalls.  Note that they all pretend to take a native
+ * iovec - import_iovec will properly treat those as compat_iovecs based on
+ * in_compat_syscall().
+ */
 #ifdef CONFIG_COMPAT
-static size_t compat_readv(struct file *file,
-			   const struct compat_iovec __user *vec,
-			   unsigned long vlen, loff_t *pos, rwf_t flags)
-{
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	struct iov_iter iter;
-	ssize_t ret;
-
-	ret = import_iovec(READ, (const struct iovec __user *)vec, vlen,
-			   UIO_FASTIOV, &iov, &iter);
-	if (ret >= 0) {
-		ret = do_iter_read(file, &iter, pos, flags);
-		kfree(iov);
-	}
-	if (ret > 0)
-		add_rchar(current, ret);
-	inc_syscr(current);
-	return ret;
-}
-
-static size_t do_compat_readv(compat_ulong_t fd,
-				 const struct compat_iovec __user *vec,
-				 compat_ulong_t vlen, rwf_t flags)
-{
-	struct fd f = fdget_pos(fd);
-	ssize_t ret;
-	loff_t pos;
-
-	if (!f.file)
-		return -EBADF;
-	pos = f.file->f_pos;
-	ret = compat_readv(f.file, vec, vlen, &pos, flags);
-	if (ret >= 0)
-		f.file->f_pos = pos;
-	fdput_pos(f);
-	return ret;
-
-}
-
 COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		compat_ulong_t, vlen)
 {
-	return do_compat_readv(fd, vec, vlen, 0);
-}
-
-static long do_compat_preadv64(unsigned long fd,
-				  const struct compat_iovec __user *vec,
-				  unsigned long vlen, loff_t pos, rwf_t flags)
-{
-	struct fd f;
-	ssize_t ret;
-
-	if (pos < 0)
-		return -EINVAL;
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-	ret = -ESPIPE;
-	if (f.file->f_mode & FMODE_PREAD)
-		ret = compat_readv(f.file, vec, vlen, &pos, flags);
-	fdput(f);
-	return ret;
+	return do_readv(fd, vec, vlen, 0);
 }
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		unsigned long, vlen, loff_t, pos)
 {
-	return do_compat_preadv64(fd, vec, vlen, pos, 0);
+	return do_preadv(fd, vec, vlen, pos, 0);
 }
 #endif
 
 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
-	return do_compat_preadv64(fd, vec, vlen, pos, 0);
+	return do_preadv(fd, vec, vlen, pos, 0);
 }
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		unsigned long, vlen, loff_t, pos, rwf_t, flags)
 {
 	if (pos == -1)
-		return do_compat_readv(fd, vec, vlen, flags);
-
-	return do_compat_preadv64(fd, vec, vlen, pos, flags);
+		return do_readv(fd, vec, vlen, flags);
+	return do_preadv(fd, vec, vlen, pos, flags);
 }
 #endif
 
 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
 		rwf_t, flags)
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
 	if (pos == -1)
-		return do_compat_readv(fd, vec, vlen, flags);
-
-	return do_compat_preadv64(fd, vec, vlen, pos, flags);
-}
-
-static size_t compat_writev(struct file *file,
-			    const struct compat_iovec __user *vec,
-			    unsigned long vlen, loff_t *pos, rwf_t flags)
-{
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	struct iov_iter iter;
-	ssize_t ret;
-
-	ret = import_iovec(WRITE, (const struct iovec __user *)vec, vlen,
-			   UIO_FASTIOV, &iov, &iter);
-	if (ret >= 0) {
-		file_start_write(file);
-		ret = do_iter_write(file, &iter, pos, flags);
-		file_end_write(file);
-		kfree(iov);
-	}
-	if (ret > 0)
-		add_wchar(current, ret);
-	inc_syscw(current);
-	return ret;
-}
-
-static size_t do_compat_writev(compat_ulong_t fd,
-				  const struct compat_iovec __user* vec,
-				  compat_ulong_t vlen, rwf_t flags)
-{
-	struct fd f = fdget_pos(fd);
-	ssize_t ret;
-	loff_t pos;
-
-	if (!f.file)
-		return -EBADF;
-	pos = f.file->f_pos;
-	ret = compat_writev(f.file, vec, vlen, &pos, flags);
-	if (ret >= 0)
-		f.file->f_pos = pos;
-	fdput_pos(f);
-	return ret;
+		return do_readv(fd, vec, vlen, flags);
+	return do_preadv(fd, vec, vlen, pos, flags);
 }
 
 COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
-		const struct compat_iovec __user *, vec,
+		const struct iovec __user *, vec,
 		compat_ulong_t, vlen)
 {
-	return do_compat_writev(fd, vec, vlen, 0);
-}
-
-static long do_compat_pwritev64(unsigned long fd,
-				   const struct compat_iovec __user *vec,
-				   unsigned long vlen, loff_t pos, rwf_t flags)
-{
-	struct fd f;
-	ssize_t ret;
-
-	if (pos < 0)
-		return -EINVAL;
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-	ret = -ESPIPE;
-	if (f.file->f_mode & FMODE_PWRITE)
-		ret = compat_writev(f.file, vec, vlen, &pos, flags);
-	fdput(f);
-	return ret;
+	return do_writev(fd, vec, vlen, 0);
 }
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		unsigned long, vlen, loff_t, pos)
 {
-	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+	return do_pwritev(fd, vec, vlen, pos, 0);
 }
 #endif
 
 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *,vec,
 		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
-	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+	return do_pwritev(fd, vec, vlen, pos, 0);
 }
 
 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *, vec,
 		unsigned long, vlen, loff_t, pos, rwf_t, flags)
 {
 	if (pos == -1)
-		return do_compat_writev(fd, vec, vlen, flags);
-
-	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
+		return do_writev(fd, vec, vlen, flags);
+	return do_pwritev(fd, vec, vlen, pos, flags);
 }
 #endif
 
 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
-		const struct compat_iovec __user *,vec,
+		const struct iovec __user *,vec,
 		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
 {
 	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
 
 	if (pos == -1)
-		return do_compat_writev(fd, vec, vlen, flags);
-
-	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
+		return do_writev(fd, vec, vlen, flags);
+	return do_pwritev(fd, vec, vlen, pos, flags);
 }
-
-#endif
+#endif /* CONFIG_COMPAT */
 
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 		  	   size_t count, loff_t max)
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23  6:05 ` [PATCH 5/9] fs: remove various compat readv/writev helpers Christoph Hellwig
@ 2020-09-23 14:25   ` Al Viro
  2020-09-23 14:32     ` Christoph Hellwig
  0 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2020-09-23 14:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 08:05:43AM +0200, Christoph Hellwig wrote:
>  COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> -		const struct compat_iovec __user *,vec,
> +		const struct iovec __user *, vec,
Um...  Will it even compile?
>  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
>  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
> -		const struct compat_iovec __user *,vec,
> +		const struct iovec __user *, vec,
Ditto.  Look into include/linux/compat.h and you'll see
asmlinkage long compat_sys_preadv64(unsigned long fd,
                const struct compat_iovec __user *vec,
                unsigned long vlen, loff_t pos);
How does that manage to avoid the compiler screaming bloody
murder?
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 14:25   ` Al Viro
@ 2020-09-23 14:32     ` Christoph Hellwig
  2020-09-23 14:59       ` Al Viro
  0 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23 14:32 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, Arnd Bergmann,
	David Howells, David Laight, linux-arm-kernel, linux-kernel,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-block, linux-scsi, linux-fsdevel, linux-aio, io-uring,
	linux-arch, linux-mm, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 03:25:49PM +0100, Al Viro wrote:
> On Wed, Sep 23, 2020 at 08:05:43AM +0200, Christoph Hellwig wrote:
> >  COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> > -		const struct compat_iovec __user *,vec,
> > +		const struct iovec __user *, vec,
> 
> Um...  Will it even compile?
> 
> >  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
> >  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
> > -		const struct compat_iovec __user *,vec,
> > +		const struct iovec __user *, vec,
> 
> Ditto.  Look into include/linux/compat.h and you'll see
> 
> asmlinkage long compat_sys_preadv64(unsigned long fd,
>                 const struct compat_iovec __user *vec,
>                 unsigned long vlen, loff_t pos);
> 
> How does that manage to avoid the compiler screaming bloody
> murder?
That's a very good question.  But it does not just compile but actually
works.  Probably because all the syscall wrappers mean that we don't
actually generate the normal names.  I just tried this:
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset,
 asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
 asmlinkage long sys_write(unsigned int fd, const char __user *buf,
                            size_t count);
-asmlinkage long sys_readv(unsigned long fd,
+asmlinkage long sys_readv(void *fd,
for fun, and the compiler doesn't care either..
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 14:32     ` Christoph Hellwig
@ 2020-09-23 14:59       ` Al Viro
  2020-09-23 16:38         ` Al Viro
  0 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2020-09-23 14:59 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 04:32:51PM +0200, Christoph Hellwig wrote:
> On Wed, Sep 23, 2020 at 03:25:49PM +0100, Al Viro wrote:
> > On Wed, Sep 23, 2020 at 08:05:43AM +0200, Christoph Hellwig wrote:
> > >  COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
> > > -		const struct compat_iovec __user *,vec,
> > > +		const struct iovec __user *, vec,
> > 
> > Um...  Will it even compile?
> > 
> > >  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
> > >  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
> > > -		const struct compat_iovec __user *,vec,
> > > +		const struct iovec __user *, vec,
> > 
> > Ditto.  Look into include/linux/compat.h and you'll see
> > 
> > asmlinkage long compat_sys_preadv64(unsigned long fd,
> >                 const struct compat_iovec __user *vec,
> >                 unsigned long vlen, loff_t pos);
> > 
> > How does that manage to avoid the compiler screaming bloody
> > murder?
> 
> That's a very good question.  But it does not just compile but actually
> works.  Probably because all the syscall wrappers mean that we don't
> actually generate the normal names.  I just tried this:
> 
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset,
>  asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
>  asmlinkage long sys_write(unsigned int fd, const char __user *buf,
>                             size_t count);
> -asmlinkage long sys_readv(unsigned long fd,
> +asmlinkage long sys_readv(void *fd,
> 
> for fun, and the compiler doesn't care either..
Try to build it for sparc or ppc...
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 14:59       ` Al Viro
@ 2020-09-23 16:38         ` Al Viro
  2020-09-23 17:05           ` Al Viro
                             ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Al Viro @ 2020-09-23 16:38 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 03:59:01PM +0100, Al Viro wrote:
> > That's a very good question.  But it does not just compile but actually
> > works.  Probably because all the syscall wrappers mean that we don't
> > actually generate the normal names.  I just tried this:
> > 
> > --- a/include/linux/syscalls.h
> > +++ b/include/linux/syscalls.h
> > @@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset,
> >  asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
> >  asmlinkage long sys_write(unsigned int fd, const char __user *buf,
> >                             size_t count);
> > -asmlinkage long sys_readv(unsigned long fd,
> > +asmlinkage long sys_readv(void *fd,
> > 
> > for fun, and the compiler doesn't care either..
> 
> Try to build it for sparc or ppc...
FWIW, declarations in syscalls.h used to serve 4 purposes:
	1) syscall table initializers needed symbols declared
	2) direct calls needed the same
	3) catching mismatches between the declarations and definitions
	4) centralized list of all syscalls
(2) has been (thankfully) reduced for some time; in any case, ksys_... is
used for the remaining ones.
(1) and (3) are served by syscalls.h in architectures other than x86, arm64
and s390.  On those 3 (1) is done otherwise (near the syscall table initializer)
and (3) is not done at all.
I wonder if we should do something like
SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		 unsigned long, vlen);
in syscalls.h instead, and not under that ifdef.
Let it expand to declaration of sys_...() in generic case and, on x86, into
__do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
what SYSCALL_DEFINE ends up using.
Similar macro would cover compat_sys_...() declarations.  That would
restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
be terribly high - cpp would have more to chew through in syscalls.h,
but it shouldn't be all that costly.  Famous last words, of course...
Does anybody see fundamental problems with that?
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 16:38         ` Al Viro
@ 2020-09-23 17:05           ` Al Viro
  2020-09-23 17:46             ` Christoph Hellwig
  2020-09-23 17:08           ` Brian Gerst
  2020-09-23 18:45           ` Arnd Bergmann
  2 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2020-09-23 17:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 05:38:31PM +0100, Al Viro wrote:
> On Wed, Sep 23, 2020 at 03:59:01PM +0100, Al Viro wrote:
> 
> > > That's a very good question.  But it does not just compile but actually
> > > works.  Probably because all the syscall wrappers mean that we don't
> > > actually generate the normal names.  I just tried this:
> > > 
> > > --- a/include/linux/syscalls.h
> > > +++ b/include/linux/syscalls.h
> > > @@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset,
> > >  asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
> > >  asmlinkage long sys_write(unsigned int fd, const char __user *buf,
> > >                             size_t count);
> > > -asmlinkage long sys_readv(unsigned long fd,
> > > +asmlinkage long sys_readv(void *fd,
> > > 
> > > for fun, and the compiler doesn't care either..
> > 
> > Try to build it for sparc or ppc...
> 
> FWIW, declarations in syscalls.h used to serve 4 purposes:
> 	1) syscall table initializers needed symbols declared
> 	2) direct calls needed the same
> 	3) catching mismatches between the declarations and definitions
> 	4) centralized list of all syscalls
> 
> (2) has been (thankfully) reduced for some time; in any case, ksys_... is
> used for the remaining ones.
> 
> (1) and (3) are served by syscalls.h in architectures other than x86, arm64
> and s390.  On those 3 (1) is done otherwise (near the syscall table initializer)
> and (3) is not done at all.
> 
> I wonder if we should do something like
> 
> SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
> 		 unsigned long, vlen);
> in syscalls.h instead, and not under that ifdef.
> 
> Let it expand to declaration of sys_...() in generic case and, on x86, into
> __do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
> what SYSCALL_DEFINE ends up using.
> 
> Similar macro would cover compat_sys_...() declarations.  That would
> restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
> be terribly high - cpp would have more to chew through in syscalls.h,
> but it shouldn't be all that costly.  Famous last words, of course...
> 
> Does anybody see fundamental problems with that?
Just to make it clear - I do not propose to fold that into this series;
there we just need to keep those declarations in sync with fs/read_write.c
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 17:05           ` Al Viro
@ 2020-09-23 17:46             ` Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23 17:46 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, Arnd Bergmann,
	David Howells, David Laight, linux-arm-kernel, linux-kernel,
	linux-mips, linux-parisc, linuxppc-dev, linux-s390, sparclinux,
	linux-block, linux-scsi, linux-fsdevel, linux-aio, io-uring,
	linux-arch, linux-mm, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 06:05:27PM +0100, Al Viro wrote:
> On Wed, Sep 23, 2020 at 05:38:31PM +0100, Al Viro wrote:
> > On Wed, Sep 23, 2020 at 03:59:01PM +0100, Al Viro wrote:
> > 
> > > > That's a very good question.  But it does not just compile but actually
> > > > works.  Probably because all the syscall wrappers mean that we don't
> > > > actually generate the normal names.  I just tried this:
> > > > 
> > > > --- a/include/linux/syscalls.h
> > > > +++ b/include/linux/syscalls.h
> > > > @@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset,
> > > >  asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
> > > >  asmlinkage long sys_write(unsigned int fd, const char __user *buf,
> > > >                             size_t count);
> > > > -asmlinkage long sys_readv(unsigned long fd,
> > > > +asmlinkage long sys_readv(void *fd,
> > > > 
> > > > for fun, and the compiler doesn't care either..
> > > 
> > > Try to build it for sparc or ppc...
> > 
> > FWIW, declarations in syscalls.h used to serve 4 purposes:
> > 	1) syscall table initializers needed symbols declared
> > 	2) direct calls needed the same
> > 	3) catching mismatches between the declarations and definitions
> > 	4) centralized list of all syscalls
> > 
> > (2) has been (thankfully) reduced for some time; in any case, ksys_... is
> > used for the remaining ones.
> > 
> > (1) and (3) are served by syscalls.h in architectures other than x86, arm64
> > and s390.  On those 3 (1) is done otherwise (near the syscall table initializer)
> > and (3) is not done at all.
> > 
> > I wonder if we should do something like
> > 
> > SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
> > 		 unsigned long, vlen);
> > in syscalls.h instead, and not under that ifdef.
> > 
> > Let it expand to declaration of sys_...() in generic case and, on x86, into
> > __do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
> > what SYSCALL_DEFINE ends up using.
> > 
> > Similar macro would cover compat_sys_...() declarations.  That would
> > restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
> > be terribly high - cpp would have more to chew through in syscalls.h,
> > but it shouldn't be all that costly.  Famous last words, of course...
> > 
> > Does anybody see fundamental problems with that?
> 
> Just to make it clear - I do not propose to fold that into this series;
> there we just need to keep those declarations in sync with fs/read_write.c
Agreed.  The above idea generally sounds sane to me.
^ permalink raw reply	[flat|nested] 25+ messages in thread 
 
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 16:38         ` Al Viro
  2020-09-23 17:05           ` Al Viro
@ 2020-09-23 17:08           ` Brian Gerst
  2020-09-23 18:45           ` Arnd Bergmann
  2 siblings, 0 replies; 25+ messages in thread
From: Brian Gerst @ 2020-09-23 17:08 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, Arnd Bergmann,
	David Howells, David Laight, Linux ARM, Linux Kernel Mailing List,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, linux-block, linux-scsi,
	Linux FS-devel Mailing List, linux-aio, io-uring, linux-arch,
	Linux-MM, netdev, keyrings, linux-security-module
On Wed, Sep 23, 2020 at 12:39 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Wed, Sep 23, 2020 at 03:59:01PM +0100, Al Viro wrote:
>
> > > That's a very good question.  But it does not just compile but actually
> > > works.  Probably because all the syscall wrappers mean that we don't
> > > actually generate the normal names.  I just tried this:
> > >
> > > --- a/include/linux/syscalls.h
> > > +++ b/include/linux/syscalls.h
> > > @@ -468,7 +468,7 @@ asmlinkage long sys_lseek(unsigned int fd, off_t offset,
> > >  asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count);
> > >  asmlinkage long sys_write(unsigned int fd, const char __user *buf,
> > >                             size_t count);
> > > -asmlinkage long sys_readv(unsigned long fd,
> > > +asmlinkage long sys_readv(void *fd,
> > >
> > > for fun, and the compiler doesn't care either..
> >
> > Try to build it for sparc or ppc...
>
> FWIW, declarations in syscalls.h used to serve 4 purposes:
>         1) syscall table initializers needed symbols declared
>         2) direct calls needed the same
>         3) catching mismatches between the declarations and definitions
>         4) centralized list of all syscalls
>
> (2) has been (thankfully) reduced for some time; in any case, ksys_... is
> used for the remaining ones.
>
> (1) and (3) are served by syscalls.h in architectures other than x86, arm64
> and s390.  On those 3 (1) is done otherwise (near the syscall table initializer)
> and (3) is not done at all.
>
> I wonder if we should do something like
>
> SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
>                  unsigned long, vlen);
> in syscalls.h instead, and not under that ifdef.
>
> Let it expand to declaration of sys_...() in generic case and, on x86, into
> __do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
> what SYSCALL_DEFINE ends up using.
>
> Similar macro would cover compat_sys_...() declarations.  That would
> restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
> be terribly high - cpp would have more to chew through in syscalls.h,
> but it shouldn't be all that costly.  Famous last words, of course...
>
> Does anybody see fundamental problems with that?
I think this would be a good idea.  I have been working on a patchset
to clean up the conditional syscall handling (sys_ni.c), and conflicts
with the prototypes in syscalls.h have been getting in the way.
Having the prototypes use SYSCALL_DECLAREx(...) would solve that
issue.
--
Brian Gerst
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 16:38         ` Al Viro
  2020-09-23 17:05           ` Al Viro
  2020-09-23 17:08           ` Brian Gerst
@ 2020-09-23 18:45           ` Arnd Bergmann
  2020-09-23 19:47             ` Al Viro
  2020-09-23 21:30             ` David Laight
  2 siblings, 2 replies; 25+ messages in thread
From: Arnd Bergmann @ 2020-09-23 18:45 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, David Howells,
	David Laight, Linux ARM, linux-kernel@vger.kernel.org,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, linux-block, linux-scsi,
	Linux FS-devel Mailing List, linux-aio, io-uring, linux-arch,
	Linux-MM, Networking, keyrings, LSM List
On Wed, Sep 23, 2020 at 6:38 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> I wonder if we should do something like
>
> SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
>                  unsigned long, vlen);
> in syscalls.h instead, and not under that ifdef.
>
> Let it expand to declaration of sys_...() in generic case and, on x86, into
> __do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
> what SYSCALL_DEFINE ends up using.
>
> Similar macro would cover compat_sys_...() declarations.  That would
> restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
> be terribly high - cpp would have more to chew through in syscalls.h,
> but it shouldn't be all that costly.  Famous last words, of course...
>
> Does anybody see fundamental problems with that?
I've had some ideas along those lines in the past and I think it should work.
As a variation of this, the SYSCALL_DEFINEx() macros could go away
entirely, leaving only the macro instantiations from the header to
require that syntax. It would require first changing the remaining
architectures to build the syscall table from C code instead of
assembler though.
Regardless of that, another advantage of having the SYSCALL_DECLAREx()
would be the ability to include that header file from elsewhere with a different
macro definition to create a machine-readable version of the interface when
combined with the syscall.tbl files. This could be used to create a user
space stub for calling into the low-level syscall regardless of the
libc interfaces,
or for synchronizing the interfaces with strace, qemu-user, or anything that
needs to deal with the low-level interface.
      Arnd
^ permalink raw reply	[flat|nested] 25+ messages in thread
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 18:45           ` Arnd Bergmann
@ 2020-09-23 19:47             ` Al Viro
  2020-09-23 19:52               ` Arnd Bergmann
  2020-09-23 21:30             ` David Laight
  1 sibling, 1 reply; 25+ messages in thread
From: Al Viro @ 2020-09-23 19:47 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, David Howells,
	David Laight, Linux ARM, linux-kernel@vger.kernel.org,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, linux-block, linux-scsi,
	Linux FS-devel Mailing List, linux-aio, io-uring, linux-arch,
	Linux-MM, Networking, keyrings, LSM List
On Wed, Sep 23, 2020 at 08:45:51PM +0200, Arnd Bergmann wrote:
> On Wed, Sep 23, 2020 at 6:38 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > I wonder if we should do something like
> >
> > SYSCALL_DECLARE3(readv, unsigned long, fd, const struct iovec __user *, vec,
> >                  unsigned long, vlen);
> > in syscalls.h instead, and not under that ifdef.
> >
> > Let it expand to declaration of sys_...() in generic case and, on x86, into
> > __do_sys_...() and __ia32_sys_...()/__x64_sys_...(), with types matching
> > what SYSCALL_DEFINE ends up using.
> >
> > Similar macro would cover compat_sys_...() declarations.  That would
> > restore mismatch checking for x86 and friends.  AFAICS, the cost wouldn't
> > be terribly high - cpp would have more to chew through in syscalls.h,
> > but it shouldn't be all that costly.  Famous last words, of course...
> >
> > Does anybody see fundamental problems with that?
> 
> I've had some ideas along those lines in the past and I think it should work.
> 
> As a variation of this, the SYSCALL_DEFINEx() macros could go away
> entirely, leaving only the macro instantiations from the header to
> require that syntax. It would require first changing the remaining
> architectures to build the syscall table from C code instead of
> assembler though.
> 
> Regardless of that, another advantage of having the SYSCALL_DECLAREx()
> would be the ability to include that header file from elsewhere with a different
> macro definition to create a machine-readable version of the interface when
> combined with the syscall.tbl files. This could be used to create a user
> space stub for calling into the low-level syscall regardless of the
> libc interfaces,
> or for synchronizing the interfaces with strace, qemu-user, or anything that
> needs to deal with the low-level interface.
FWIW, after playing with that for a while...  Do we really want the
compat_sys_...() declarations to live in linux/compat.h?  Most of
the users of that file don't want those; why not move them to
linux/syscalls.h?
Reason: there's a lot more users of linux/compat.h than those of
linux/syscalls.h - it's pulled by everything in the networking stack,
for starters...
^ permalink raw reply	[flat|nested] 25+ messages in thread 
- * Re: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 19:47             ` Al Viro
@ 2020-09-23 19:52               ` Arnd Bergmann
  0 siblings, 0 replies; 25+ messages in thread
From: Arnd Bergmann @ 2020-09-23 19:52 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, David Howells,
	David Laight, Linux ARM, linux-kernel@vger.kernel.org,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, linux-block, linux-scsi,
	Linux FS-devel Mailing List, linux-aio, io-uring, linux-arch,
	Linux-MM, Networking, keyrings, LSM List
On Wed, Sep 23, 2020 at 9:48 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
> FWIW, after playing with that for a while...  Do we really want the
> compat_sys_...() declarations to live in linux/compat.h?  Most of
> the users of that file don't want those; why not move them to
> linux/syscalls.h?
Sure, let's do that. The trend overall is to integrate the compat stuff
more closely into where the native implementation lives, so this
would just follow that trend.
I think with Christoph's latest patches, about half of them are
going away as well.
> Reason: there's a lot more users of linux/compat.h than those of
> linux/syscalls.h - it's pulled by everything in the networking stack,
> for starters...
Right, the network headers pull in almost everything else through
multiple indirect inclusions, anything we can do to reduce that
helps.
     Arnd
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
- * RE: [PATCH 5/9] fs: remove various compat readv/writev helpers
  2020-09-23 18:45           ` Arnd Bergmann
  2020-09-23 19:47             ` Al Viro
@ 2020-09-23 21:30             ` David Laight
  1 sibling, 0 replies; 25+ messages in thread
From: David Laight @ 2020-09-23 21:30 UTC (permalink / raw)
  To: 'Arnd Bergmann', Al Viro
  Cc: Christoph Hellwig, Andrew Morton, Jens Axboe, David Howells,
	Linux ARM, linux-kernel@vger.kernel.org,
	open list:BROADCOM NVRAM DRIVER, Parisc List, linuxppc-dev,
	linux-s390, sparclinux, linux-block, linux-scsi,
	Linux FS-devel Mailing List, linux-aio, io-uring@vger.kernel.org,
	linux-arch, Linux-MM, Networking, keyrings@vger.kernel.org,
	LSM List
From: Arnd Bergmann
> Sent: 23 September 2020 19:46
...
> Regardless of that, another advantage of having the SYSCALL_DECLAREx()
> would be the ability to include that header file from elsewhere with a different
> macro definition to create a machine-readable version of the interface when
> combined with the syscall.tbl files. This could be used to create a user
> space stub for calling into the low-level syscall regardless of the
> libc interfaces,
> or for synchronizing the interfaces with strace, qemu-user, or anything that
> needs to deal with the low-level interface.
A similar 'trick' (that probably won't work here) is to pass
the name of a #define function as a parameter to another define.
Useful for defining constants and error strings together. eg:
#define TRAFFIC_LIGHTS(x) \
	x(RED, 0, "Red") \
	x(YELLOW, 1, "Yellow) \
	x(GREEN, 2, "GREEN)
You can then do thing like:
#define x(token, value, string) token = value,
enum {TRAFFIC_LIGHTS(x) NUM_LIGHTS};
#undef x
#define x(token, value, string) [value] = string,
const char *colours[] = {TRAFFIC_LIGHTS(x)};
#undef x
to initialise constants and a name table that are always in sync.
It is also a good way to generate source lines that are over 1MB.
	David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply	[flat|nested] 25+ messages in thread
 
 
 
 
 
 
- * [PATCH 6/9] fs: remove the compat readv/writev syscalls
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
                   ` (4 preceding siblings ...)
  2020-09-23  6:05 ` [PATCH 5/9] fs: remove various compat readv/writev helpers Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 7/9] fs: remove compat_sys_vmsplice Christoph Hellwig
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Now that import_iovec handles compat iovecs, the native readv and writev
syscalls can be used for the compat case as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/unistd32.h                  |  4 ++--
 arch/mips/kernel/syscalls/syscall_n32.tbl          |  4 ++--
 arch/mips/kernel/syscalls/syscall_o32.tbl          |  4 ++--
 arch/parisc/kernel/syscalls/syscall.tbl            |  4 ++--
 arch/powerpc/kernel/syscalls/syscall.tbl           |  4 ++--
 arch/s390/kernel/syscalls/syscall.tbl              |  4 ++--
 arch/sparc/kernel/syscalls/syscall.tbl             |  4 ++--
 arch/x86/entry/syscall_x32.c                       |  2 ++
 arch/x86/entry/syscalls/syscall_32.tbl             |  4 ++--
 arch/x86/entry/syscalls/syscall_64.tbl             |  4 ++--
 fs/read_write.c                                    | 14 --------------
 include/linux/compat.h                             |  4 ----
 include/uapi/asm-generic/unistd.h                  |  4 ++--
 tools/include/uapi/asm-generic/unistd.h            |  4 ++--
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |  4 ++--
 tools/perf/arch/s390/entry/syscalls/syscall.tbl    |  4 ++--
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |  4 ++--
 17 files changed, 30 insertions(+), 46 deletions(-)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 734860ac7cf9d5..4a236493dca5b9 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -301,9 +301,9 @@ __SYSCALL(__NR_flock, sys_flock)
 #define __NR_msync 144
 __SYSCALL(__NR_msync, sys_msync)
 #define __NR_readv 145
-__SYSCALL(__NR_readv, compat_sys_readv)
+__SYSCALL(__NR_readv, sys_readv)
 #define __NR_writev 146
-__SYSCALL(__NR_writev, compat_sys_writev)
+__SYSCALL(__NR_writev, sys_writev)
 #define __NR_getsid 147
 __SYSCALL(__NR_getsid, sys_getsid)
 #define __NR_fdatasync 148
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f9df9edb67a407..c99a92646f8ee9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -25,8 +25,8 @@
 15	n32	ioctl				compat_sys_ioctl
 16	n32	pread64				sys_pread64
 17	n32	pwrite64			sys_pwrite64
-18	n32	readv				compat_sys_readv
-19	n32	writev				compat_sys_writev
+18	n32	readv				sys_readv
+19	n32	writev				sys_writev
 20	n32	access				sys_access
 21	n32	pipe				sysm_pipe
 22	n32	_newselect			compat_sys_select
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 195b43cf27c848..075064d10661bf 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -156,8 +156,8 @@
 142	o32	_newselect			sys_select			compat_sys_select
 143	o32	flock				sys_flock
 144	o32	msync				sys_msync
-145	o32	readv				sys_readv			compat_sys_readv
-146	o32	writev				sys_writev			compat_sys_writev
+145	o32	readv				sys_readv
+146	o32	writev				sys_writev
 147	o32	cacheflush			sys_cacheflush
 148	o32	cachectl			sys_cachectl
 149	o32	sysmips				__sys_sysmips
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index def64d221cd4fb..192abde0001d9d 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -159,8 +159,8 @@
 142	common	_newselect		sys_select			compat_sys_select
 143	common	flock			sys_flock
 144	common	msync			sys_msync
-145	common	readv			sys_readv			compat_sys_readv
-146	common	writev			sys_writev			compat_sys_writev
+145	common	readv			sys_readv
+146	common	writev			sys_writev
 147	common	getsid			sys_getsid
 148	common	fdatasync		sys_fdatasync
 149	common	_sysctl			sys_ni_syscall
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index c2d737ff2e7bec..6f1e2ecf0edad9 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -193,8 +193,8 @@
 142	common	_newselect			sys_select			compat_sys_select
 143	common	flock				sys_flock
 144	common	msync				sys_msync
-145	common	readv				sys_readv			compat_sys_readv
-146	common	writev				sys_writev			compat_sys_writev
+145	common	readv				sys_readv
+146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
 149	nospu	_sysctl				sys_ni_syscall
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 10456bc936fb09..6101cf2e004cb4 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -134,8 +134,8 @@
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
 144  common	msync			sys_msync			sys_msync
-145  common	readv			sys_readv			compat_sys_readv
-146  common	writev			sys_writev			compat_sys_writev
+145  common	readv			sys_readv			sys_readv
+146  common	writev			sys_writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
 149  common	_sysctl			-				-
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 4af114e84f2022..a87ddb282ab16f 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -149,8 +149,8 @@
 117	common	getrusage		sys_getrusage			compat_sys_getrusage
 118	common	getsockopt		sys_getsockopt			sys_getsockopt
 119	common	getcwd			sys_getcwd
-120	common	readv			sys_readv			compat_sys_readv
-121	common	writev			sys_writev			compat_sys_writev
+120	common	readv			sys_readv
+121	common	writev			sys_writev
 122	common	settimeofday		sys_settimeofday		compat_sys_settimeofday
 123	32	fchown			sys_fchown16
 123	64	fchown			sys_fchown
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 1583831f61a9df..aa321444a41f63 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -12,6 +12,8 @@
  * Reuse the 64-bit entry points for the x32 versions that occupy different
  * slots in the syscall table.
  */
+#define __x32_sys_readv		__x64_sys_readv
+#define __x32_sys_writev	__x64_sys_writev
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9d11028736661b..54ab4beb517f25 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -156,8 +156,8 @@
 142	i386	_newselect		sys_select			compat_sys_select
 143	i386	flock			sys_flock
 144	i386	msync			sys_msync
-145	i386	readv			sys_readv			compat_sys_readv
-146	i386	writev			sys_writev			compat_sys_writev
+145	i386	readv			sys_readv
+146	i386	writev			sys_writev
 147	i386	getsid			sys_getsid
 148	i386	fdatasync		sys_fdatasync
 149	i386	_sysctl			sys_ni_syscall
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a6883c..b1e59957c5c51c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
 512	x32	rt_sigaction		compat_sys_rt_sigaction
 513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
-515	x32	readv			compat_sys_readv
-516	x32	writev			compat_sys_writev
+515	x32	readv			sys_readv
+516	x32	writev			sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
diff --git a/fs/read_write.c b/fs/read_write.c
index eab427b7cc0a3f..6c13f744c34a38 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1074,13 +1074,6 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
  * in_compat_syscall().
  */
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
-		const struct iovec __user *, vec,
-		compat_ulong_t, vlen)
-{
-	return do_readv(fd, vec, vlen, 0);
-}
-
 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
 		const struct iovec __user *, vec,
@@ -1122,13 +1115,6 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
 	return do_preadv(fd, vec, vlen, pos, flags);
 }
 
-COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
-		const struct iovec __user *, vec,
-		compat_ulong_t, vlen)
-{
-	return do_writev(fd, vec, vlen, 0);
-}
-
 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
 		const struct iovec __user *, vec,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index b930de791ff16b..fa39b7a5488d70 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -545,10 +545,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
 
 /* fs/read_write.c */
 asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int);
-asmlinkage ssize_t compat_sys_readv(compat_ulong_t fd,
-		const struct compat_iovec __user *vec, compat_ulong_t vlen);
-asmlinkage ssize_t compat_sys_writev(compat_ulong_t fd,
-		const struct compat_iovec __user *vec, compat_ulong_t vlen);
 /* No generic prototype for pread64 and pwrite64 */
 asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd,
 		const struct compat_iovec __user *vec,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d8a..211c9eacbda6eb 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
 #define __NR_write 64
 __SYSCALL(__NR_write, sys_write)
 #define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
 #define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
 #define __NR_pread64 67
 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
 #define __NR_pwrite64 68
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 995b36c2ea7d8a..211c9eacbda6eb 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
 #define __NR_write 64
 __SYSCALL(__NR_write, sys_write)
 #define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
 #define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
 #define __NR_pread64 67
 __SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
 #define __NR_pwrite64 68
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3ca6fe057a0b1f..46be68029587f9 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -189,8 +189,8 @@
 142	common	_newselect			sys_select			compat_sys_select
 143	common	flock				sys_flock
 144	common	msync				sys_msync
-145	common	readv				sys_readv			compat_sys_readv
-146	common	writev				sys_writev			compat_sys_writev
+145	common	readv				sys_readv
+146	common	writev				sys_writev
 147	common	getsid				sys_getsid
 148	common	fdatasync			sys_fdatasync
 149	nospu	_sysctl				sys_ni_syscall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 6a0bbea225db0d..fb5e61ce9d5838 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -134,8 +134,8 @@
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
 144  common	msync			sys_msync			compat_sys_msync
-145  common	readv			sys_readv			compat_sys_readv
-146  common	writev			sys_writev			compat_sys_writev
+145  common	readv			sys_readv
+146  common	writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
 149  common	_sysctl			-				-
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index f30d6ae9a6883c..b1e59957c5c51c 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -371,8 +371,8 @@
 512	x32	rt_sigaction		compat_sys_rt_sigaction
 513	x32	rt_sigreturn		compat_sys_x32_rt_sigreturn
 514	x32	ioctl			compat_sys_ioctl
-515	x32	readv			compat_sys_readv
-516	x32	writev			compat_sys_writev
+515	x32	readv			sys_readv
+516	x32	writev			sys_writev
 517	x32	recvfrom		compat_sys_recvfrom
 518	x32	sendmsg			compat_sys_sendmsg
 519	x32	recvmsg			compat_sys_recvmsg
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [PATCH 7/9] fs: remove compat_sys_vmsplice
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
                   ` (5 preceding siblings ...)
  2020-09-23  6:05 ` [PATCH 6/9] fs: remove the compat readv/writev syscalls Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 8/9] mm: remove compat_process_vm_{readv,writev} Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 9/9] security/keys: remove compat_keyctl_instantiate_key_iov Christoph Hellwig
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Now that import_iovec handles compat iovecs, the native vmsplice syscall
can be used for the compat case as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/unistd32.h             |  2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl     |  2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl     |  2 +-
 arch/parisc/kernel/syscalls/syscall.tbl       |  2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl      |  2 +-
 arch/s390/kernel/syscalls/syscall.tbl         |  2 +-
 arch/sparc/kernel/syscalls/syscall.tbl        |  2 +-
 arch/x86/entry/syscall_x32.c                  |  1 +
 arch/x86/entry/syscalls/syscall_32.tbl        |  2 +-
 arch/x86/entry/syscalls/syscall_64.tbl        |  2 +-
 fs/splice.c                                   | 57 +++++--------------
 include/linux/compat.h                        |  4 --
 include/uapi/asm-generic/unistd.h             |  2 +-
 tools/include/uapi/asm-generic/unistd.h       |  2 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |  2 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |  2 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl    |  2 +-
 17 files changed, 28 insertions(+), 62 deletions(-)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 4a236493dca5b9..11dfae3a8563bd 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -697,7 +697,7 @@ __SYSCALL(__NR_sync_file_range2, compat_sys_aarch32_sync_file_range2)
 #define __NR_tee 342
 __SYSCALL(__NR_tee, sys_tee)
 #define __NR_vmsplice 343
-__SYSCALL(__NR_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages 344
 __SYSCALL(__NR_move_pages, compat_sys_move_pages)
 #define __NR_getcpu 345
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index c99a92646f8ee9..5a39d4de0ac85b 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -278,7 +278,7 @@
 267	n32	splice				sys_splice
 268	n32	sync_file_range			sys_sync_file_range
 269	n32	tee				sys_tee
-270	n32	vmsplice			compat_sys_vmsplice
+270	n32	vmsplice			sys_vmsplice
 271	n32	move_pages			compat_sys_move_pages
 272	n32	set_robust_list			compat_sys_set_robust_list
 273	n32	get_robust_list			compat_sys_get_robust_list
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 075064d10661bf..136efc6b8c5444 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -318,7 +318,7 @@
 304	o32	splice				sys_splice
 305	o32	sync_file_range			sys_sync_file_range		sys32_sync_file_range
 306	o32	tee				sys_tee
-307	o32	vmsplice			sys_vmsplice			compat_sys_vmsplice
+307	o32	vmsplice			sys_vmsplice
 308	o32	move_pages			sys_move_pages			compat_sys_move_pages
 309	o32	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
 310	o32	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 192abde0001d9d..a9e184192caedd 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -330,7 +330,7 @@
 292	32	sync_file_range		parisc_sync_file_range
 292	64	sync_file_range		sys_sync_file_range
 293	common	tee			sys_tee
-294	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+294	common	vmsplice		sys_vmsplice
 295	common	move_pages		sys_move_pages			compat_sys_move_pages
 296	common	getcpu			sys_getcpu
 297	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 6f1e2ecf0edad9..0d4985919ca34d 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -369,7 +369,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 6101cf2e004cb4..b5495a42814bd1 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index a87ddb282ab16f..f1810c1a35caa5 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -38,7 +38,7 @@
 23	64    	setuid			sys_setuid
 24	32	getuid			sys_getuid16
 24	64   	getuid			sys_getuid
-25	common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+25	common	vmsplice		sys_vmsplice
 26	common	ptrace			sys_ptrace			compat_sys_ptrace
 27	common	alarm			sys_alarm
 28	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index aa321444a41f63..a4840b9d50ad14 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -16,6 +16,7 @@
 #define __x32_sys_writev	__x64_sys_writev
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
+#define __x32_sys_vmsplice	__x64_sys_vmsplice
 
 #define __SYSCALL_64(nr, sym)
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 54ab4beb517f25..0fb2f172581e51 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -327,7 +327,7 @@
 313	i386	splice			sys_splice
 314	i386	sync_file_range		sys_ia32_sync_file_range
 315	i386	tee			sys_tee
-316	i386	vmsplice		sys_vmsplice			compat_sys_vmsplice
+316	i386	vmsplice		sys_vmsplice
 317	i386	move_pages		sys_move_pages			compat_sys_move_pages
 318	i386	getcpu			sys_getcpu
 319	i386	epoll_pwait		sys_epoll_pwait
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index b1e59957c5c51c..642af919183de4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -388,7 +388,7 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
diff --git a/fs/splice.c b/fs/splice.c
index 132d42b9871f9b..18d84544030b39 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -33,7 +33,6 @@
 #include <linux/security.h>
 #include <linux/gfp.h>
 #include <linux/socket.h>
-#include <linux/compat.h>
 #include <linux/sched/signal.h>
 
 #include "internal.h"
@@ -1332,20 +1331,6 @@ static int vmsplice_type(struct fd f, int *type)
  * Currently we punt and implement it as a normal copy, see pipe_to_user().
  *
  */
-static long do_vmsplice(struct file *f, struct iov_iter *iter, unsigned int flags)
-{
-	if (unlikely(flags & ~SPLICE_F_ALL))
-		return -EINVAL;
-
-	if (!iov_iter_count(iter))
-		return 0;
-
-	if (iov_iter_rw(iter) == WRITE)
-		return vmsplice_to_pipe(f, iter, flags);
-	else
-		return vmsplice_to_user(f, iter, flags);
-}
-
 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 		unsigned long, nr_segs, unsigned int, flags)
 {
@@ -1356,6 +1341,9 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 	struct fd f;
 	int type;
 
+	if (unlikely(flags & ~SPLICE_F_ALL))
+		return -EINVAL;
+
 	f = fdget(fd);
 	error = vmsplice_type(f, &type);
 	if (error)
@@ -1363,40 +1351,21 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 
 	error = import_iovec(type, uiov, nr_segs,
 			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (error >= 0) {
-		error = do_vmsplice(f.file, &iter, flags);
-		kfree(iov);
-	}
-	fdput(f);
-	return error;
-}
+	if (error < 0)
+		goto out_fdput;
 
-#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32,
-		    unsigned int, nr_segs, unsigned int, flags)
-{
-	struct iovec iovstack[UIO_FASTIOV];
-	struct iovec *iov = iovstack;
-	struct iov_iter iter;
-	ssize_t error;
-	struct fd f;
-	int type;
-
-	f = fdget(fd);
-	error = vmsplice_type(f, &type);
-	if (error)
-		return error;
+	if (!iov_iter_count(&iter))
+		error = 0;
+	else if (iov_iter_rw(&iter) == WRITE)
+		error = vmsplice_to_pipe(f.file, &iter, flags);
+	else
+		error = vmsplice_to_user(f.file, &iter, flags);
 
-	error = import_iovec(type, (struct iovec __user *)iov32, nr_segs,
-			     ARRAY_SIZE(iovstack), &iov, &iter);
-	if (error >= 0) {
-		error = do_vmsplice(f.file, &iter, flags);
-		kfree(iov);
-	}
+	kfree(iov);
+out_fdput:
 	fdput(f);
 	return error;
 }
-#endif
 
 SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
 		int, fd_out, loff_t __user *, off_out,
diff --git a/include/linux/compat.h b/include/linux/compat.h
index fa39b7a5488d70..14d8bf412bd02e 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -597,10 +597,6 @@ asmlinkage long compat_sys_signalfd4(int ufd,
 				     const compat_sigset_t __user *sigmask,
 				     compat_size_t sigsetsize, int flags);
 
-/* fs/splice.c */
-asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *,
-				    unsigned int nr_segs, unsigned int flags);
-
 /* fs/stat.c */
 asmlinkage long compat_sys_newfstatat(unsigned int dfd,
 				      const char __user *filename,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 211c9eacbda6eb..f2dcb0d5703014 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 211c9eacbda6eb..f2dcb0d5703014 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
 
 /* fs/splice.c */
 #define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_splice 76
 __SYSCALL(__NR_splice, sys_splice)
 #define __NR_tee 77
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 46be68029587f9..26f0347c15118b 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
-285	common	vmsplice			sys_vmsplice			compat_sys_vmsplice
+285	common	vmsplice			sys_vmsplice
 286	common	openat				sys_openat			compat_sys_openat
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index fb5e61ce9d5838..02ad81f69bb7e3 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			compat_sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				compat_sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			compat_sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index b1e59957c5c51c..642af919183de4 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -388,7 +388,7 @@
 529	x32	waitid			compat_sys_waitid
 530	x32	set_robust_list		compat_sys_set_robust_list
 531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
+532	x32	vmsplice		sys_vmsplice
 533	x32	move_pages		compat_sys_move_pages
 534	x32	preadv			compat_sys_preadv64
 535	x32	pwritev			compat_sys_pwritev64
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [PATCH 8/9] mm: remove compat_process_vm_{readv,writev}
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
                   ` (6 preceding siblings ...)
  2020-09-23  6:05 ` [PATCH 7/9] fs: remove compat_sys_vmsplice Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  2020-09-23  6:05 ` [PATCH 9/9] security/keys: remove compat_keyctl_instantiate_key_iov Christoph Hellwig
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Now that import_iovec handles compat iovecs, the native syscalls
can be used for the compat case as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/unistd32.h             |  4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl     |  4 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl     |  4 +-
 arch/parisc/kernel/syscalls/syscall.tbl       |  4 +-
 arch/powerpc/kernel/syscalls/syscall.tbl      |  4 +-
 arch/s390/kernel/syscalls/syscall.tbl         |  4 +-
 arch/sparc/kernel/syscalls/syscall.tbl        |  4 +-
 arch/x86/entry/syscall_x32.c                  |  2 +
 arch/x86/entry/syscalls/syscall_32.tbl        |  4 +-
 arch/x86/entry/syscalls/syscall_64.tbl        |  4 +-
 include/linux/compat.h                        |  8 ---
 include/uapi/asm-generic/unistd.h             |  6 +-
 mm/process_vm_access.c                        | 69 -------------------
 tools/include/uapi/asm-generic/unistd.h       |  6 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |  4 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |  4 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl    |  4 +-
 17 files changed, 30 insertions(+), 109 deletions(-)
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 11dfae3a8563bd..0c280a05f699bf 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -763,9 +763,9 @@ __SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg)
 #define __NR_setns 375
 __SYSCALL(__NR_setns, sys_setns)
 #define __NR_process_vm_readv 376
-__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 377
-__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 378
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 379
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 5a39d4de0ac85b..0bc2e0fcf1ee56 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -317,8 +317,8 @@
 306	n32	syncfs				sys_syncfs
 307	n32	sendmmsg			compat_sys_sendmmsg
 308	n32	setns				sys_setns
-309	n32	process_vm_readv		compat_sys_process_vm_readv
-310	n32	process_vm_writev		compat_sys_process_vm_writev
+309	n32	process_vm_readv		sys_process_vm_readv
+310	n32	process_vm_writev		sys_process_vm_writev
 311	n32	kcmp				sys_kcmp
 312	n32	finit_module			sys_finit_module
 313	n32	sched_setattr			sys_sched_setattr
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 136efc6b8c5444..b408c13b934296 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -356,8 +356,8 @@
 342	o32	syncfs				sys_syncfs
 343	o32	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 344	o32	setns				sys_setns
-345	o32	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-346	o32	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+345	o32	process_vm_readv		sys_process_vm_readv
+346	o32	process_vm_writev		sys_process_vm_writev
 347	o32	kcmp				sys_kcmp
 348	o32	finit_module			sys_finit_module
 349	o32	sched_setattr			sys_sched_setattr
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index a9e184192caedd..2015a5124b78ad 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -372,8 +372,8 @@
 327	common	syncfs			sys_syncfs
 328	common	setns			sys_setns
 329	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
-330	common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-331	common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+330	common	process_vm_readv	sys_process_vm_readv
+331	common	process_vm_writev	sys_process_vm_writev
 332	common	kcmp			sys_kcmp
 333	common	finit_module		sys_finit_module
 334	common	sched_setattr		sys_sched_setattr
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 0d4985919ca34d..66a472aa635d3f 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -449,8 +449,8 @@
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
-351	nospu	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-352	nospu	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+351	nospu	process_vm_readv		sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev
 353	nospu	finit_module			sys_finit_module
 354	nospu	kcmp				sys_kcmp
 355	common	sched_setattr			sys_sched_setattr
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index b5495a42814bd1..7485867a490bb2 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -347,8 +347,8 @@
 337  common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
-340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
 343  common	kcmp			sys_kcmp			sys_kcmp
 344  common	finit_module		sys_finit_module		sys_finit_module
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index f1810c1a35caa5..4a9365b2e340b2 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -406,8 +406,8 @@
 335	common	syncfs			sys_syncfs
 336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 337	common	setns			sys_setns
-338	common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-339	common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+338	common	process_vm_readv	sys_process_vm_readv
+339	common	process_vm_writev	sys_process_vm_writev
 340	32	kern_features		sys_ni_syscall			sys_kern_features
 340	64	kern_features		sys_kern_features
 341	common	kcmp			sys_kcmp
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index a4840b9d50ad14..f2fe0a33bcfdd5 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -17,6 +17,8 @@
 #define __x32_sys_getsockopt	__x64_sys_getsockopt
 #define __x32_sys_setsockopt	__x64_sys_setsockopt
 #define __x32_sys_vmsplice	__x64_sys_vmsplice
+#define __x32_sys_process_vm_readv	__x64_sys_process_vm_readv
+#define __x32_sys_process_vm_writev	__x64_sys_process_vm_writev
 
 #define __SYSCALL_64(nr, sym)
 
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 0fb2f172581e51..5fbe10ad8a23fc 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -358,8 +358,8 @@
 344	i386	syncfs			sys_syncfs
 345	i386	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 346	i386	setns			sys_setns
-347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+347	i386	process_vm_readv	sys_process_vm_readv
+348	i386	process_vm_writev	sys_process_vm_writev
 349	i386	kcmp			sys_kcmp
 350	i386	finit_module		sys_finit_module
 351	i386	sched_setattr		sys_sched_setattr
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 642af919183de4..347809649ba28f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -395,8 +395,8 @@
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
 537	x32	recvmmsg		compat_sys_recvmmsg_time64
 538	x32	sendmmsg		compat_sys_sendmmsg
-539	x32	process_vm_readv	compat_sys_process_vm_readv
-540	x32	process_vm_writev	compat_sys_process_vm_writev
+539	x32	process_vm_readv	sys_process_vm_readv
+540	x32	process_vm_writev	sys_process_vm_writev
 541	x32	setsockopt		sys_setsockopt
 542	x32	getsockopt		sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 14d8bf412bd02e..d46e5905970817 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -780,14 +780,6 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     int flags);
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags);
-asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
-		const struct compat_iovec __user *lvec,
-		compat_ulong_t liovcnt, const struct compat_iovec __user *rvec,
-		compat_ulong_t riovcnt, compat_ulong_t flags);
-asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid,
-		const struct compat_iovec __user *lvec,
-		compat_ulong_t liovcnt, const struct compat_iovec __user *rvec,
-		compat_ulong_t riovcnt, compat_ulong_t flags);
 asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
 		     const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp, int flags);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f2dcb0d5703014..c1dfe99c9c3f70 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
 #define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
-          compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
-          compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 272
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 3f2156aab44263..fd12da80b6f27b 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -14,10 +14,6 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
-#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-#endif
-
 /**
  * process_vm_rw_pages - read/write pages from task specified
  * @pages: array of pointers to pages we want to copy
@@ -304,68 +300,3 @@ SYSCALL_DEFINE6(process_vm_writev, pid_t, pid,
 {
 	return process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1);
 }
-
-#ifdef CONFIG_COMPAT
-
-static ssize_t
-compat_process_vm_rw(compat_pid_t pid,
-		     const struct compat_iovec __user *lvec,
-		     unsigned long liovcnt,
-		     const struct compat_iovec __user *rvec,
-		     unsigned long riovcnt,
-		     unsigned long flags, int vm_write)
-{
-	struct iovec iovstack_l[UIO_FASTIOV];
-	struct iovec iovstack_r[UIO_FASTIOV];
-	struct iovec *iov_l = iovstack_l;
-	struct iovec *iov_r = iovstack_r;
-	struct iov_iter iter;
-	ssize_t rc = -EFAULT;
-	int dir = vm_write ? WRITE : READ;
-
-	if (flags != 0)
-		return -EINVAL;
-
-	rc = import_iovec(dir, (const struct iovec __user *)lvec, liovcnt,
-			  UIO_FASTIOV, &iov_l, &iter);
-	if (rc < 0)
-		return rc;
-	if (!iov_iter_count(&iter))
-		goto free_iov_l;
-	iov_r = iovec_from_user((const struct iovec __user *)rvec, riovcnt,
-				UIO_FASTIOV, iovstack_r, true);
-	if (IS_ERR(iov_r)) {
-		rc = PTR_ERR(iov_r);
-		goto free_iov_l;
-	}
-	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-	if (iov_r != iovstack_r)
-		kfree(iov_r);
-free_iov_l:
-	kfree(iov_l);
-	return rc;
-}
-
-COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid,
-		       const struct compat_iovec __user *, lvec,
-		       compat_ulong_t, liovcnt,
-		       const struct compat_iovec __user *, rvec,
-		       compat_ulong_t, riovcnt,
-		       compat_ulong_t, flags)
-{
-	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
-				    riovcnt, flags, 0);
-}
-
-COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid,
-		       const struct compat_iovec __user *, lvec,
-		       compat_ulong_t, liovcnt,
-		       const struct compat_iovec __user *, rvec,
-		       compat_ulong_t, riovcnt,
-		       compat_ulong_t, flags)
-{
-	return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
-				    riovcnt, flags, 1);
-}
-
-#endif
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f2dcb0d5703014..c1dfe99c9c3f70 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
 #define __NR_sendmmsg 269
 __SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
 #define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
-          compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
 #define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
-          compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
 #define __NR_kcmp 272
 __SYSCALL(__NR_kcmp, sys_kcmp)
 #define __NR_finit_module 273
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 26f0347c15118b..a188f053cbf90a 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -443,8 +443,8 @@
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
-351	nospu	process_vm_readv		sys_process_vm_readv		compat_sys_process_vm_readv
-352	nospu	process_vm_writev		sys_process_vm_writev		compat_sys_process_vm_writev
+351	nospu	process_vm_readv		sys_process_vm_readv
+352	nospu	process_vm_writev		sys_process_vm_writev
 353	nospu	finit_module			sys_finit_module
 354	nospu	kcmp				sys_kcmp
 355	common	sched_setattr			sys_sched_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 02ad81f69bb7e3..c44c83032c3a04 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -347,8 +347,8 @@
 337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
-340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
 343  common	kcmp			sys_kcmp			compat_sys_kcmp
 344  common	finit_module		sys_finit_module		compat_sys_finit_module
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 642af919183de4..347809649ba28f 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -395,8 +395,8 @@
 536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
 537	x32	recvmmsg		compat_sys_recvmmsg_time64
 538	x32	sendmmsg		compat_sys_sendmmsg
-539	x32	process_vm_readv	compat_sys_process_vm_readv
-540	x32	process_vm_writev	compat_sys_process_vm_writev
+539	x32	process_vm_readv	sys_process_vm_readv
+540	x32	process_vm_writev	sys_process_vm_writev
 541	x32	setsockopt		sys_setsockopt
 542	x32	getsockopt		sys_getsockopt
 543	x32	io_setup		compat_sys_io_setup
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread
- * [PATCH 9/9] security/keys: remove compat_keyctl_instantiate_key_iov
  2020-09-23  6:05 let import_iovec deal with compat_iovecs as well v3 Christoph Hellwig
                   ` (7 preceding siblings ...)
  2020-09-23  6:05 ` [PATCH 8/9] mm: remove compat_process_vm_{readv,writev} Christoph Hellwig
@ 2020-09-23  6:05 ` Christoph Hellwig
  8 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2020-09-23  6:05 UTC (permalink / raw)
  To: Alexander Viro
  Cc: Andrew Morton, Jens Axboe, Arnd Bergmann, David Howells,
	David Laight, linux-arm-kernel, linux-kernel, linux-mips,
	linux-parisc, linuxppc-dev, linux-s390, sparclinux, linux-block,
	linux-scsi, linux-fsdevel, linux-aio, io-uring, linux-arch,
	linux-mm, netdev, keyrings, linux-security-module
Now that import_iovec handles compat iovecs, the native version of
keyctl_instantiate_key_iov can be used for the compat case as well.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 security/keys/compat.c   | 36 ++----------------------------------
 security/keys/internal.h |  5 -----
 security/keys/keyctl.c   |  2 +-
 3 files changed, 3 insertions(+), 40 deletions(-)
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 7ae531db031cf8..1545efdca56227 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -11,38 +11,6 @@
 #include <linux/slab.h>
 #include "internal.h"
 
-/*
- * Instantiate a key with the specified compatibility multipart payload and
- * link the key into the destination keyring if one is given.
- *
- * The caller must have the appropriate instantiation permit set for this to
- * work (see keyctl_assume_authority).  No other permissions are required.
- *
- * If successful, 0 will be returned.
- */
-static long compat_keyctl_instantiate_key_iov(
-	key_serial_t id,
-	const struct compat_iovec __user *_payload_iov,
-	unsigned ioc,
-	key_serial_t ringid)
-{
-	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
-	struct iov_iter from;
-	long ret;
-
-	if (!_payload_iov)
-		ioc = 0;
-
-	ret = import_iovec(WRITE, (const struct iovec __user *)_payload_iov,
-			   ioc, ARRAY_SIZE(iovstack), &iov, &from);
-	if (ret < 0)
-		return ret;
-
-	ret = keyctl_instantiate_key_common(id, &from, ringid);
-	kfree(iov);
-	return ret;
-}
-
 /*
  * The key control system call, 32-bit compatibility version for 64-bit archs
  */
@@ -113,8 +81,8 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
 		return keyctl_reject_key(arg2, arg3, arg4, arg5);
 
 	case KEYCTL_INSTANTIATE_IOV:
-		return compat_keyctl_instantiate_key_iov(
-			arg2, compat_ptr(arg3), arg4, arg5);
+		return keyctl_instantiate_key_iov(arg2, compat_ptr(arg3), arg4,
+						  arg5);
 
 	case KEYCTL_INVALIDATE:
 		return keyctl_invalidate_key(arg2);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 338a526cbfa516..9b9cf3b6fcbb4d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -262,11 +262,6 @@ extern long keyctl_instantiate_key_iov(key_serial_t,
 				       const struct iovec __user *,
 				       unsigned, key_serial_t);
 extern long keyctl_invalidate_key(key_serial_t);
-
-struct iov_iter;
-extern long keyctl_instantiate_key_common(key_serial_t,
-					  struct iov_iter *,
-					  key_serial_t);
 extern long keyctl_restrict_keyring(key_serial_t id,
 				    const char __user *_type,
 				    const char __user *_restriction);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 9febd37a168fd0..e26bbccda7ccee 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1164,7 +1164,7 @@ static int keyctl_change_reqkey_auth(struct key *key)
  *
  * If successful, 0 will be returned.
  */
-long keyctl_instantiate_key_common(key_serial_t id,
+static long keyctl_instantiate_key_common(key_serial_t id,
 				   struct iov_iter *from,
 				   key_serial_t ringid)
 {
-- 
2.28.0
^ permalink raw reply related	[flat|nested] 25+ messages in thread