Netdev List
 help / color / mirror / Atom feed
* [PATCH 15/25] ipv6 equivalent of "ipv4: Avoid reading user iov twice after raw_probe_proto_opt"
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv6/raw.c | 112 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8baa53e..942f67b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -672,65 +672,62 @@ error:
 	return err;
 }
 
-static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
+struct raw6_frag_vec {
+	struct msghdr *msg;
+	int hlen;
+	char c[4];
+};
+
+static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6)
 {
-	struct iovec *iov;
-	u8 __user *type = NULL;
-	u8 __user *code = NULL;
-	u8 len = 0;
-	int probed = 0;
-	int i;
-
-	if (!msg->msg_iov)
-		return 0;
+	int err = 0;
+	switch (fl6->flowi6_proto) {
+	case IPPROTO_ICMPV6:
+		rfv->hlen = 2;
+		err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen);
+		if (!err) {
+			fl6->fl6_icmp_type = rfv->c[0];
+			fl6->fl6_icmp_code = rfv->c[1];
+		}
+		break;
+	case IPPROTO_MH:
+		rfv->hlen = 4;
+		err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen);
+		if (!err)
+			fl6->fl6_mh_type = rfv->c[2];
+	}
+	return err;
+}
 
-	for (i = 0; i < msg->msg_iovlen; i++) {
-		iov = &msg->msg_iov[i];
-		if (!iov)
-			continue;
+static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
+		       struct sk_buff *skb)
+{
+	struct raw6_frag_vec *rfv = from;
 
-		switch (fl6->flowi6_proto) {
-		case IPPROTO_ICMPV6:
-			/* check if one-byte field is readable or not. */
-			if (iov->iov_base && iov->iov_len < 1)
-				break;
-
-			if (!type) {
-				type = iov->iov_base;
-				/* check if code field is readable or not. */
-				if (iov->iov_len > 1)
-					code = type + 1;
-			} else if (!code)
-				code = iov->iov_base;
-
-			if (type && code) {
-				if (get_user(fl6->fl6_icmp_type, type) ||
-				    get_user(fl6->fl6_icmp_code, code))
-					return -EFAULT;
-				probed = 1;
-			}
-			break;
-		case IPPROTO_MH:
-			if (iov->iov_base && iov->iov_len < 1)
-				break;
-			/* check if type field is readable or not. */
-			if (iov->iov_len > 2 - len) {
-				u8 __user *p = iov->iov_base;
-				if (get_user(fl6->fl6_mh_type, &p[2 - len]))
-					return -EFAULT;
-				probed = 1;
-			} else
-				len += iov->iov_len;
+	if (offset < rfv->hlen) {
+		int copy = min(rfv->hlen - offset, len);
 
-			break;
-		default:
-			probed = 1;
-			break;
-		}
-		if (probed)
-			break;
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			memcpy(to, rfv->c + offset, copy);
+		else
+			skb->csum = csum_block_add(
+				skb->csum,
+				csum_partial_copy_nocheck(rfv->c + offset,
+							  to, copy, 0),
+				odd);
+
+		odd = 0;
+		offset += copy;
+		to += copy;
+		len -= copy;
+
+		if (!len)
+			return 0;
 	}
-	return 0;
+
+	offset -= rfv->hlen;
+
+	return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
 }
 
 static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -745,6 +742,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
+	struct raw6_frag_vec rfv;
 	struct flowi6 fl6;
 	int addr_len = msg->msg_namelen;
 	int hlimit = -1;
@@ -848,7 +846,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	opt = ipv6_fixup_options(&opt_space, opt);
 
 	fl6.flowi6_proto = proto;
-	err = rawv6_probe_proto_opt(&fl6, msg);
+	rfv.msg = msg;
+	rfv.hlen = 0;
+	err = rawv6_probe_proto_opt(&rfv, &fl6);
 	if (err)
 		goto out;
 
@@ -889,7 +889,7 @@ back_from_confirm:
 		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
 	else {
 		lock_sock(sk);
-		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
+		err = ip6_append_data(sk, raw6_getfrag, &rfv,
 			len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
 			msg->msg_flags, dontfrag);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 16/25] ip_generic_getfrag, udplite_getfrag: switch to passing msghdr
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/udplite.h | 3 ++-
 net/ipv4/ip_output.c  | 6 +++---
 net/ipv4/raw.c        | 2 +-
 net/ipv4/udp.c        | 4 ++--
 net/ipv6/raw.c        | 2 +-
 net/ipv6/udp.c        | 2 +-
 net/l2tp/l2tp_ip6.c   | 2 +-
 7 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/include/net/udplite.h b/include/net/udplite.h
index 9a28a51..d5baaba 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -19,7 +19,8 @@ extern struct udp_table		udplite_table;
 static __inline__ int udplite_getfrag(void *from, char *to, int  offset,
 				      int len, int odd, struct sk_buff *skb)
 {
-	return memcpy_fromiovecend(to, (struct iovec *) from, offset, len);
+	struct msghdr *msg = from;
+	return memcpy_fromiovecend(to, msg->msg_iov, offset, len);
 }
 
 /* Designate sk as UDP-Lite socket */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4a929ad..cdedcf1 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -752,14 +752,14 @@ EXPORT_SYMBOL(ip_fragment);
 int
 ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
 {
-	struct iovec *iov = from;
+	struct msghdr *msg = from;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
+		if (memcpy_fromiovecend(to, msg->msg_iov, offset, len) < 0)
 			return -EFAULT;
 	} else {
 		__wsum csum = 0;
-		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
+		if (csum_partial_copy_fromiovecend(to, msg->msg_iov, offset, len, &csum) < 0)
 			return -EFAULT;
 		skb->csum = csum_block_add(skb->csum, csum, odd);
 	}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5c901eb..5d83bd2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -478,7 +478,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
 
 	offset -= rfv->hlen;
 
-	return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
+	return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
 }
 
 static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dd8e006..13b4dcf8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1049,7 +1049,7 @@ back_from_confirm:
 
 	/* Lockless fast path for the non-corking case. */
 	if (!corkreq) {
-		skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
+		skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
 				  sizeof(struct udphdr), &ipc, &rt,
 				  msg->msg_flags);
 		err = PTR_ERR(skb);
@@ -1080,7 +1080,7 @@ back_from_confirm:
 
 do_append_data:
 	up->len += ulen;
-	err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
+	err = ip_append_data(sk, fl4, getfrag, msg, ulen,
 			     sizeof(struct udphdr), &ipc, &rt,
 			     corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 942f67b..11a9283 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -727,7 +727,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
 
 	offset -= rfv->hlen;
 
-	return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
+	return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
 }
 
 static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7f964322..189dc4a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1312,7 +1312,7 @@ do_append_data:
 		dontfrag = np->dontfrag;
 	up->len += ulen;
 	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
-	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
+	err = ip6_append_data(sk, getfrag, msg, ulen,
 		sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
 		(struct rt6_info *)dst,
 		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 2177b96..8611f1b 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -619,7 +619,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 back_from_confirm:
 	lock_sock(sk);
-	err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
+	err = ip6_append_data(sk, ip_generic_getfrag, msg,
 			      ulen, transhdrlen, hlimit, tclass, opt,
 			      &fl6, (struct rt6_info *)dst,
 			      msg->msg_flags, dontfrag);
-- 
2.1.3

^ permalink raw reply related

* [PATCH 25/25] bury memcpy_toiovec()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

no users left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  1 -
 lib/iovec.c         | 25 -------------------------
 2 files changed, 26 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index bd8569a..a41e252 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -131,7 +131,6 @@ size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_
 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
-int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 			int offset, int len);
 int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
diff --git a/lib/iovec.c b/lib/iovec.c
index df3abd1..2d99cb4 100644
--- a/lib/iovec.c
+++ b/lib/iovec.c
@@ -29,31 +29,6 @@ EXPORT_SYMBOL(memcpy_fromiovec);
 
 /*
  *	Copy kernel to iovec. Returns -EFAULT on error.
- *
- *	Note: this modifies the original iovec.
- */
-
-int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
-{
-	while (len > 0) {
-		if (iov->iov_len) {
-			int copy = min_t(unsigned int, iov->iov_len, len);
-			if (copy_to_user(iov->iov_base, kdata, copy))
-				return -EFAULT;
-			kdata += copy;
-			len -= copy;
-			iov->iov_len -= copy;
-			iov->iov_base += copy;
-		}
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovec);
-
-/*
- *	Copy kernel to iovec. Returns -EFAULT on error.
  */
 
 int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
-- 
2.1.3

^ permalink raw reply related

* [PATCH 24/25] skb_copy_datagram_iovec() can die
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

no callers other than itself.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/skbuff.h |  2 --
 net/core/datagram.c    | 84 --------------------------------------------------
 2 files changed, 86 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4902f2d..ab0bc43 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2644,8 +2644,6 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 				  int *err);
 unsigned int datagram_poll(struct file *file, struct socket *sock,
 			   struct poll_table_struct *wait);
-int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
-			    struct iovec *to, int size);
 int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
 			   struct iov_iter *to, int size);
 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 41075ed..df493d6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -310,90 +310,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 EXPORT_SYMBOL(skb_kill_datagram);
 
 /**
- *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
- *	@skb: buffer to copy
- *	@offset: offset in the buffer to start copying from
- *	@to: io vector to copy to
- *	@len: amount of data to copy from buffer to iovec
- *
- *	Note: the iovec is modified during the copy.
- */
-int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
-			    struct iovec *to, int len)
-{
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
-	struct sk_buff *frag_iter;
-
-	trace_skb_copy_datagram_iovec(skb, len);
-
-	/* Copy header. */
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		if (memcpy_toiovec(to, skb->data + offset, copy))
-			goto fault;
-		if ((len -= copy) == 0)
-			return 0;
-		offset += copy;
-	}
-
-	/* Copy paged appendix. Hmm... why does this look so complicated? */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-		WARN_ON(start > offset + len);
-
-		end = start + skb_frag_size(frag);
-		if ((copy = end - offset) > 0) {
-			int err;
-			u8  *vaddr;
-			struct page *page = skb_frag_page(frag);
-
-			if (copy > len)
-				copy = len;
-			vaddr = kmap(page);
-			err = memcpy_toiovec(to, vaddr + frag->page_offset +
-					     offset - start, copy);
-			kunmap(page);
-			if (err)
-				goto fault;
-			if (!(len -= copy))
-				return 0;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	skb_walk_frags(skb, frag_iter) {
-		int end;
-
-		WARN_ON(start > offset + len);
-
-		end = start + frag_iter->len;
-		if ((copy = end - offset) > 0) {
-			if (copy > len)
-				copy = len;
-			if (skb_copy_datagram_iovec(frag_iter,
-						    offset - start,
-						    to, copy))
-				goto fault;
-			if ((len -= copy) == 0)
-				return 0;
-			offset += copy;
-		}
-		start = end;
-	}
-	if (!len)
-		return 0;
-
-fault:
-	return -EFAULT;
-}
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-
-/**
  *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
  *	@skb: buffer to copy
  *	@offset: offset in the buffer to start copying from
-- 
2.1.3

^ permalink raw reply related

* [PATCH 23/25] ppp_read(): switch to skb_copy_datagram_iter()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/net/ppp/ppp_generic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 794a473..af034db 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -417,6 +417,7 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
 	ssize_t ret;
 	struct sk_buff *skb = NULL;
 	struct iovec iov;
+	struct iov_iter to;
 
 	ret = count;
 
@@ -462,7 +463,8 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
 	ret = -EFAULT;
 	iov.iov_base = buf;
 	iov.iov_len = count;
-	if (skb_copy_datagram_iovec(skb, 0, &iov, skb->len))
+	iov_iter_init(&to, READ, &iov, 1, count);
+	if (skb_copy_datagram_iter(skb, 0, &to, skb->len))
 		goto outf;
 	ret = skb->len;
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 22/25] switch memcpy_to_msg() and skb_copy{,_and_csum}_datagram_msg() to primitives
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

... making both non-draining.  That means that tcp_recvmsg() becomes
non-draining.  And _that_ would break iscsit_do_rx_data() unless we
	a) make sure tcp_recvmsg() is uniformly non-draining (it is)
	b) make sure it copes with arbitrary (including shifted)
iov_iter (it does, all it uses is iov_iter primitives)
	c) make iscsit_do_rx_data() initialize ->msg_iter only once.

Fortunately, (c) is doable with minimal work and we are rid of one
the two places where kernel send/recvmsg users would be unhappy with
non-draining behaviour.

Actually, that makes all but one of ->recvmsg() instances iov_iter-clean.
The exception is skcipher_recvmsg() and it also isn't hard to convert
to primitives (iov_iter_get_pages() is needed there).  That'll wait
a bit - there's some interplay with ->sendmsg() path for that one.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/target/iscsi/iscsi_target_util.c | 12 +++----
 include/linux/skbuff.h                   | 16 +++-------
 net/core/datagram.c                      | 54 +++++++++++---------------------
 3 files changed, 28 insertions(+), 54 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index ce87ce9..7c6a95b 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1326,21 +1326,19 @@ static int iscsit_do_rx_data(
 	struct iscsi_conn *conn,
 	struct iscsi_data_count *count)
 {
-	int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
-	struct kvec *iov_p;
+	int data = count->data_length, rx_loop = 0, total_rx = 0;
 	struct msghdr msg;
 
 	if (!conn || !conn->sock || !conn->conn_ops)
 		return -1;
 
 	memset(&msg, 0, sizeof(struct msghdr));
-
-	iov_p = count->iov;
-	iov_len	= count->iov_count;
+	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC,
+		      count->iov, count->iov_count, data);
 
 	while (total_rx < data) {
-		rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len,
-					(data - total_rx), MSG_WAITALL);
+		rx_loop = sock_recvmsg(conn->sock, &msg,
+				      (data - total_rx), MSG_WAITALL);
 		if (rx_loop <= 0) {
 			pr_debug("rx_loop: %d total_rx: %d\n",
 				rx_loop, total_rx);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 52cf1bd..4902f2d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2651,17 +2651,10 @@ int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
 					struct msghdr *msg, int size)
 {
-	/* XXX: stripping const */
-	return skb_copy_datagram_iovec(from, offset, (struct iovec *)msg->msg_iter.iov, size);
-}
-int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
-				     struct iovec *iov);
-static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
-			    struct msghdr *msg)
-{
-	/* XXX: stripping const */
-	return skb_copy_and_csum_datagram_iovec(skb, hlen, (struct iovec *)msg->msg_iter.iov);
+	return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size);
 }
+int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
+				   struct msghdr *msg);
 int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
 				 struct iov_iter *from, int len);
 int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
@@ -2697,8 +2690,7 @@ static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
 
 static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
 {
-	/* XXX: stripping const */
-	return memcpy_toiovec((struct iovec *)msg->msg_iter.iov, data, len);
+	return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT;
 }
 
 struct skb_checksum_ops {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b6e303b..41075ed 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -615,27 +615,25 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
 EXPORT_SYMBOL(zerocopy_sg_from_iter);
 
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
-				      u8 __user *to, int len,
+				      struct iov_iter *to, int len,
 				      __wsum *csump)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
 	struct sk_buff *frag_iter;
 	int pos = 0;
+	int n;
 
 	/* Copy header. */
 	if (copy > 0) {
-		int err = 0;
 		if (copy > len)
 			copy = len;
-		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
-					       *csump, &err);
-		if (err)
+		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
+		if (n != copy)
 			goto fault;
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
-		to += copy;
 		pos = copy;
 	}
 
@@ -647,26 +645,22 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 
 		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
-			__wsum csum2;
-			int err = 0;
-			u8  *vaddr;
+			__wsum csum2 = 0;
 			struct page *page = skb_frag_page(frag);
+			u8  *vaddr = kmap(page);
 
 			if (copy > len)
 				copy = len;
-			vaddr = kmap(page);
-			csum2 = csum_and_copy_to_user(vaddr +
-							frag->page_offset +
-							offset - start,
-						      to, copy, 0, &err);
+			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
+						  offset - start, copy,
+						  &csum2, to);
 			kunmap(page);
-			if (err)
+			if (n != copy)
 				goto fault;
 			*csump = csum_block_add(*csump, csum2, pos);
 			if (!(len -= copy))
 				return 0;
 			offset += copy;
-			to += copy;
 			pos += copy;
 		}
 		start = end;
@@ -691,7 +685,6 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 			if ((len -= copy) == 0)
 				return 0;
 			offset += copy;
-			to += copy;
 			pos += copy;
 		}
 		start = end;
@@ -744,20 +737,19 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
- *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
+ *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
  *	@skb: skbuff
  *	@hlen: hardware length
- *	@iov: io vector
+ *	@msg: destination
  *
  *	Caller _must_ check that skb will fit to this iovec.
  *
  *	Returns: 0       - success.
  *		 -EINVAL - checksum failure.
- *		 -EFAULT - fault during copy. Beware, in this case iovec
- *			   can be modified!
+ *		 -EFAULT - fault during copy.
  */
-int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
-				     int hlen, struct iovec *iov)
+int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
+				   int hlen, struct msghdr *msg)
 {
 	__wsum csum;
 	int chunk = skb->len - hlen;
@@ -765,28 +757,20 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 	if (!chunk)
 		return 0;
 
-	/* Skip filled elements.
-	 * Pretty silly, look at memcpy_toiovec, though 8)
-	 */
-	while (!iov->iov_len)
-		iov++;
-
-	if (iov->iov_len < chunk) {
+	if (iov_iter_count(&msg->msg_iter) < chunk) {
 		if (__skb_checksum_complete(skb))
 			goto csum_error;
-		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
+		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
 			goto fault;
 	} else {
 		csum = csum_partial(skb->data, hlen, skb->csum);
-		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
+		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
 					       chunk, &csum))
 			goto fault;
 		if (csum_fold(csum))
 			goto csum_error;
 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
-		iov->iov_len -= chunk;
-		iov->iov_base += chunk;
 	}
 	return 0;
 csum_error:
@@ -794,7 +778,7 @@ csum_error:
 fault:
 	return -EFAULT;
 }
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
 
 /**
  * 	datagram_poll - generic datagram poll
-- 
2.1.3

^ permalink raw reply related

* [PATCH 21/25] first fruits - kill l2cap ->memcpy_fromiovec()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Just use copy_from_iter().  That's what this method is trying to do
in all cases, in a very convoluted fashion.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/bluetooth/l2cap.h | 29 -----------------------------
 net/bluetooth/6lowpan.c       |  3 +--
 net/bluetooth/a2mp.c          |  3 +--
 net/bluetooth/l2cap_core.c    |  7 +++----
 net/bluetooth/l2cap_sock.c    |  8 --------
 net/bluetooth/smp.c           |  4 +---
 6 files changed, 6 insertions(+), 48 deletions(-)

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index bca6fc0..692f786 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -606,10 +606,6 @@ struct l2cap_ops {
 	struct sk_buff		*(*alloc_skb) (struct l2cap_chan *chan,
 					       unsigned long hdr_len,
 					       unsigned long len, int nb);
-	int			(*memcpy_fromiovec) (struct l2cap_chan *chan,
-						     unsigned char *kdata,
-						     struct msghdr *msg,
-						     int len);
 };
 
 struct l2cap_conn {
@@ -903,31 +899,6 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan)
 	return 0;
 }
 
-static inline int l2cap_chan_no_memcpy_fromiovec(struct l2cap_chan *chan,
-						 unsigned char *kdata,
-						 struct msghdr *msg,
-						 int len)
-{
-	/* Following is safe since for compiler definitions of kvec and
-	 * iovec are identical, yielding the same in-core layout and alignment
-	 */
-	struct kvec *vec = (struct kvec *)msg->msg_iter.iov;
-
-	while (len > 0) {
-		if (vec->iov_len) {
-			int copy = min_t(unsigned int, len, vec->iov_len);
-			memcpy(kdata, vec->iov_base, copy);
-			len -= copy;
-			kdata += copy;
-			vec->iov_base += copy;
-			vec->iov_len -= copy;
-		}
-		vec++;
-	}
-
-	return 0;
-}
-
 extern bool disable_ertm;
 
 int l2cap_init_sockets(void);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index d8c67a5..76617be 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -541,7 +541,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
 	iv.iov_len = skb->len;
 
 	memset(&msg, 0, sizeof(msg));
-	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *) &iv, 1, skb->len);
+	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
 
 	err = l2cap_chan_send(chan, &msg, skb->len);
 	if (err > 0) {
@@ -1050,7 +1050,6 @@ static const struct l2cap_ops bt_6lowpan_chan_ops = {
 	.suspend		= chan_suspend_cb,
 	.get_sndtimeo		= chan_get_sndtimeo_cb,
 	.alloc_skb		= chan_alloc_skb_cb,
-	.memcpy_fromiovec	= l2cap_chan_no_memcpy_fromiovec,
 
 	.teardown		= l2cap_chan_no_teardown,
 	.defer			= l2cap_chan_no_defer,
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 716d2a3..cedfbda 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -60,7 +60,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)&iv, 1, total_len);
+	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
 
 	l2cap_chan_send(chan, &msg, total_len);
 
@@ -719,7 +719,6 @@ static const struct l2cap_ops a2mp_chan_ops = {
 	.resume = l2cap_chan_no_resume,
 	.set_shutdown = l2cap_chan_no_set_shutdown,
 	.get_sndtimeo = l2cap_chan_no_get_sndtimeo,
-	.memcpy_fromiovec = l2cap_chan_no_memcpy_fromiovec,
 };
 
 static struct l2cap_chan *a2mp_chan_open(struct l2cap_conn *conn, bool locked)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 5201d61..1754040 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2096,8 +2096,7 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
 	struct sk_buff **frag;
 	int sent = 0;
 
-	if (chan->ops->memcpy_fromiovec(chan, skb_put(skb, count),
-					msg, count))
+	if (copy_from_iter(skb_put(skb, count), count, &msg->msg_iter) != count)
 		return -EFAULT;
 
 	sent += count;
@@ -2117,8 +2116,8 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
 
 		*frag = tmp;
 
-		if (chan->ops->memcpy_fromiovec(chan, skb_put(*frag, count),
-						msg, count))
+		if (copy_from_iter(skb_put(*frag, count), count,
+				   &msg->msg_iter) != count)
 			return -EFAULT;
 
 		sent += count;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 205b298..f65caf4 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1336,13 +1336,6 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
 	return skb;
 }
 
-static int l2cap_sock_memcpy_fromiovec_cb(struct l2cap_chan *chan,
-					  unsigned char *kdata,
-					  struct msghdr *msg, int len)
-{
-	return memcpy_from_msg(kdata, msg, len);
-}
-
 static void l2cap_sock_ready_cb(struct l2cap_chan *chan)
 {
 	struct sock *sk = chan->data;
@@ -1427,7 +1420,6 @@ static const struct l2cap_ops l2cap_chan_ops = {
 	.set_shutdown		= l2cap_sock_set_shutdown_cb,
 	.get_sndtimeo		= l2cap_sock_get_sndtimeo_cb,
 	.alloc_skb		= l2cap_sock_alloc_skb_cb,
-	.memcpy_fromiovec	= l2cap_sock_memcpy_fromiovec_cb,
 };
 
 static void l2cap_sock_destruct(struct sock *sk)
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 21f555b..de7dc75 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -268,7 +268,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iv, 2, 1 + len);
+	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
 
 	l2cap_chan_send(chan, &msg, 1 + len);
 
@@ -1629,7 +1629,6 @@ static const struct l2cap_ops smp_chan_ops = {
 	.suspend		= l2cap_chan_no_suspend,
 	.set_shutdown		= l2cap_chan_no_set_shutdown,
 	.get_sndtimeo		= l2cap_chan_no_get_sndtimeo,
-	.memcpy_fromiovec	= l2cap_chan_no_memcpy_fromiovec,
 };
 
 static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
@@ -1678,7 +1677,6 @@ static const struct l2cap_ops smp_root_chan_ops = {
 	.resume			= l2cap_chan_no_resume,
 	.set_shutdown		= l2cap_chan_no_set_shutdown,
 	.get_sndtimeo		= l2cap_chan_no_get_sndtimeo,
-	.memcpy_fromiovec	= l2cap_chan_no_memcpy_fromiovec,
 };
 
 int smp_register(struct hci_dev *hdev)
-- 
2.1.3

^ permalink raw reply related

* [PATCH 20/25] put iov_iter into msghdr
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Note that the code _using_ ->msg_iter at that point will be very
unhappy with anything other than unshifted iovec-backed iov_iter.
We still need to convert users to proper primitives.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 crypto/algif_hash.c            |  4 ++--
 crypto/algif_skcipher.c        |  4 ++--
 drivers/net/macvtap.c          |  8 ++------
 drivers/net/tun.c              |  8 ++------
 drivers/vhost/net.c            |  8 +++-----
 fs/afs/rxrpc.c                 | 14 ++++++--------
 include/linux/skbuff.h         | 16 ++++++++++------
 include/linux/socket.h         |  3 +--
 include/net/bluetooth/l2cap.h  |  2 +-
 include/net/udplite.h          |  3 ++-
 net/atm/common.c               |  5 +----
 net/bluetooth/6lowpan.c        |  6 +++---
 net/bluetooth/a2mp.c           |  3 +--
 net/bluetooth/smp.c            |  3 +--
 net/caif/caif_socket.c         |  2 +-
 net/compat.c                   | 10 ++++++----
 net/ipv4/ip_output.c           |  6 ++++--
 net/ipv4/ping.c                |  3 ++-
 net/ipv4/raw.c                 |  3 ++-
 net/ipv4/tcp.c                 |  6 +++---
 net/ipv4/tcp_output.c          |  2 +-
 net/ipv6/ping.c                |  3 ++-
 net/ipv6/raw.c                 |  3 ++-
 net/netlink/af_netlink.c       |  2 +-
 net/packet/af_packet.c         |  7 ++-----
 net/rds/recv.c                 |  7 ++++---
 net/rds/send.c                 |  4 +---
 net/rxrpc/ar-output.c          |  8 +++-----
 net/sctp/socket.c              |  5 +----
 net/socket.c                   | 27 ++++++++++++---------------
 net/tipc/msg.c                 |  4 ++--
 net/unix/af_unix.c             | 10 ++--------
 net/vmw_vsock/vmci_transport.c |  3 ++-
 33 files changed, 90 insertions(+), 112 deletions(-)

diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 35c93ff..83cd2cc 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -42,7 +42,7 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
 	struct alg_sock *ask = alg_sk(sk);
 	struct hash_ctx *ctx = ask->private;
 	unsigned long iovlen;
-	struct iovec *iov;
+	const struct iovec *iov;
 	long copied = 0;
 	int err;
 
@@ -58,7 +58,7 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock,
 
 	ctx->more = 0;
 
-	for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
+	for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0;
 	     iovlen--, iov++) {
 		unsigned long seglen = iov->iov_len;
 		char __user *from = iov->iov_base;
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c3b482b..4f45dab 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -429,13 +429,13 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
 	struct skcipher_sg_list *sgl;
 	struct scatterlist *sg;
 	unsigned long iovlen;
-	struct iovec *iov;
+	const struct iovec *iov;
 	int err = -EAGAIN;
 	int used;
 	long copied = 0;
 
 	lock_sock(sk);
-	for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0;
+	for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0;
 	     iovlen--, iov++) {
 		unsigned long seglen = iov->iov_len;
 		char __user *from = iov->iov_base;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index ba1e5db..2c157cc 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -1095,9 +1095,7 @@ static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
 			   struct msghdr *m, size_t total_len)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
-	struct iov_iter from;
-	iov_iter_init(&from, WRITE, m->msg_iov, m->msg_iovlen, total_len);
-	return macvtap_get_user(q, m, &from, m->msg_flags & MSG_DONTWAIT);
+	return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT);
 }
 
 static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
@@ -1105,12 +1103,10 @@ static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
 			   int flags)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
-	struct iov_iter to;
 	int ret;
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
 		return -EINVAL;
-	iov_iter_init(&to, READ, m->msg_iov, m->msg_iovlen, total_len);
-	ret = macvtap_do_read(q, &to, flags & MSG_DONTWAIT);
+	ret = macvtap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT);
 	if (ret > total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9c58286..f3e992e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1449,13 +1449,11 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 	int ret;
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = __tun_get(tfile);
-	struct iov_iter from;
 
 	if (!tun)
 		return -EBADFD;
 
-	iov_iter_init(&from, WRITE, m->msg_iov, m->msg_iovlen, total_len);
-	ret = tun_get_user(tun, tfile, m->msg_control, &from,
+	ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
 			   m->msg_flags & MSG_DONTWAIT);
 	tun_put(tun);
 	return ret;
@@ -1467,7 +1465,6 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = __tun_get(tfile);
-	struct iov_iter to;
 	int ret;
 
 	if (!tun)
@@ -1482,8 +1479,7 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	iov_iter_init(&to, READ, m->msg_iov, m->msg_iovlen, total_len);
-	ret = tun_do_read(tun, tfile, &to, flags & MSG_DONTWAIT);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT);
 	if (ret > total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 8dae2f7..9f06e70 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -342,7 +342,6 @@ static void handle_tx(struct vhost_net *net)
 		.msg_namelen = 0,
 		.msg_control = NULL,
 		.msg_controllen = 0,
-		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
 	size_t len, total_len = 0;
@@ -396,8 +395,8 @@ static void handle_tx(struct vhost_net *net)
 		}
 		/* Skip header. TODO: support TSO. */
 		s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
-		msg.msg_iovlen = out;
 		len = iov_length(vq->iov, out);
+		iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
 		/* Sanity check */
 		if (!len) {
 			vq_err(vq, "Unexpected header len for TX: "
@@ -562,7 +561,6 @@ static void handle_rx(struct vhost_net *net)
 		.msg_namelen = 0,
 		.msg_control = NULL, /* FIXME: get and handle RX aux data. */
 		.msg_controllen = 0,
-		.msg_iov = vq->iov,
 		.msg_flags = MSG_DONTWAIT,
 	};
 	struct virtio_net_hdr_mrg_rxbuf hdr = {
@@ -600,7 +598,7 @@ static void handle_rx(struct vhost_net *net)
 			break;
 		/* On overrun, truncate and discard */
 		if (unlikely(headcount > UIO_MAXIOV)) {
-			msg.msg_iovlen = 1;
+			iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
 			err = sock->ops->recvmsg(NULL, sock, &msg,
 						 1, MSG_DONTWAIT | MSG_TRUNC);
 			pr_debug("Discarded rx packet: len %zd\n", sock_len);
@@ -626,7 +624,7 @@ static void handle_rx(struct vhost_net *net)
 			/* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
 			 * needed because recvmsg can modify msg_iov. */
 			copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
-		msg.msg_iovlen = in;
+		iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len);
 		err = sock->ops->recvmsg(NULL, sock, &msg,
 					 sock_len, MSG_DONTWAIT | MSG_TRUNC);
 		/* Userspace might have consumed the packet meanwhile:
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 03a3beb..06e14bf 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -306,8 +306,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
 
 			_debug("- range %u-%u%s",
 			       offset, to, msg->msg_flags ? " [more]" : "");
-			msg->msg_iov = (struct iovec *) iov;
-			msg->msg_iovlen = 1;
+			iov_iter_init(&msg->msg_iter, WRITE,
+				      (struct iovec *) iov, 1, to - offset);
 
 			/* have to change the state *before* sending the last
 			 * packet as RxRPC might give us the reply before it
@@ -384,8 +384,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	msg.msg_iov		= (struct iovec *) iov;
-	msg.msg_iovlen		= 1;
+	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iov, 1,
+		      call->request_size);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= (call->send_pages ? MSG_MORE : 0);
@@ -778,8 +778,7 @@ void afs_send_empty_reply(struct afs_call *call)
 	iov[0].iov_len		= 0;
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	msg.msg_iov		= iov;
-	msg.msg_iovlen		= 0;
+	iov_iter_init(&msg.msg_iter, WRITE, iov, 0, 0);	/* WTF? */
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
@@ -815,8 +814,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	iov[0].iov_len		= len;
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	msg.msg_iov		= iov;
-	msg.msg_iovlen		= 1;
+	iov_iter_init(&msg.msg_iter, WRITE, iov, 1, len);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ef64cec..52cf1bd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2646,22 +2646,24 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 			   struct poll_table_struct *wait);
 int skb_copy_datagram_iovec(const struct sk_buff *from, int offset,
 			    struct iovec *to, int size);
+int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
+			   struct iov_iter *to, int size);
 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
 					struct msghdr *msg, int size)
 {
-	return skb_copy_datagram_iovec(from, offset, msg->msg_iov, size);
+	/* XXX: stripping const */
+	return skb_copy_datagram_iovec(from, offset, (struct iovec *)msg->msg_iter.iov, size);
 }
 int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen,
 				     struct iovec *iov);
 static inline int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen,
 			    struct msghdr *msg)
 {
-	return skb_copy_and_csum_datagram_iovec(skb, hlen, msg->msg_iov);
+	/* XXX: stripping const */
+	return skb_copy_and_csum_datagram_iovec(skb, hlen, (struct iovec *)msg->msg_iter.iov);
 }
 int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
 				 struct iov_iter *from, int len);
-int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
-			   struct iov_iter *to, int size);
 int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm);
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb);
@@ -2689,12 +2691,14 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
 
 static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
 {
-	return memcpy_fromiovec(data, msg->msg_iov, len);
+	/* XXX: stripping const */
+	return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len);
 }
 
 static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len)
 {
-	return memcpy_toiovec(msg->msg_iov, data, len);
+	/* XXX: stripping const */
+	return memcpy_toiovec((struct iovec *)msg->msg_iter.iov, data, len);
 }
 
 struct skb_checksum_ops {
diff --git a/include/linux/socket.h b/include/linux/socket.h
index de52228..048d6d6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -47,8 +47,7 @@ struct linger {
 struct msghdr {
 	void		*msg_name;	/* ptr to socket address structure */
 	int		msg_namelen;	/* size of socket address structure */
-	struct iovec	*msg_iov;	/* scatter/gather array */
-	__kernel_size_t	msg_iovlen;	/* # elements in msg_iov */
+	struct iov_iter	msg_iter;	/* data */
 	void		*msg_control;	/* ancillary data */
 	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
 	unsigned int	msg_flags;	/* flags on received message */
diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 4e23674..bca6fc0 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -911,7 +911,7 @@ static inline int l2cap_chan_no_memcpy_fromiovec(struct l2cap_chan *chan,
 	/* Following is safe since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	struct kvec *vec = (struct kvec *)msg->msg_iov;
+	struct kvec *vec = (struct kvec *)msg->msg_iter.iov;
 
 	while (len > 0) {
 		if (vec->iov_len) {
diff --git a/include/net/udplite.h b/include/net/udplite.h
index d5baaba..ae7c8d1 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -20,7 +20,8 @@ static __inline__ int udplite_getfrag(void *from, char *to, int  offset,
 				      int len, int odd, struct sk_buff *skb)
 {
 	struct msghdr *msg = from;
-	return memcpy_fromiovecend(to, msg->msg_iov, offset, len);
+	/* XXX: stripping const */
+	return memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len);
 }
 
 /* Designate sk as UDP-Lite socket */
diff --git a/net/atm/common.c b/net/atm/common.c
index f591129..b84057e 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -577,9 +577,6 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 	struct atm_vcc *vcc;
 	struct sk_buff *skb;
 	int eff, error;
-	struct iov_iter from;
-
-	iov_iter_init(&from, WRITE, m->msg_iov, m->msg_iovlen, size);
 
 	lock_sock(sk);
 	if (sock->state != SS_CONNECTED) {
@@ -634,7 +631,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 		goto out;
 	skb->dev = NULL; /* for paths shared with net_device interfaces */
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
-	if (copy_from_iter(skb_put(skb, size), size, &from) != size) {
+	if (copy_from_iter(skb_put(skb, size), size, &m->msg_iter) != size) {
 		kfree_skb(skb);
 		error = -EFAULT;
 		goto out;
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index bdcaefd..d8c67a5 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -537,12 +537,12 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
 	 */
 	chan->data = skb;
 
-	memset(&msg, 0, sizeof(msg));
-	msg.msg_iov = (struct iovec *) &iv;
-	msg.msg_iovlen = 1;
 	iv.iov_base = skb->data;
 	iv.iov_len = skb->len;
 
+	memset(&msg, 0, sizeof(msg));
+	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *) &iv, 1, skb->len);
+
 	err = l2cap_chan_send(chan, &msg, skb->len);
 	if (err > 0) {
 		netdev->stats.tx_bytes += err;
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 5dcade5..716d2a3 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -60,8 +60,7 @@ void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *data)
 
 	memset(&msg, 0, sizeof(msg));
 
-	msg.msg_iov = (struct iovec *) &iv;
-	msg.msg_iovlen = 1;
+	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)&iv, 1, total_len);
 
 	l2cap_chan_send(chan, &msg, total_len);
 
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 069b76e..21f555b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -268,8 +268,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
 
 	memset(&msg, 0, sizeof(msg));
 
-	msg.msg_iov = (struct iovec *) &iv;
-	msg.msg_iovlen = 2;
+	iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)iv, 2, 1 + len);
 
 	l2cap_chan_send(chan, &msg, 1 + len);
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index ac618b0..769b185 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -535,7 +535,7 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto err;
 
 	ret = -EINVAL;
-	if (unlikely(msg->msg_iov->iov_base == NULL))
+	if (unlikely(msg->msg_iter.iov->iov_base == NULL))
 		goto err;
 	noblock = msg->msg_flags & MSG_DONTWAIT;
 
diff --git a/net/compat.c b/net/compat.c
index 062f157..3236b41 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -37,13 +37,14 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
 			  struct iovec **iov)
 {
 	compat_uptr_t uaddr, uiov, tmp3;
+	compat_size_t nr_segs;
 	ssize_t err;
 
 	if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
 	    __get_user(uaddr, &umsg->msg_name) ||
 	    __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
 	    __get_user(uiov, &umsg->msg_iov) ||
-	    __get_user(kmsg->msg_iovlen, &umsg->msg_iovlen) ||
+	    __get_user(nr_segs, &umsg->msg_iovlen) ||
 	    __get_user(tmp3, &umsg->msg_control) ||
 	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
 	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
@@ -68,14 +69,15 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
 		kmsg->msg_namelen = 0;
 	}
 
-	if (kmsg->msg_iovlen > UIO_MAXIOV)
+	if (nr_segs > UIO_MAXIOV)
 		return -EMSGSIZE;
 
 	err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE,
-					   compat_ptr(uiov), kmsg->msg_iovlen,
+					   compat_ptr(uiov), nr_segs,
 					   UIO_FASTIOV, *iov, iov);
 	if (err >= 0)
-		kmsg->msg_iov = *iov;
+		iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
+			      *iov, nr_segs, err);
 	return err;
 }
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cdedcf1..b50861b2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -755,11 +755,13 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 	struct msghdr *msg = from;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		if (memcpy_fromiovecend(to, msg->msg_iov, offset, len) < 0)
+		/* XXX: stripping const */
+		if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0)
 			return -EFAULT;
 	} else {
 		__wsum csum = 0;
-		if (csum_partial_copy_fromiovecend(to, msg->msg_iov, offset, len, &csum) < 0)
+		/* XXX: stripping const */
+		if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0)
 			return -EFAULT;
 		skb->csum = csum_block_add(skb->csum, csum, odd);
 	}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 8dd4ae0..c0d82f7 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -811,7 +811,8 @@ back_from_confirm:
 	pfh.icmph.checksum = 0;
 	pfh.icmph.un.echo.id = inet->inet_sport;
 	pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
-	pfh.iov = msg->msg_iov;
+	/* XXX: stripping const */
+	pfh.iov = (struct iovec *)msg->msg_iter.iov;
 	pfh.wcheck = 0;
 	pfh.family = AF_INET;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5d83bd2..0bb68df 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -625,7 +625,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 back_from_confirm:
 
 	if (inet->hdrincl)
-		err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len,
+		/* XXX: stripping const */
+		err = raw_send_hdrinc(sk, &fl4, (struct iovec *)msg->msg_iter.iov, len,
 				      &rt, msg->msg_flags);
 
 	 else {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4a96f37..54ba620 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1085,7 +1085,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	struct iovec *iov;
+	const struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int iovlen, flags, err, copied = 0;
@@ -1136,8 +1136,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
 	/* Ok commence sending. */
-	iovlen = msg->msg_iovlen;
-	iov = msg->msg_iov;
+	iovlen = msg->msg_iter.nr_segs;
+	iov = msg->msg_iter.iov;
 	copied = 0;
 
 	err = -EPIPE;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f5bd4bd..3e225b0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3050,7 +3050,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 	syn_data->ip_summed = CHECKSUM_PARTIAL;
 	memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
 	if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space),
-					 fo->data->msg_iov, 0, space))) {
+					 fo->data->msg_iter.iov, 0, space))) {
 		kfree_skb(syn_data);
 		goto fallback;
 	}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 5b7a1ed..2d31483 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -163,7 +163,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	pfh.icmph.checksum = 0;
 	pfh.icmph.un.echo.id = inet->inet_sport;
 	pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence;
-	pfh.iov = msg->msg_iov;
+	/* XXX: stripping const */
+	pfh.iov = (struct iovec *)msg->msg_iter.iov;
 	pfh.wcheck = 0;
 	pfh.family = AF_INET6;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 11a9283..ee25631 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -886,7 +886,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 back_from_confirm:
 	if (inet->hdrincl)
-		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
+		/* XXX: stripping const */
+		err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags);
 	else {
 		lock_sock(sk);
 		err = ip6_append_data(sk, raw6_getfrag, &rfv,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 63aa5c8..cc9bcf0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2305,7 +2305,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	}
 
 	if (netlink_tx_is_mmaped(sk) &&
-	    msg->msg_iov->iov_base == NULL) {
+	    msg->msg_iter.iov->iov_base == NULL) {
 		err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
 					   siocb);
 		goto out;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index efa8445..ed2e620 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2408,11 +2408,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	unsigned short gso_type = 0;
 	int hlen, tlen;
 	int extra_len = 0;
-	struct iov_iter from;
 	ssize_t n;
 
-	iov_iter_init(&from, WRITE, msg->msg_iov, msg->msg_iovlen, len);
-
 	/*
 	 *	Get and verify the address.
 	 */
@@ -2451,7 +2448,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		len -= vnet_hdr_len;
 
 		err = -EFAULT;
-		n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &from);
+		n = copy_from_iter(&vnet_hdr, vnet_hdr_len, &msg->msg_iter);
 		if (n != vnet_hdr_len)
 			goto out_unlock;
 
@@ -2522,7 +2519,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	}
 
 	/* Returns -EFAULT on error */
-	err = skb_copy_datagram_from_iter(skb, offset, &from, len);
+	err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
 	if (err)
 		goto out_free;
 
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 47d7b10..f9ec1ac 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -404,7 +404,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
 	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
 	struct rds_incoming *inc = NULL;
-	struct iov_iter to;
 
 	/* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */
 	timeo = sock_rcvtimeo(sk, nonblock);
@@ -415,6 +414,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		goto out;
 
 	while (1) {
+		struct iov_iter save;
 		/* If there are pending notifications, do those - and nothing else */
 		if (!list_empty(&rs->rs_notify_queue)) {
 			ret = rds_notify_queue_get(rs, msg);
@@ -450,8 +450,8 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
 			 &inc->i_conn->c_faddr,
 			 ntohs(inc->i_hdr.h_sport));
-		iov_iter_init(&to, READ, msg->msg_iov, msg->msg_iovlen, size);
-		ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &to);
+		save = msg->msg_iter;
+		ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter);
 		if (ret < 0)
 			break;
 
@@ -464,6 +464,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			rds_inc_put(inc);
 			inc = NULL;
 			rds_stats_inc(s_recv_deliver_raced);
+			msg->msg_iter = save;
 			continue;
 		}
 
diff --git a/net/rds/send.c b/net/rds/send.c
index 4de62ea..40a5629a 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -934,9 +934,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	int queued = 0, allocated_mr = 0;
 	int nonblock = msg->msg_flags & MSG_DONTWAIT;
 	long timeo = sock_sndtimeo(sk, nonblock);
-	struct iov_iter from;
 
-	iov_iter_init(&from, WRITE, msg->msg_iov, msg->msg_iovlen, payload_len);
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
 	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
@@ -984,7 +982,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			ret = -ENOMEM;
 			goto out;
 		}
-		ret = rds_message_copy_from_user(rm, &from);
+		ret = rds_message_copy_from_user(rm, &msg->msg_iter);
 		if (ret)
 			goto out;
 	}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 0b4b9a7..86e0f10 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -531,14 +531,12 @@ static int rxrpc_send_data(struct kiocb *iocb,
 	struct rxrpc_skb_priv *sp;
 	unsigned char __user *from;
 	struct sk_buff *skb;
-	struct iovec *iov;
+	const struct iovec *iov;
 	struct sock *sk = &rx->sk;
 	long timeo;
 	bool more;
 	int ret, ioc, segment, copied;
 
-	_enter(",,,{%zu},%zu", msg->msg_iovlen, len);
-
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 
 	/* this should be in poll */
@@ -547,8 +545,8 @@ static int rxrpc_send_data(struct kiocb *iocb,
 	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 		return -EPIPE;
 
-	iov = msg->msg_iov;
-	ioc = msg->msg_iovlen - 1;
+	iov = msg->msg_iter.iov;
+	ioc = msg->msg_iter.nr_segs - 1;
 	from = iov->iov_base;
 	segment = iov->iov_len;
 	iov++;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0397ac9..c92f96cd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1609,9 +1609,6 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	__u16 sinfo_flags = 0;
 	long timeo;
 	int err;
-	struct iov_iter from;
-
-	iov_iter_init(&from, WRITE, msg->msg_iov, msg->msg_iovlen, msg_len);
 
 	err = 0;
 	sp = sctp_sk(sk);
@@ -1950,7 +1947,7 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	}
 
 	/* Break the message into multiple chunks of maximum size. */
-	datamsg = sctp_datamsg_from_user(asoc, sinfo, &from);
+	datamsg = sctp_datamsg_from_user(asoc, sinfo, &msg->msg_iter);
 	if (IS_ERR(datamsg)) {
 		err = PTR_ERR(datamsg);
 		goto out_free;
diff --git a/net/socket.c b/net/socket.c
index f676ac4..8809afc 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -689,8 +689,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec;
-	msg->msg_iovlen = num;
+	iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
 	result = sock_sendmsg(sock, msg, size);
 	set_fs(oldfs);
 	return result;
@@ -853,7 +852,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 	 * the following is safe, since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
+	iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
 	result = sock_recvmsg(sock, msg, size, flags);
 	set_fs(oldfs);
 	return result;
@@ -913,8 +912,7 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *)iov;
-	msg->msg_iovlen = nr_segs;
+	iov_iter_init(&msg->msg_iter, READ, iov, nr_segs, size);
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 
 	return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
@@ -953,8 +951,7 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
 	msg->msg_namelen = 0;
 	msg->msg_control = NULL;
 	msg->msg_controllen = 0;
-	msg->msg_iov = (struct iovec *)iov;
-	msg->msg_iovlen = nr_segs;
+	iov_iter_init(&msg->msg_iter, WRITE, iov, nr_segs, size);
 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 	if (sock->type == SOCK_SEQPACKET)
 		msg->msg_flags |= MSG_EOR;
@@ -1798,8 +1795,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
 	iov.iov_base = buff;
 	iov.iov_len = len;
 	msg.msg_name = NULL;
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
+	iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 	msg.msg_namelen = 0;
@@ -1856,10 +1852,9 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
-	msg.msg_iovlen = 1;
-	msg.msg_iov = &iov;
 	iov.iov_len = size;
 	iov.iov_base = ubuf;
+	iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
 	/* Save some cycles and don't copy the address if not needed */
 	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
 	/* We assume all kernel code knows the size of sockaddr_storage */
@@ -1993,13 +1988,14 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
 {
 	struct sockaddr __user *uaddr;
 	struct iovec __user *uiov;
+	size_t nr_segs;
 	ssize_t err;
 
 	if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
 	    __get_user(uaddr, &umsg->msg_name) ||
 	    __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
 	    __get_user(uiov, &umsg->msg_iov) ||
-	    __get_user(kmsg->msg_iovlen, &umsg->msg_iovlen) ||
+	    __get_user(nr_segs, &umsg->msg_iovlen) ||
 	    __get_user(kmsg->msg_control, &umsg->msg_control) ||
 	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
 	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
@@ -2029,14 +2025,15 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
 		kmsg->msg_namelen = 0;
 	}
 
-	if (kmsg->msg_iovlen > UIO_MAXIOV)
+	if (nr_segs > UIO_MAXIOV)
 		return -EMSGSIZE;
 
 	err = rw_copy_check_uvector(save_addr ? READ : WRITE,
-				    uiov, kmsg->msg_iovlen,
+				    uiov, nr_segs,
 				    UIO_FASTIOV, *iov, iov);
 	if (err >= 0)
-		kmsg->msg_iov = *iov;
+		iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
+			      *iov, nr_segs, err);
 	return err;
 }
 
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 5b06597..a687b30 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -194,7 +194,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
 		__skb_queue_tail(list, skb);
 		skb_copy_to_linear_data(skb, mhdr, mhsz);
 		pktpos = skb->data + mhsz;
-		if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iov, offset,
+		if (!dsz || !memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset,
 						 dsz))
 			return dsz;
 		rc = -EFAULT;
@@ -224,7 +224,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
 		if (drem < pktrem)
 			pktrem = drem;
 
-		if (memcpy_fromiovecend(pktpos, m->msg_iov, offset, pktrem)) {
+		if (memcpy_fromiovecend(pktpos, m->msg_iter.iov, offset, pktrem)) {
 			rc = -EFAULT;
 			goto error;
 		}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4450d62..8e1b102 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1459,9 +1459,6 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct scm_cookie tmp_scm;
 	int max_level;
 	int data_len = 0;
-	struct iov_iter from;
-
-	iov_iter_init(&from, WRITE, msg->msg_iov, msg->msg_iovlen, len);
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1519,7 +1516,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	skb_put(skb, len - data_len);
 	skb->data_len = data_len;
 	skb->len = len;
-	err = skb_copy_datagram_from_iter(skb, 0, &from, len);
+	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
 	if (err)
 		goto out_free;
 
@@ -1641,9 +1638,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	bool fds_sent = false;
 	int max_level;
 	int data_len;
-	struct iov_iter from;
-
-	iov_iter_init(&from, WRITE, msg->msg_iov, msg->msg_iovlen, len);
 
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
@@ -1700,7 +1694,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		skb_put(skb, size - data_len);
 		skb->data_len = data_len;
 		skb->len = size;
-		err = skb_copy_datagram_from_iter(skb, 0, &from, size);
+		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
 		if (err) {
 			kfree_skb(skb);
 			goto out_err;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 20a0ba3..02d2e52 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1850,7 +1850,8 @@ static ssize_t vmci_transport_stream_enqueue(
 	struct msghdr *msg,
 	size_t len)
 {
-	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg->msg_iov, len, 0);
+	/* XXX: stripping const */
+	return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0);
 }
 
 static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
-- 
2.1.3

^ permalink raw reply related

* [PATCH 19/25] vmci: propagate msghdr all way down to __qp_memcpy_from_queue()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

... and switch it to memcpy_to_msg()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 17 +++++++++--------
 include/linux/vmw_vmci_api.h            |  5 +++--
 net/vmw_vsock/vmci_transport.c          |  4 ++--
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 1b7b303..7aaaf51 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -27,6 +27,7 @@
 #include <linux/uio.h>
 #include <linux/wait.h>
 #include <linux/vmalloc.h>
+#include <linux/skbuff.h>
 
 #include "vmci_handle_array.h"
 #include "vmci_queue_pair.h"
@@ -429,11 +430,11 @@ static int __qp_memcpy_from_queue(void *dest,
 			to_copy = size - bytes_copied;
 
 		if (is_iovec) {
-			struct iovec *iov = (struct iovec *)dest;
+			struct msghdr *msg = dest;
 			int err;
 
 			/* The iovec will track bytes_copied internally. */
-			err = memcpy_toiovec(iov, (u8 *)va + page_offset,
+			err = memcpy_to_msg(msg, (u8 *)va + page_offset,
 					     to_copy);
 			if (err != 0) {
 				if (kernel_if->host)
@@ -3264,13 +3265,13 @@ EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
  * of bytes dequeued or < 0 on error.
  */
 ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
-			  void *iov,
+			  struct msghdr *msg,
 			  size_t iov_size,
 			  int buf_type)
 {
 	ssize_t result;
 
-	if (!qpair || !iov)
+	if (!qpair)
 		return VMCI_ERROR_INVALID_ARGS;
 
 	qp_lock(qpair);
@@ -3279,7 +3280,7 @@ ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
 		result = qp_dequeue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->consume_q_size,
-					   iov, iov_size,
+					   msg, iov_size,
 					   qp_memcpy_from_queue_iov,
 					   true);
 
@@ -3308,13 +3309,13 @@ EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
  * of bytes peeked or < 0 on error.
  */
 ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
-			 void *iov,
+			 struct msghdr *msg,
 			 size_t iov_size,
 			 int buf_type)
 {
 	ssize_t result;
 
-	if (!qpair || !iov)
+	if (!qpair)
 		return VMCI_ERROR_INVALID_ARGS;
 
 	qp_lock(qpair);
@@ -3323,7 +3324,7 @@ ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
 		result = qp_dequeue_locked(qpair->produce_q,
 					   qpair->consume_q,
 					   qpair->consume_q_size,
-					   iov, iov_size,
+					   msg, iov_size,
 					   qp_memcpy_from_queue_iov,
 					   false);
 
diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h
index 023430e..5691f75 100644
--- a/include/linux/vmw_vmci_api.h
+++ b/include/linux/vmw_vmci_api.h
@@ -24,6 +24,7 @@
 #define VMCI_KERNEL_API_VERSION_2 2
 #define VMCI_KERNEL_API_VERSION   VMCI_KERNEL_API_VERSION_2
 
+struct msghdr;
 typedef void (vmci_device_shutdown_fn) (void *device_registration,
 					void *user_data);
 
@@ -75,8 +76,8 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size,
 ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
 			  void *iov, size_t iov_size, int mode);
 ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
-			  void *iov, size_t iov_size, int mode);
-ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, void *iov, size_t iov_size,
+			  struct msghdr *msg, size_t iov_size, int mode);
+ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size,
 			 int mode);
 
 #endif /* !__VMW_VMCI_API_H__ */
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c1c0389..20a0ba3 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1840,9 +1840,9 @@ static ssize_t vmci_transport_stream_dequeue(
 	int flags)
 {
 	if (flags & MSG_PEEK)
-		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg->msg_iov, len, 0);
+		return vmci_qpair_peekv(vmci_trans(vsk)->qpair, msg, len, 0);
 	else
-		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg->msg_iov, len, 0);
+		return vmci_qpair_dequev(vmci_trans(vsk)->qpair, msg, len, 0);
 }
 
 static ssize_t vmci_transport_stream_enqueue(
-- 
2.1.3

^ permalink raw reply related

* [PATCH 18/25] switch l2cap ->memcpy_fromiovec() to msghdr
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

it'll die soon enough - now that kvec-backed iov_iter works regardless
of set_fs(), both instances will become copy_from_iter() as soon as
we introduce ->msg_iter...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/bluetooth/l2cap.h | 6 +++---
 net/bluetooth/l2cap_core.c    | 4 ++--
 net/bluetooth/l2cap_sock.c    | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 061e648..4e23674 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -608,7 +608,7 @@ struct l2cap_ops {
 					       unsigned long len, int nb);
 	int			(*memcpy_fromiovec) (struct l2cap_chan *chan,
 						     unsigned char *kdata,
-						     struct iovec *iov,
+						     struct msghdr *msg,
 						     int len);
 };
 
@@ -905,13 +905,13 @@ static inline long l2cap_chan_no_get_sndtimeo(struct l2cap_chan *chan)
 
 static inline int l2cap_chan_no_memcpy_fromiovec(struct l2cap_chan *chan,
 						 unsigned char *kdata,
-						 struct iovec *iov,
+						 struct msghdr *msg,
 						 int len)
 {
 	/* Following is safe since for compiler definitions of kvec and
 	 * iovec are identical, yielding the same in-core layout and alignment
 	 */
-	struct kvec *vec = (struct kvec *)iov;
+	struct kvec *vec = (struct kvec *)msg->msg_iov;
 
 	while (len > 0) {
 		if (vec->iov_len) {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 8e12731..5201d61 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2097,7 +2097,7 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
 	int sent = 0;
 
 	if (chan->ops->memcpy_fromiovec(chan, skb_put(skb, count),
-					msg->msg_iov, count))
+					msg, count))
 		return -EFAULT;
 
 	sent += count;
@@ -2118,7 +2118,7 @@ static inline int l2cap_skbuff_fromiovec(struct l2cap_chan *chan,
 		*frag = tmp;
 
 		if (chan->ops->memcpy_fromiovec(chan, skb_put(*frag, count),
-						msg->msg_iov, count))
+						msg, count))
 			return -EFAULT;
 
 		sent += count;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index b0efb72..205b298 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1338,9 +1338,9 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
 
 static int l2cap_sock_memcpy_fromiovec_cb(struct l2cap_chan *chan,
 					  unsigned char *kdata,
-					  struct iovec *iov, int len)
+					  struct msghdr *msg, int len)
 {
-	return memcpy_fromiovec(kdata, iov, len);
+	return memcpy_from_msg(kdata, msg, len);
 }
 
 static void l2cap_sock_ready_cb(struct l2cap_chan *chan)
-- 
2.1.3

^ permalink raw reply related

* [PATCH 17/25] switch tcp_sock->ucopy from iovec (ucopy.iov) to msghdr (ucopy.msg)
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/tcp.h  | 2 +-
 net/ipv4/tcp.c       | 2 +-
 net/ipv4/tcp_input.c | 7 +++----
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f566b85..5d9cc9c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -162,7 +162,7 @@ struct tcp_sock {
 	struct {
 		struct sk_buff_head	prequeue;
 		struct task_struct	*task;
-		struct iovec		*iov;
+		struct msghdr		*msg;
 		int			memory;
 		int			len;
 	} ucopy;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a36..4a96f37 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1729,7 +1729,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
 				user_recv = current;
 				tp->ucopy.task = user_recv;
-				tp->ucopy.iov = msg->msg_iov;
+				tp->ucopy.msg = msg;
 			}
 
 			tp->ucopy.len = len;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69de1a1..075ab4d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4421,7 +4421,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 			__set_current_state(TASK_RUNNING);
 
 			local_bh_enable();
-			if (!skb_copy_datagram_iovec(skb, 0, tp->ucopy.iov, chunk)) {
+			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
 				tp->ucopy.len -= chunk;
 				tp->copied_seq += chunk;
 				eaten = (chunk == skb->len);
@@ -4941,10 +4941,9 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 
 	local_bh_enable();
 	if (skb_csum_unnecessary(skb))
-		err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
+		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
 	else
-		err = skb_copy_and_csum_datagram_iovec(skb, hlen,
-						       tp->ucopy.iov);
+		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
 
 	if (!err) {
 		tp->ucopy.len -= chunk;
-- 
2.1.3

^ permalink raw reply related

* [PATCH 14/25] raw.c: stick msghdr into raw_frag_vec
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

we'll want access to ->msg_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/raw.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 43385a9..5c901eb 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -82,7 +82,7 @@
 #include <linux/uio.h>
 
 struct raw_frag_vec {
-	struct iovec *iov;
+	struct msghdr *msg;
 	union {
 		struct icmphdr icmph;
 		char c[1];
@@ -440,7 +440,7 @@ static int raw_probe_proto_opt(struct raw_frag_vec *rfv, struct flowi4 *fl4)
 	/* We only need the first two bytes. */
 	rfv->hlen = 2;
 
-	err = memcpy_fromiovec(rfv->hdr.c, rfv->iov, rfv->hlen);
+	err = memcpy_from_msg(rfv->hdr.c, rfv->msg, rfv->hlen);
 	if (err)
 		return err;
 
@@ -478,7 +478,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
 
 	offset -= rfv->hlen;
 
-	return ip_generic_getfrag(rfv->iov, to, offset, len, odd, skb);
+	return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
 }
 
 static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
@@ -600,7 +600,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 			   daddr, saddr, 0, 0);
 
 	if (!inet->hdrincl) {
-		rfv.iov = msg->msg_iov;
+		rfv.msg = msg;
 		rfv.hlen = 0;
 
 		err = raw_probe_proto_opt(&rfv, &fl4);
-- 
2.1.3

^ permalink raw reply related

* [PATCH 13/25] copy_from_iter_nocache()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

BTW, do we want memcpy_nocache()?

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  1 +
 mm/iov_iter.c       | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index c567655..bd8569a 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -83,6 +83,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i);
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
+size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 88c052e..a1599ca 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -399,6 +399,27 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(copy_from_iter);
 
+size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
+					 v.iov_base, v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL(copy_from_iter_nocache);
+
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
-- 
2.1.3

^ permalink raw reply related

* [PATCH 12/25] new helper: iov_iter_kvec()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

initialization of kvec-backed iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  2 ++
 mm/iov_iter.c       | 13 +++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 28ed2d9..c567655 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -87,6 +87,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
+void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *iov,
+			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 1d2cdeb..88c052e 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -479,6 +479,19 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
+void iov_iter_kvec(struct iov_iter *i, int direction,
+			const struct kvec *iov, unsigned long nr_segs,
+			size_t count)
+{
+	BUG_ON(!(direction & ITER_KVEC));
+	i->type = direction;
+	i->kvec = (struct kvec *)iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_kvec);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
-- 
2.1.3

^ permalink raw reply related

* [PATCH 11/25] csum_and_copy_..._iter()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  2 ++
 mm/iov_iter.c       | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e16945..28ed2d9 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -124,6 +124,8 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
 	i->count = count;
 }
+size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
 
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 1618e37..1d2cdeb 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -3,6 +3,7 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <net/checksum.h>
 
 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
 	size_t left;					\
@@ -586,6 +587,94 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
+size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
+			       struct iov_iter *i)
+{
+	char *to = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	sum = *csum;
+	iterate_and_advance(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_from_user(v.iov_base, 
+					       (to += v.iov_len) - v.iov_len,
+					       v.iov_len, 0, &err);
+		if (!err) {
+			sum = csum_block_add(sum, next, off);
+			off += v.iov_len;
+		}
+		err ? v.iov_len : 0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck(p + v.bv_offset,
+						 (to += v.bv_len) - v.bv_len,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck(v.iov_base,
+						 (to += v.iov_len) - v.iov_len,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	return bytes;
+}
+EXPORT_SYMBOL(csum_and_copy_from_iter);
+
+size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum,
+			     struct iov_iter *i)
+{
+	char *from = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	sum = *csum;
+	iterate_and_advance(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
+					     v.iov_base, 
+					     v.iov_len, 0, &err);
+		if (!err) {
+			sum = csum_block_add(sum, next, off);
+			off += v.iov_len;
+		}
+		err ? v.iov_len : 0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
+						 p + v.bv_offset,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
+						 v.iov_base,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	return bytes;
+}
+EXPORT_SYMBOL(csum_and_copy_to_iter);
+
 int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
 	size_t size = i->count;
-- 
2.1.3

^ permalink raw reply related

* [PATCH 10/25] iov_iter.c: handle ITER_KVEC directly
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

... without bothering with copy_..._user()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  1 +
 mm/iov_iter.c       | 82 ++++++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 70 insertions(+), 13 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 9b15814..6e16945 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -31,6 +31,7 @@ struct iov_iter {
 	size_t count;
 	union {
 		const struct iovec *iov;
+		const struct kvec *kvec;
 		const struct bio_vec *bvec;
 	};
 	unsigned long nr_segs;
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 66665449..1618e37 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -32,6 +32,29 @@
 	n = wanted - n;					\
 }
 
+#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
+	size_t wanted = n;				\
+	__p = i->kvec;					\
+	__v.iov_len = min(n, __p->iov_len - skip);	\
+	if (likely(__v.iov_len)) {			\
+		__v.iov_base = __p->iov_base + skip;	\
+		(void)(STEP);				\
+		skip += __v.iov_len;			\
+		n -= __v.iov_len;			\
+	}						\
+	while (unlikely(n)) {				\
+		__p++;					\
+		__v.iov_len = min(n, __p->iov_len);	\
+		if (unlikely(!__v.iov_len))		\
+			continue;			\
+		__v.iov_base = __p->iov_base;		\
+		(void)(STEP);				\
+		skip = __v.iov_len;			\
+		n -= __v.iov_len;			\
+	}						\
+	n = wanted;					\
+}
+
 #define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
 	size_t wanted = n;				\
 	__p = i->bvec;					\
@@ -57,12 +80,16 @@
 	n = wanted;					\
 }
 
-#define iterate_all_kinds(i, n, v, I, B) {			\
+#define iterate_all_kinds(i, n, v, I, B, K) {			\
 	size_t skip = i->iov_offset;				\
 	if (unlikely(i->type & ITER_BVEC)) {			\
 		const struct bio_vec *bvec;			\
 		struct bio_vec v;				\
 		iterate_bvec(i, n, v, bvec, skip, (B))		\
+	} else if (unlikely(i->type & ITER_KVEC)) {		\
+		const struct kvec *kvec;			\
+		struct kvec v;					\
+		iterate_kvec(i, n, v, kvec, skip, (K))		\
 	} else {						\
 		const struct iovec *iov;			\
 		struct iovec v;					\
@@ -70,7 +97,7 @@
 	}							\
 }
 
-#define iterate_and_advance(i, n, v, I, B) {			\
+#define iterate_and_advance(i, n, v, I, B, K) {			\
 	size_t skip = i->iov_offset;				\
 	if (unlikely(i->type & ITER_BVEC)) {			\
 		const struct bio_vec *bvec;			\
@@ -82,6 +109,16 @@
 		}						\
 		i->nr_segs -= bvec - i->bvec;			\
 		i->bvec = bvec;					\
+	} else if (unlikely(i->type & ITER_KVEC)) {		\
+		const struct kvec *kvec;			\
+		struct kvec v;					\
+		iterate_kvec(i, n, v, kvec, skip, (K))		\
+		if (skip == kvec->iov_len) {			\
+			kvec++;					\
+			skip = 0;				\
+		}						\
+		i->nr_segs -= kvec - i->kvec;			\
+		i->kvec = kvec;					\
 	} else {						\
 		const struct iovec *iov;			\
 		struct iovec v;					\
@@ -270,7 +307,7 @@ done:
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-	if (!(i->type & ITER_BVEC)) {
+	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
 		char __user *buf = i->iov->iov_base + i->iov_offset;
 		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
 		return fault_in_pages_readable(buf, bytes);
@@ -284,10 +321,14 @@ void iov_iter_init(struct iov_iter *i, int direction,
 			size_t count)
 {
 	/* It will get better.  Eventually... */
-	if (segment_eq(get_fs(), KERNEL_DS))
+	if (segment_eq(get_fs(), KERNEL_DS)) {
 		direction |= ITER_KVEC;
-	i->type = direction;
-	i->iov = iov;
+		i->type = direction;
+		i->kvec = (struct kvec *)iov;
+	} else {
+		i->type = direction;
+		i->iov = iov;
+	}
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
 	i->count = count;
@@ -328,7 +369,8 @@ size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
 			       v.iov_len),
 		memcpy_to_page(v.bv_page, v.bv_offset,
-			       (from += v.bv_len) - v.bv_len, v.bv_len)
+			       (from += v.bv_len) - v.bv_len, v.bv_len),
+		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
 	)
 
 	return bytes;
@@ -348,7 +390,8 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
 				 v.iov_len),
 		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
-				 v.bv_offset, v.bv_len)
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 	)
 
 	return bytes;
@@ -371,7 +414,7 @@ EXPORT_SYMBOL(copy_page_to_iter);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC) {
+	if (i->type & (ITER_BVEC|ITER_KVEC)) {
 		void *kaddr = kmap_atomic(page);
 		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
@@ -391,7 +434,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 
 	iterate_and_advance(i, bytes, v,
 		__clear_user(v.iov_base, v.iov_len),
-		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
+		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
+		memset(v.iov_base, 0, v.iov_len)
 	)
 
 	return bytes;
@@ -406,7 +450,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
 					  v.iov_base, v.iov_len),
 		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
-				 v.bv_offset, v.bv_len)
+				 v.bv_offset, v.bv_len),
+		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 	)
 	kunmap_atomic(kaddr);
 	return bytes;
@@ -415,7 +460,7 @@ EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
-	iterate_and_advance(i, size, v, 0, 0)
+	iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
 
@@ -443,7 +488,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 
 	iterate_all_kinds(i, size, v,
 		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
-		res |= v.bv_offset | v.bv_len
+		res |= v.bv_offset | v.bv_len,
+		res |= (unsigned long)v.iov_base | v.iov_len
 	)
 	return res;
 }
@@ -478,6 +524,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 		*start = v.bv_offset;
 		get_page(*pages = v.bv_page);
 		return v.bv_len;
+	}),({
+		return -EFAULT;
 	})
 	)
 	return 0;
@@ -530,6 +578,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 			return -ENOMEM;
 		get_page(*p = v.bv_page);
 		return v.bv_len;
+	}),({
+		return -EFAULT;
 	})
 	)
 	return 0;
@@ -554,6 +604,12 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 		npages++;
 		if (npages >= maxpages)
 			return maxpages;
+	}),({
+		unsigned long p = (unsigned long)v.iov_base;
+		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
+			- p / PAGE_SIZE;
+		if (npages >= maxpages)
+			return maxpages;
 	})
 	)
 	return npages;
-- 
2.1.3

^ permalink raw reply related

* [PATCH 09/25] iov_iter.c: convert copy_to_iter() to iterate_and_advance
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 91 ++++++-----------------------------------------------------
 1 file changed, 9 insertions(+), 82 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 791429d..66665449 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -97,51 +97,6 @@
 	i->iov_offset = skip;					\
 }
 
-static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
-{
-	size_t skip, copy, left, wanted;
-	const struct iovec *iov;
-	char __user *buf;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	iov = i->iov;
-	skip = i->iov_offset;
-	buf = iov->iov_base + skip;
-	copy = min(bytes, iov->iov_len - skip);
-
-	left = __copy_to_user(buf, from, copy);
-	copy -= left;
-	skip += copy;
-	from += copy;
-	bytes -= copy;
-	while (unlikely(!left && bytes)) {
-		iov++;
-		buf = iov->iov_base;
-		copy = min(bytes, iov->iov_len);
-		left = __copy_to_user(buf, from, copy);
-		copy -= left;
-		skip = copy;
-		from += copy;
-		bytes -= copy;
-	}
-
-	if (skip == iov->iov_len) {
-		iov++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= iov - i->iov;
-	i->iov = iov;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
-
 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -360,51 +315,23 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
 	kunmap_atomic(addr);
 }
 
-static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i)
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
-	size_t skip, copy, wanted;
-	const struct bio_vec *bvec;
-
+	char *from = addr;
 	if (unlikely(bytes > i->count))
 		bytes = i->count;
 
 	if (unlikely(!bytes))
 		return 0;
 
-	wanted = bytes;
-	bvec = i->bvec;
-	skip = i->iov_offset;
-	copy = min_t(size_t, bytes, bvec->bv_len - skip);
-
-	memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
-	skip += copy;
-	from += copy;
-	bytes -= copy;
-	while (bytes) {
-		bvec++;
-		copy = min(bytes, (size_t)bvec->bv_len);
-		memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
-		skip = copy;
-		from += copy;
-		bytes -= copy;
-	}
-	if (skip == bvec->bv_len) {
-		bvec++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= bvec - i->bvec;
-	i->bvec = bvec;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
+	iterate_and_advance(i, bytes, v,
+		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
+			       v.iov_len),
+		memcpy_to_page(v.bv_page, v.bv_offset,
+			       (from += v.bv_len) - v.bv_len, v.bv_len)
+	)
 
-size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (i->type & ITER_BVEC)
-		return copy_to_iter_bvec(addr, bytes, i);
-	else
-		return copy_to_iter_iovec(addr, bytes, i);
+	return bytes;
 }
 EXPORT_SYMBOL(copy_to_iter);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 08/25] iov_iter.c: convert copy_from_iter() to iterate_and_advance
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 106 +++++++++-------------------------------------------------
 1 file changed, 15 insertions(+), 91 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 17b7144..791429d 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -142,51 +142,6 @@ static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i)
-{
-	size_t skip, copy, left, wanted;
-	const struct iovec *iov;
-	char __user *buf;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	iov = i->iov;
-	skip = i->iov_offset;
-	buf = iov->iov_base + skip;
-	copy = min(bytes, iov->iov_len - skip);
-
-	left = __copy_from_user(to, buf, copy);
-	copy -= left;
-	skip += copy;
-	to += copy;
-	bytes -= copy;
-	while (unlikely(!left && bytes)) {
-		iov++;
-		buf = iov->iov_base;
-		copy = min(bytes, iov->iov_len);
-		left = __copy_from_user(to, buf, copy);
-		copy -= left;
-		skip = copy;
-		to += copy;
-		bytes -= copy;
-	}
-
-	if (skip == iov->iov_len) {
-		iov++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= iov - i->iov;
-	i->iov = iov;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
-
 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -444,48 +399,6 @@ static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i)
-{
-	size_t skip, copy, wanted;
-	const struct bio_vec *bvec;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	bvec = i->bvec;
-	skip = i->iov_offset;
-
-	copy = min(bytes, bvec->bv_len - skip);
-
-	memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
-
-	to += copy;
-	skip += copy;
-	bytes -= copy;
-
-	while (bytes) {
-		bvec++;
-		copy = min(bytes, (size_t)bvec->bv_len);
-		memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
-		skip = copy;
-		to += copy;
-		bytes -= copy;
-	}
-	if (skip == bvec->bv_len) {
-		bvec++;
-		skip = 0;
-	}
-	i->count -= wanted;
-	i->nr_segs -= bvec - i->bvec;
-	i->bvec = bvec;
-	i->iov_offset = skip;
-	return wanted;
-}
-
 size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (i->type & ITER_BVEC)
@@ -497,10 +410,21 @@ EXPORT_SYMBOL(copy_to_iter);
 
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC)
-		return copy_from_iter_bvec(addr, bytes, i);
-	else
-		return copy_from_iter_iovec(addr, bytes, i);
+	char *to = addr;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
+				 v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
+	)
+
+	return bytes;
 }
 EXPORT_SYMBOL(copy_from_iter);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 07/25] iov_iter.c: get rid of bvec_copy_page_{to,from}_iter()
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Just have copy_page_{to,from}_iter() fall back to kmap_atomic +
copy_{to,from}_iter() + kunmap_atomic() in ITER_BVEC case.  As
the matter of fact, that's what we want to do for any iov_iter
kind that isn't blocking - e.g. ITER_KVEC will also go that way
once we recognize it on iov_iter.c primitives level

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 60 ++++++++++++++++++++++++-----------------------------------
 1 file changed, 24 insertions(+), 36 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 39ad713..17b7144 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -486,30 +486,33 @@ static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i)
 	return wanted;
 }
 
-static size_t copy_page_to_iter_bvec(struct page *page, size_t offset,
-					size_t bytes, struct iov_iter *i)
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
-	void *kaddr = kmap_atomic(page);
-	size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i);
-	kunmap_atomic(kaddr);
-	return wanted;
+	if (i->type & ITER_BVEC)
+		return copy_to_iter_bvec(addr, bytes, i);
+	else
+		return copy_to_iter_iovec(addr, bytes, i);
 }
+EXPORT_SYMBOL(copy_to_iter);
 
-static size_t copy_page_from_iter_bvec(struct page *page, size_t offset,
-					size_t bytes, struct iov_iter *i)
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
-	void *kaddr = kmap_atomic(page);
-	size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i);
-	kunmap_atomic(kaddr);
-	return wanted;
+	if (i->type & ITER_BVEC)
+		return copy_from_iter_bvec(addr, bytes, i);
+	else
+		return copy_from_iter_iovec(addr, bytes, i);
 }
+EXPORT_SYMBOL(copy_from_iter);
 
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC)
-		return copy_page_to_iter_bvec(page, offset, bytes, i);
-	else
+	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+		void *kaddr = kmap_atomic(page);
+		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
+		kunmap_atomic(kaddr);
+		return wanted;
+	} else
 		return copy_page_to_iter_iovec(page, offset, bytes, i);
 }
 EXPORT_SYMBOL(copy_page_to_iter);
@@ -517,31 +520,16 @@ EXPORT_SYMBOL(copy_page_to_iter);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC)
-		return copy_page_from_iter_bvec(page, offset, bytes, i);
-	else
+	if (i->type & ITER_BVEC) {
+		void *kaddr = kmap_atomic(page);
+		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
+		kunmap_atomic(kaddr);
+		return wanted;
+	} else
 		return copy_page_from_iter_iovec(page, offset, bytes, i);
 }
 EXPORT_SYMBOL(copy_page_from_iter);
 
-size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (i->type & ITER_BVEC)
-		return copy_to_iter_bvec(addr, bytes, i);
-	else
-		return copy_to_iter_iovec(addr, bytes, i);
-}
-EXPORT_SYMBOL(copy_to_iter);
-
-size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	if (i->type & ITER_BVEC)
-		return copy_from_iter_bvec(addr, bytes, i);
-	else
-		return copy_from_iter_iovec(addr, bytes, i);
-}
-EXPORT_SYMBOL(copy_from_iter);
-
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(bytes > i->count))
-- 
2.1.3

^ permalink raw reply related

* [PATCH 06/25] iov_iter.c: convert iov_iter_zero() to iterate_and_advance
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 98 ++++++++---------------------------------------------------
 1 file changed, 12 insertions(+), 86 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 3214b9b..39ad713 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -349,50 +349,6 @@ done:
 	return wanted - bytes;
 }
 
-static size_t zero_iovec(size_t bytes, struct iov_iter *i)
-{
-	size_t skip, copy, left, wanted;
-	const struct iovec *iov;
-	char __user *buf;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	iov = i->iov;
-	skip = i->iov_offset;
-	buf = iov->iov_base + skip;
-	copy = min(bytes, iov->iov_len - skip);
-
-	left = __clear_user(buf, copy);
-	copy -= left;
-	skip += copy;
-	bytes -= copy;
-
-	while (unlikely(!left && bytes)) {
-		iov++;
-		buf = iov->iov_base;
-		copy = min(bytes, iov->iov_len);
-		left = __clear_user(buf, copy);
-		copy -= left;
-		skip = copy;
-		bytes -= copy;
-	}
-
-	if (skip == iov->iov_len) {
-		iov++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= iov - i->iov;
-	i->iov = iov;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
-
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
  * of bytes. Returns 0 on success, or non-zero if the memory could not be
@@ -548,43 +504,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset,
 	return wanted;
 }
 
-static size_t zero_bvec(size_t bytes, struct iov_iter *i)
-{
-	size_t skip, copy, wanted;
-	const struct bio_vec *bvec;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	bvec = i->bvec;
-	skip = i->iov_offset;
-	copy = min_t(size_t, bytes, bvec->bv_len - skip);
-
-	memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy);
-	skip += copy;
-	bytes -= copy;
-	while (bytes) {
-		bvec++;
-		copy = min(bytes, (size_t)bvec->bv_len);
-		memzero_page(bvec->bv_page, bvec->bv_offset, copy);
-		skip = copy;
-		bytes -= copy;
-	}
-	if (skip == bvec->bv_len) {
-		bvec++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= bvec - i->bvec;
-	i->bvec = bvec;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
-
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -625,11 +544,18 @@ EXPORT_SYMBOL(copy_from_iter);
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC) {
-		return zero_bvec(bytes, i);
-	} else {
-		return zero_iovec(bytes, i);
-	}
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__clear_user(v.iov_base, v.iov_len),
+		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
+	)
+
+	return bytes;
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 05/25] iov_iter.c: convert iov_iter_get_pages_alloc() to iterate_all_kinds
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 107 ++++++++++++++++++++++++----------------------------------
 1 file changed, 45 insertions(+), 62 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 75e29ef..3214b9b 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -428,43 +428,6 @@ void iov_iter_init(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
-		   struct page ***pages, size_t maxsize,
-		   size_t *start)
-{
-	size_t offset = i->iov_offset;
-	const struct iovec *iov = i->iov;
-	size_t len;
-	unsigned long addr;
-	void *p;
-	int n;
-	int res;
-
-	len = iov->iov_len - offset;
-	if (len > i->count)
-		len = i->count;
-	if (len > maxsize)
-		len = maxsize;
-	addr = (unsigned long)iov->iov_base + offset;
-	len += *start = addr & (PAGE_SIZE - 1);
-	addr &= ~(PAGE_SIZE - 1);
-	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
-	
-	p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
-	if (!p)
-		p = vmalloc(n * sizeof(struct page *));
-	if (!p)
-		return -ENOMEM;
-
-	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
-	if (unlikely(res < 0)) {
-		kvfree(p);
-		return res;
-	}
-	*pages = p;
-	return (res == n ? len : res * PAGE_SIZE) - *start;
-}
-
 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
 {
 	char *from = kmap_atomic(page);
@@ -622,27 +585,6 @@ static size_t zero_bvec(size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
-		   struct page ***pages, size_t maxsize,
-		   size_t *start)
-{
-	const struct bio_vec *bvec = i->bvec;
-	size_t len = bvec->bv_len - i->iov_offset;
-	if (len > i->count)
-		len = i->count;
-	if (len > maxsize)
-		len = maxsize;
-	*start = bvec->bv_offset + i->iov_offset;
-
-	*pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
-	if (!*pages)
-		return -ENOMEM;
-
-	get_page(**pages = bvec->bv_page);
-
-	return len;
-}
-
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -777,14 +719,55 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
+static struct page **get_pages_array(size_t n)
+{
+	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
+	if (!p)
+		p = vmalloc(n * sizeof(struct page *));
+	return p;
+}
+
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
 {
-	if (i->type & ITER_BVEC)
-		return get_pages_alloc_bvec(i, pages, maxsize, start);
-	else
-		return get_pages_alloc_iovec(i, pages, maxsize, start);
+	struct page **p;
+
+	if (maxsize > i->count)
+		maxsize = i->count;
+
+	if (!maxsize)
+		return 0;
+
+	iterate_all_kinds(i, maxsize, v, ({
+		unsigned long addr = (unsigned long)v.iov_base;
+		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
+		int n;
+		int res;
+
+		addr &= ~(PAGE_SIZE - 1);
+		n = DIV_ROUND_UP(len, PAGE_SIZE);
+		p = get_pages_array(n);
+		if (!p)
+			return -ENOMEM;
+		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+		if (unlikely(res < 0)) {
+			kvfree(p);
+			return res;
+		}
+		*pages = p;
+		return (res == n ? len : res * PAGE_SIZE) - *start;
+	0;}),({
+		/* can't be more than PAGE_SIZE */
+		*start = v.bv_offset;
+		*pages = p = get_pages_array(1);
+		if (!p)
+			return -ENOMEM;
+		get_page(*p = v.bv_page);
+		return v.bv_len;
+	})
+	)
+	return 0;
 }
 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 04/25] iov_iter.c: convert iov_iter_get_pages() to iterate_all_kinds
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 78 +++++++++++++++++++++--------------------------------------
 1 file changed, 28 insertions(+), 50 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index bc666e7..75e29ef 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -428,34 +428,6 @@ void iov_iter_init(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-static ssize_t get_pages_iovec(struct iov_iter *i,
-		   struct page **pages, size_t maxsize, unsigned maxpages,
-		   size_t *start)
-{
-	size_t offset = i->iov_offset;
-	const struct iovec *iov = i->iov;
-	size_t len;
-	unsigned long addr;
-	int n;
-	int res;
-
-	len = iov->iov_len - offset;
-	if (len > i->count)
-		len = i->count;
-	if (len > maxsize)
-		len = maxsize;
-	addr = (unsigned long)iov->iov_base + offset;
-	len += *start = addr & (PAGE_SIZE - 1);
-	if (len > maxpages * PAGE_SIZE)
-		len = maxpages * PAGE_SIZE;
-	addr &= ~(PAGE_SIZE - 1);
-	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
-	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
-	if (unlikely(res < 0))
-		return res;
-	return (res == n ? len : res * PAGE_SIZE) - *start;
-}
-
 static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
@@ -650,24 +622,6 @@ static size_t zero_bvec(size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static ssize_t get_pages_bvec(struct iov_iter *i,
-		   struct page **pages, size_t maxsize, unsigned maxpages,
-		   size_t *start)
-{
-	const struct bio_vec *bvec = i->bvec;
-	size_t len = bvec->bv_len - i->iov_offset;
-	if (len > i->count)
-		len = i->count;
-	if (len > maxsize)
-		len = maxsize;
-	/* can't be more than PAGE_SIZE */
-	*start = bvec->bv_offset + i->iov_offset;
-
-	get_page(*pages = bvec->bv_page);
-
-	return len;
-}
-
 static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
@@ -792,10 +746,34 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
-	if (i->type & ITER_BVEC)
-		return get_pages_bvec(i, pages, maxsize, maxpages, start);
-	else
-		return get_pages_iovec(i, pages, maxsize, maxpages, start);
+	if (maxsize > i->count)
+		maxsize = i->count;
+
+	if (!maxsize)
+		return 0;
+
+	iterate_all_kinds(i, maxsize, v, ({
+		unsigned long addr = (unsigned long)v.iov_base;
+		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
+		int n;
+		int res;
+
+		if (len > maxpages * PAGE_SIZE)
+			len = maxpages * PAGE_SIZE;
+		addr &= ~(PAGE_SIZE - 1);
+		n = DIV_ROUND_UP(len, PAGE_SIZE);
+		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+		if (unlikely(res < 0))
+			return res;
+		return (res == n ? len : res * PAGE_SIZE) - *start;
+	0;}),({
+		/* can't be more than PAGE_SIZE */
+		*start = v.bv_offset;
+		get_page(*pages = v.bv_page);
+		return v.bv_len;
+	})
+	)
+	return 0;
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 03/25] iov_iter.c: convert iov_iter_npages() to iterate_all_kinds
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 73 ++++++++++++++++-------------------------------------------
 1 file changed, 19 insertions(+), 54 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index e91bf0a..bc666e7 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -493,32 +493,6 @@ static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
 
-static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
-{
-	size_t offset = i->iov_offset;
-	size_t size = i->count;
-	const struct iovec *iov = i->iov;
-	int npages = 0;
-	int n;
-
-	for (n = 0; size && n < i->nr_segs; n++, iov++) {
-		unsigned long addr = (unsigned long)iov->iov_base + offset;
-		size_t len = iov->iov_len - offset;
-		offset = 0;
-		if (unlikely(!len))	/* empty segment */
-			continue;
-		if (len > size)
-			len = size;
-		npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE
-			  - addr / PAGE_SIZE;
-		if (npages >= maxpages)	/* don't bother going further */
-			return maxpages;
-		size -= len;
-		offset = 0;
-	}
-	return min(npages, maxpages);
-}
-
 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
 {
 	char *from = kmap_atomic(page);
@@ -715,30 +689,6 @@ static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
 	return len;
 }
 
-static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
-{
-	size_t offset = i->iov_offset;
-	size_t size = i->count;
-	const struct bio_vec *bvec = i->bvec;
-	int npages = 0;
-	int n;
-
-	for (n = 0; size && n < i->nr_segs; n++, bvec++) {
-		size_t len = bvec->bv_len - offset;
-		offset = 0;
-		if (unlikely(!len))	/* empty segment */
-			continue;
-		if (len > size)
-			len = size;
-		npages++;
-		if (npages >= maxpages)	/* don't bother going further */
-			return maxpages;
-		size -= len;
-		offset = 0;
-	}
-	return min(npages, maxpages);
-}
-
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
@@ -862,9 +812,24 @@ EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
 int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
-	if (i->type & ITER_BVEC)
-		return iov_iter_npages_bvec(i, maxpages);
-	else
-		return iov_iter_npages_iovec(i, maxpages);
+	size_t size = i->count;
+	int npages = 0;
+
+	if (!size)
+		return 0;
+
+	iterate_all_kinds(i, size, v, ({
+		unsigned long p = (unsigned long)v.iov_base;
+		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
+			- p / PAGE_SIZE;
+		if (npages >= maxpages)
+			return maxpages;
+	0;}),({
+		npages++;
+		if (npages >= maxpages)
+			return maxpages;
+	})
+	)
+	return npages;
 }
 EXPORT_SYMBOL(iov_iter_npages);
-- 
2.1.3

^ permalink raw reply related

* [PATCH 02/25] iov_iter.c: iterate_and_advance
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

same as iterate_all_kinds, but iterator is moved to the position past
the last byte we'd handled.

iov_iter_advance() converted to it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 104 ++++++++++++++++------------------------------------------
 1 file changed, 28 insertions(+), 76 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 798fcb4..e91bf0a 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -70,6 +70,33 @@
 	}							\
 }
 
+#define iterate_and_advance(i, n, v, I, B) {			\
+	size_t skip = i->iov_offset;				\
+	if (unlikely(i->type & ITER_BVEC)) {			\
+		const struct bio_vec *bvec;			\
+		struct bio_vec v;				\
+		iterate_bvec(i, n, v, bvec, skip, (B))		\
+		if (skip == bvec->bv_len) {			\
+			bvec++;					\
+			skip = 0;				\
+		}						\
+		i->nr_segs -= bvec - i->bvec;			\
+		i->bvec = bvec;					\
+	} else {						\
+		const struct iovec *iov;			\
+		struct iovec v;					\
+		iterate_iovec(i, n, v, iov, skip, (I))		\
+		if (skip == iov->iov_len) {			\
+			iov++;					\
+			skip = 0;				\
+		}						\
+		i->nr_segs -= iov - i->iov;			\
+		i->iov = iov;					\
+	}							\
+	i->count -= n;						\
+	i->iov_offset = skip;					\
+}
+
 static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -366,42 +393,6 @@ static size_t zero_iovec(size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static void advance_iovec(struct iov_iter *i, size_t bytes)
-{
-	BUG_ON(i->count < bytes);
-
-	if (likely(i->nr_segs == 1)) {
-		i->iov_offset += bytes;
-		i->count -= bytes;
-	} else {
-		const struct iovec *iov = i->iov;
-		size_t base = i->iov_offset;
-		unsigned long nr_segs = i->nr_segs;
-
-		/*
-		 * The !iov->iov_len check ensures we skip over unlikely
-		 * zero-length segments (without overruning the iovec).
-		 */
-		while (bytes || unlikely(i->count && !iov->iov_len)) {
-			int copy;
-
-			copy = min(bytes, iov->iov_len - base);
-			BUG_ON(!i->count || i->count < copy);
-			i->count -= copy;
-			bytes -= copy;
-			base += copy;
-			if (iov->iov_len == base) {
-				iov++;
-				nr_segs--;
-				base = 0;
-			}
-		}
-		i->iov = iov;
-		i->iov_offset = base;
-		i->nr_segs = nr_segs;
-	}
-}
-
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
  * of bytes. Returns 0 on success, or non-zero if the memory could not be
@@ -685,42 +676,6 @@ static size_t zero_bvec(size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static void advance_bvec(struct iov_iter *i, size_t bytes)
-{
-	BUG_ON(i->count < bytes);
-
-	if (likely(i->nr_segs == 1)) {
-		i->iov_offset += bytes;
-		i->count -= bytes;
-	} else {
-		const struct bio_vec *bvec = i->bvec;
-		size_t base = i->iov_offset;
-		unsigned long nr_segs = i->nr_segs;
-
-		/*
-		 * The !iov->iov_len check ensures we skip over unlikely
-		 * zero-length segments (without overruning the iovec).
-		 */
-		while (bytes || unlikely(i->count && !bvec->bv_len)) {
-			int copy;
-
-			copy = min(bytes, bvec->bv_len - base);
-			BUG_ON(!i->count || i->count < copy);
-			i->count -= copy;
-			bytes -= copy;
-			base += copy;
-			if (bvec->bv_len == base) {
-				bvec++;
-				nr_segs--;
-				base = 0;
-			}
-		}
-		i->bvec = bvec;
-		i->iov_offset = base;
-		i->nr_segs = nr_segs;
-	}
-}
-
 static ssize_t get_pages_bvec(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
@@ -849,10 +804,7 @@ EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
-	if (i->type & ITER_BVEC)
-		advance_bvec(i, size);
-	else
-		advance_iovec(i, size);
+	iterate_and_advance(i, size, v, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
 
-- 
2.1.3

^ permalink raw reply related

* [PATCH 01/25] iov_iter.c: macros for iterating over iov_iter
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>

From: Al Viro <viro@zeniv.linux.org.uk>

iterate_all_kinds(iter, size, ident, step_iovec, step_bvec)
iterates through the ranges covered by iter (up to size bytes total),
repeating step_iovec or step_bvec for each of those.  ident is
declared in expansion of that thing, either as struct iovec or
struct bvec, and it contains the range we are currently looking
at.  step_bvec should be a void expression, step_iovec - a size_t
one, with non-zero meaning "stop here, that many bytes from this
range left".  In the end, the amount actually handled is stored
in size.

iov_iter_copy_from_user_atomic() and iov_iter_alignment() converted
to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 mm/iov_iter.c | 212 ++++++++++++++++++++++++----------------------------------
 1 file changed, 86 insertions(+), 126 deletions(-)

diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index e34a3cb..798fcb4 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -4,6 +4,72 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
+	size_t left;					\
+	size_t wanted = n;				\
+	__p = i->iov;					\
+	__v.iov_len = min(n, __p->iov_len - skip);	\
+	if (likely(__v.iov_len)) {			\
+		__v.iov_base = __p->iov_base + skip;	\
+		left = (STEP);				\
+		__v.iov_len -= left;			\
+		skip += __v.iov_len;			\
+		n -= __v.iov_len;			\
+	} else {					\
+		left = 0;				\
+	}						\
+	while (unlikely(!left && n)) {			\
+		__p++;					\
+		__v.iov_len = min(n, __p->iov_len);	\
+		if (unlikely(!__v.iov_len))		\
+			continue;			\
+		__v.iov_base = __p->iov_base;		\
+		left = (STEP);				\
+		__v.iov_len -= left;			\
+		skip = __v.iov_len;			\
+		n -= __v.iov_len;			\
+	}						\
+	n = wanted - n;					\
+}
+
+#define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
+	size_t wanted = n;				\
+	__p = i->bvec;					\
+	__v.bv_len = min_t(size_t, n, __p->bv_len - skip);	\
+	if (likely(__v.bv_len)) {			\
+		__v.bv_page = __p->bv_page;		\
+		__v.bv_offset = __p->bv_offset + skip; 	\
+		(void)(STEP);				\
+		skip += __v.bv_len;			\
+		n -= __v.bv_len;			\
+	}						\
+	while (unlikely(n)) {				\
+		__p++;					\
+		__v.bv_len = min_t(size_t, n, __p->bv_len);	\
+		if (unlikely(!__v.bv_len))		\
+			continue;			\
+		__v.bv_page = __p->bv_page;		\
+		__v.bv_offset = __p->bv_offset;		\
+		(void)(STEP);				\
+		skip = __v.bv_len;			\
+		n -= __v.bv_len;			\
+	}						\
+	n = wanted;					\
+}
+
+#define iterate_all_kinds(i, n, v, I, B) {			\
+	size_t skip = i->iov_offset;				\
+	if (unlikely(i->type & ITER_BVEC)) {			\
+		const struct bio_vec *bvec;			\
+		struct bio_vec v;				\
+		iterate_bvec(i, n, v, bvec, skip, (B))		\
+	} else {						\
+		const struct iovec *iov;			\
+		struct iovec v;					\
+		iterate_iovec(i, n, v, iov, skip, (I))		\
+	}							\
+}
+
 static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -300,54 +366,6 @@ static size_t zero_iovec(size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static size_t __iovec_copy_from_user_inatomic(char *vaddr,
-			const struct iovec *iov, size_t base, size_t bytes)
-{
-	size_t copied = 0, left = 0;
-
-	while (bytes) {
-		char __user *buf = iov->iov_base + base;
-		int copy = min(bytes, iov->iov_len - base);
-
-		base = 0;
-		left = __copy_from_user_inatomic(vaddr, buf, copy);
-		copied += copy;
-		bytes -= copy;
-		vaddr += copy;
-		iov++;
-
-		if (unlikely(left))
-			break;
-	}
-	return copied - left;
-}
-
-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were successfully copied.  If a fault is encountered then return the number of
- * bytes which were copied.
- */
-static size_t copy_from_user_atomic_iovec(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap_atomic(page);
-	if (likely(i->nr_segs == 1)) {
-		int left;
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-						i->iov, i->iov_offset, bytes);
-	}
-	kunmap_atomic(kaddr);
-
-	return copied;
-}
-
 static void advance_iovec(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
@@ -404,30 +422,6 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-static unsigned long alignment_iovec(const struct iov_iter *i)
-{
-	const struct iovec *iov = i->iov;
-	unsigned long res;
-	size_t size = i->count;
-	size_t n;
-
-	if (!size)
-		return 0;
-
-	res = (unsigned long)iov->iov_base + i->iov_offset;
-	n = iov->iov_len - i->iov_offset;
-	if (n >= size)
-		return res | size;
-	size -= n;
-	res |= n;
-	while (size > (++iov)->iov_len) {
-		res |= (unsigned long)iov->iov_base | iov->iov_len;
-		size -= iov->iov_len;
-	}
-	res |= (unsigned long)iov->iov_base | size;
-	return res;
-}
-
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
@@ -691,28 +685,6 @@ static size_t zero_bvec(size_t bytes, struct iov_iter *i)
 	return wanted - bytes;
 }
 
-static size_t copy_from_user_bvec(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t left;
-	const struct bio_vec *bvec;
-	size_t base = i->iov_offset;
-
-	kaddr = kmap_atomic(page);
-	for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) {
-		size_t copy = min(left, bvec->bv_len - base);
-		if (!bvec->bv_len)
-			continue;
-		memcpy_from_page(kaddr + offset, bvec->bv_page,
-				 bvec->bv_offset + base, copy);
-		offset += copy;
-		left -= copy;
-	}
-	kunmap_atomic(kaddr);
-	return bytes;
-}
-
 static void advance_bvec(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
@@ -749,30 +721,6 @@ static void advance_bvec(struct iov_iter *i, size_t bytes)
 	}
 }
 
-static unsigned long alignment_bvec(const struct iov_iter *i)
-{
-	const struct bio_vec *bvec = i->bvec;
-	unsigned long res;
-	size_t size = i->count;
-	size_t n;
-
-	if (!size)
-		return 0;
-
-	res = bvec->bv_offset + i->iov_offset;
-	n = bvec->bv_len - i->iov_offset;
-	if (n >= size)
-		return res | size;
-	size -= n;
-	res |= n;
-	while (size > (++bvec)->bv_len) {
-		res |= bvec->bv_offset | bvec->bv_len;
-		size -= bvec->bv_len;
-	}
-	res |= bvec->bv_offset | size;
-	return res;
-}
-
 static ssize_t get_pages_bvec(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
@@ -887,10 +835,15 @@ EXPORT_SYMBOL(iov_iter_zero);
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
-	if (i->type & ITER_BVEC)
-		return copy_from_user_bvec(page, i, offset, bytes);
-	else
-		return copy_from_user_atomic_iovec(page, i, offset, bytes);
+	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
+	iterate_all_kinds(i, bytes, v,
+		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
+					  v.iov_base, v.iov_len),
+		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
+	)
+	kunmap_atomic(kaddr);
+	return bytes;
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
@@ -919,10 +872,17 @@ EXPORT_SYMBOL(iov_iter_single_seg_count);
 
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
-	if (i->type & ITER_BVEC)
-		return alignment_bvec(i);
-	else
-		return alignment_iovec(i);
+	unsigned long res = 0;
+	size_t size = i->count;
+
+	if (!size)
+		return 0;
+
+	iterate_all_kinds(i, size, v,
+		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
+		res |= v.bv_offset | v.bv_len
+	)
+	return res;
 }
 EXPORT_SYMBOL(iov_iter_alignment);
 
-- 
2.1.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox