linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: Matthew Wilcox <willy@infradead.org>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: David Howells <dhowells@redhat.com>,
	Al Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	Jens Axboe <axboe@kernel.dk>, Jeff Layton <jlayton@kernel.org>,
	Christian Brauner <brauner@kernel.org>,
	Chuck Lever III <chuck.lever@oracle.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	netdev@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Subject: [PATCH v3 17/55] ip6, udp6: Support MSG_SPLICE_PAGES
Date: Fri, 31 Mar 2023 17:08:36 +0100	[thread overview]
Message-ID: <20230331160914.1608208-18-dhowells@redhat.com> (raw)
In-Reply-To: <20230331160914.1608208-1-dhowells@redhat.com>

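Make IP6/UDP6 sendmsg() support MSG_SPLICE_PAGES.  This causes pages to be
spliced from the source iterator if possible, copying the data if not.  If
the output device does not support scatter-gather (NETIF_F_SG), the flag is
cleared and the data is appended by the normal path instead.

To share the implementation with IPv4, __ip_splice_alloc() and
__ip_splice_pages() are made non-static, declared in include/net/ip.h and
exported so that the IPv6 code can call them.

This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.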

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: netdev@vger.kernel.org
---
 include/net/ip.h      |  4 ++++
 net/ipv4/ip_output.c  | 11 ++++++-----
 net/ipv6/ip6_output.c | 28 +++++++++++++++++++++++++---
 3 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..e27d2ceffcfa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -211,6 +211,10 @@ int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 		    __u8 tos);
 void ip_init(void);
+int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
+		      unsigned int fragheaderlen, unsigned int maxfraglen,
+		      unsigned int hh_len);
+int __ip_splice_pages(struct sock *sk, struct sk_buff *skb, void *from, int *pcopy);
 int ip_append_data(struct sock *sk, struct flowi4 *fl4,
 		   int getfrag(void *from, char *to, int offset, int len,
 			       int odd, struct sk_buff *skb),
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 41a954ac9e1a..fa2546d944bc 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -959,9 +959,9 @@ csum_page(struct page *page, int offset, int copy)
 /*
  * Allocate a packet for MSG_SPLICE_PAGES.
  */
-static int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
-			     unsigned int fragheaderlen, unsigned int maxfraglen,
-			     unsigned int hh_len)
+int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
+		      unsigned int fragheaderlen, unsigned int maxfraglen,
+		      unsigned int hh_len)
 {
 	struct sk_buff *skb_prev = *pskb, *skb;
 	unsigned int fraggap = skb_prev->len - maxfraglen;
@@ -993,12 +993,12 @@ static int __ip_splice_alloc(struct sock *sk, struct sk_buff **pskb,
 	*pskb = skb;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_splice_alloc);
 
 /*
  * Add (or copy) data pages for MSG_SPLICE_PAGES.
  */
-static int __ip_splice_pages(struct sock *sk, struct sk_buff *skb,
-			     void *from, int *pcopy)
+int __ip_splice_pages(struct sock *sk, struct sk_buff *skb, void *from, int *pcopy)
 {
 	struct msghdr *msg = from;
 	struct page *page = NULL, **pages = &page;
@@ -1047,6 +1047,7 @@ static int __ip_splice_pages(struct sock *sk, struct sk_buff *skb,
 	*pcopy = copy;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__ip_splice_pages);
 
 static int __ip_append_data(struct sock *sk,
 			    struct flowi4 *fl4,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c314fdde0097..c95d034cb45a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1486,7 +1486,7 @@ static int __ip6_append_data(struct sock *sk,
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
 	int csummode = CHECKSUM_NONE;
-	unsigned int maxnonfragsize, headersize;
+	unsigned int maxnonfragsize, headersize, initial_length;
 	unsigned int wmem_alloc_delta = 0;
 	bool paged, extra_uref = false;
 
@@ -1559,6 +1559,7 @@ static int __ip6_append_data(struct sock *sk,
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;
 
+	initial_length = length;
 	if ((flags & MSG_ZEROCOPY) && length) {
 		struct msghdr *msg = from;
 
@@ -1589,6 +1590,14 @@ static int __ip6_append_data(struct sock *sk,
 				skb_zcopy_set(skb, uarg, &extra_uref);
 			}
 		}
+	} else if ((flags & MSG_SPLICE_PAGES) && length) {
+		if (inet_sk(sk)->hdrincl)
+			return -EPERM;
+		if (rt->dst.dev->features & NETIF_F_SG)
+			/* We need an empty buffer to attach stuff to */
+			initial_length = transhdrlen;
+		else
+			flags &= ~MSG_SPLICE_PAGES;
 	}
 
 	/*
@@ -1624,6 +1633,15 @@ static int __ip6_append_data(struct sock *sk,
 			unsigned int fraggap;
 			unsigned int alloclen, alloc_extra;
 			unsigned int pagedlen;
+
+			if (unlikely(flags & MSG_SPLICE_PAGES)) {
+				err = __ip_splice_alloc(sk, &skb, fragheaderlen,
+							maxfraglen, hh_len);
+				if (err < 0)
+					goto error;
+				continue;
+			}
+			initial_length = length;
 alloc_new_skb:
 			/* There's no room in the current skb */
 			if (skb)
@@ -1642,7 +1660,7 @@ static int __ip6_append_data(struct sock *sk,
 			 * If remaining data exceeds the mtu,
 			 * we know we need more fragment(s).
 			 */
-			datalen = length + fraggap;
+			datalen = initial_length + fraggap;
 
 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
@@ -1672,7 +1690,7 @@ static int __ip6_append_data(struct sock *sk,
 			}
 			alloclen += alloc_extra;
 
-			if (datalen != length + fraggap) {
+			if (datalen != initial_length + fraggap) {
 				/*
 				 * this is not the last fragment, the trailer
 				 * space is regarded as data space.
@@ -1778,6 +1796,10 @@ static int __ip6_append_data(struct sock *sk,
 				err = -EFAULT;
 				goto error;
 			}
+		} else if (flags & MSG_SPLICE_PAGES) {
+			err = __ip_splice_pages(sk, skb, from, &copy);
+			if (err < 0)
+				goto error;
 		} else if (!zc) {
 			int i = skb_shinfo(skb)->nr_frags;
 


  parent reply	other threads:[~2023-03-31 16:12 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-31 16:08 [PATCH v3 00/55] splice, net: Replace sendpage with sendmsg(MSG_SPLICE_PAGES) David Howells
2023-03-31 16:08 ` [PATCH v3 01/55] netfs: Fix netfs_extract_iter_to_sg() for ITER_UBUF/IOVEC David Howells
2023-03-31 19:05   ` Jeff Layton
2023-03-31 16:08 ` [PATCH v3 02/55] iov_iter: Remove last_offset member David Howells
2023-03-31 19:16   ` Jeff Layton
2023-03-31 16:08 ` [PATCH v3 03/55] net: Declare MSG_SPLICE_PAGES internal sendmsg() flag David Howells
2023-04-02 14:56   ` Willem de Bruijn
2023-03-31 16:08 ` [PATCH v3 04/55] mm: Move the page fragment allocator from page_alloc.c into its own file David Howells
2023-03-31 16:08 ` [PATCH v3 05/55] mm: Make the page_frag_cache allocator use multipage folios David Howells
2023-03-31 16:08 ` [PATCH v3 06/55] mm: Make the page_frag_cache allocator use per-cpu David Howells
2023-04-05 15:04   ` Christoph Hellwig
2023-03-31 16:08 ` [PATCH v3 07/55] tcp: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 08/55] tcp: Make sendmsg(MSG_SPLICE_PAGES) copy unspliceable data David Howells
2023-03-31 16:08 ` [PATCH v3 09/55] tcp: Convert do_tcp_sendpages() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 10/55] tcp_bpf: Inline do_tcp_sendpages as it's now a wrapper around tcp_sendmsg David Howells
2023-03-31 16:08 ` [PATCH v3 11/55] espintcp: Inline do_tcp_sendpages() David Howells
2023-03-31 16:08 ` [PATCH v3 12/55] tls: " David Howells
2023-03-31 16:08 ` [PATCH v3 13/55] siw: " David Howells
2023-03-31 16:08 ` [PATCH v3 14/55] tcp: Fold do_tcp_sendpages() into tcp_sendpage_locked() David Howells
2023-03-31 16:08 ` [PATCH v3 15/55] ip, udp: Support MSG_SPLICE_PAGES David Howells
2023-04-02 15:10   ` Willem de Bruijn
2023-04-03  9:50   ` David Howells
2023-04-03 13:46     ` Willem de Bruijn
2023-04-03 22:04     ` David Howells
2023-04-04 16:58       ` Willem de Bruijn
2023-04-04 17:16       ` David Howells
2023-04-04 17:36         ` Willem de Bruijn
2023-04-03 11:18   ` David Howells
2023-03-31 16:08 ` [PATCH v3 16/55] ip, udp: Make sendmsg(MSG_SPLICE_PAGES) copy unspliceable data David Howells
2023-03-31 16:08 ` David Howells [this message]
2023-03-31 16:08 ` [PATCH v3 18/55] udp: Convert udp_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 19/55] af_unix: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 20/55] af_unix: Make sendmsg(MSG_SPLICE_PAGES) copy unspliceable data David Howells
2023-03-31 16:08 ` [PATCH v3 21/55] crypto: af_alg: Pin pages rather than ref'ing if appropriate David Howells
2023-03-31 16:08 ` [PATCH v3 22/55] crypto: af_alg: Use netfs_extract_iter_to_sg() to create scatterlists David Howells
2023-03-31 16:08 ` [PATCH v3 23/55] crypto: af_alg: Indent the loop in af_alg_sendmsg() David Howells
2023-03-31 16:08 ` [PATCH v3 24/55] crypto: af_alg: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 25/55] crypto: af_alg: Convert af_alg_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 26/55] crypto: af_alg/hash: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 27/55] tls/device: " David Howells
2023-03-31 16:08 ` [PATCH v3 28/55] tls/device: Convert tls_device_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 29/55] tls/sw: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 30/55] tls/sw: Convert tls_sw_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 31/55] chelsio: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 32/55] chelsio: Convert chtls_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 33/55] kcm: Support MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 34/55] kcm: Convert kcm_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-31 16:08 ` [PATCH v3 35/55] splice, net: Use sendmsg(MSG_SPLICE_PAGES) rather than ->sendpage() David Howells
2023-03-31 16:08 ` [PATCH v3 36/55] splice, net: Reimplement splice_to_socket() to pass multiple bufs to sendmsg() David Howells
2023-03-31 16:08 ` [PATCH v3 37/55] Remove file->f_op->sendpage David Howells
2023-03-31 16:08 ` [PATCH v3 38/55] siw: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage to transmit David Howells
2023-04-04 10:52   ` Bernard Metzler
2023-04-05  8:18   ` David Howells
2023-03-31 16:08 ` [PATCH v3 39/55] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-03-31 16:08 ` [PATCH v3 40/55] iscsi: " David Howells
2023-03-31 16:09 ` [PATCH v3 41/55] iscsi: Assume "sendpage" is okay in iscsi_tcp_segment_map() David Howells
2023-04-24 17:19   ` Fabio M. De Francesco
2023-04-25  8:30   ` David Howells
2023-04-25 13:13     ` Fabio M. De Francesco
2023-03-31 16:09 ` [PATCH v3 42/55] tcp_bpf: Make tcp_bpf_sendpage() go through tcp_bpf_sendmsg(MSG_SPLICE_PAGES) David Howells
2023-03-31 16:09 ` [PATCH v3 43/55] net: Use sendmsg(MSG_SPLICE_PAGES) not sendpage in skb_send_sock() David Howells
2023-03-31 16:09 ` [PATCH v3 44/55] algif: Remove hash_sendpage*() David Howells
2023-03-31 16:09 ` [PATCH v3 45/55] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage() David Howells
2023-04-10 12:20   ` Xiubo Li
2023-03-31 16:09 ` [PATCH v3 46/55] rds: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-03-31 16:09 ` [PATCH v3 47/55] dlm: " David Howells
2023-03-31 16:09 ` [PATCH v3 48/55] sunrpc: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-03-31 16:09 ` [PATCH v3 49/55] nvme: " David Howells
2023-03-31 16:09 ` [PATCH v3 50/55] kcm: " David Howells
2023-03-31 16:09 ` [PATCH v3 51/55] smc: Drop smc_sendpage() in favour of smc_sendmsg() + MSG_SPLICE_PAGES David Howells
2023-04-26 13:07   ` D. Wythe
2023-04-28  3:12     ` D. Wythe
2023-03-31 16:09 ` [PATCH v3 52/55] ocfs2: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage() David Howells
2023-03-31 16:09 ` [PATCH v3 53/55] drbd: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage() David Howells
2023-03-31 16:09 ` [PATCH v3 54/55] drbd: Send an entire bio in a single sendmsg David Howells
2023-03-31 16:09 ` [PATCH v3 55/55] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES) David Howells
2023-03-31 16:27 ` Trivial TLS server David Howells
2023-03-31 16:28 ` Trivial TLS client David Howells
2023-03-31 17:37 ` Test program for AF_KCM David Howells
2023-04-03  9:30 ` [PATCH v3 00/55] splice, net: Replace sendpage with sendmsg(MSG_SPLICE_PAGES) Christoph Hellwig
2023-04-03  9:34 ` Is AF_KCM functional? David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230331160914.1608208-18-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=brauner@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hch@infradead.org \
    --cc=jlayton@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).