From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: Willem de Bruijn <willemb@google.com>
Subject: [PATCH RFC net-next 04/11] udp: paged allocation with gso
Date: Tue, 17 Apr 2018 16:00:54 -0400 [thread overview]
Message-ID: <20180417200059.30154-5-willemdebruijn.kernel@gmail.com> (raw)
In-Reply-To: <20180417200059.30154-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
When sending large datagrams that are later segmented, store data in
page frags to avoid copying from linear in skb_segment.
This logic will also be used by zerocopy.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
net/ipv4/ip_output.c | 15 +++++++++++----
net/ipv6/ip6_output.c | 19 ++++++++++++++-----
2 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7abfb24ec5e5..9ccd6c28e420 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -878,11 +878,13 @@ static int __ip_append_data(struct sock *sk,
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
u32 tskey = 0;
+ bool paged;
skb = skb_peek_tail(queue);
exthdrlen = !skb ? rt->dst.header_len : 0;
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
+ paged = !!cork->gso_size;
if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
@@ -934,6 +936,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -954,8 +957,12 @@ static int __ip_append_data(struct sock *sk,
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
+ else if (!paged)
alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += exthdrlen;
@@ -999,7 +1006,7 @@ static int __ip_append_data(struct sock *sk,
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, fraglen + exthdrlen - pagedlen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1015,7 +1022,7 @@ static int __ip_append_data(struct sock *sk,
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1023,7 +1030,7 @@ static int __ip_append_data(struct sock *sk,
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3ce947c1d173..9fbcec4fb946 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1270,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
+ bool paged;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1277,6 +1278,7 @@ static int __ip6_append_data(struct sock *sk,
dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
}
+ paged = !!cork->gso_size;
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
@@ -1368,6 +1370,7 @@ static int __ip6_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int pagedlen = 0;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1390,11 +1393,17 @@ static int __ip6_append_data(struct sock *sk,
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
+ fraglen = datalen + fragheaderlen;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
- alloclen = datalen + fragheaderlen;
+ else if (!paged)
+ alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ pagedlen = fraglen - alloclen;
+ }
alloclen += dst_exthdrlen;
@@ -1416,7 +1425,7 @@ static int __ip6_append_data(struct sock *sk,
*/
alloclen += sizeof(struct frag_hdr);
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
@@ -1455,7 +1464,7 @@ static int __ip6_append_data(struct sock *sk,
/*
* Find where to start putting bytes
*/
- data = skb_put(skb, fraglen);
+ data = skb_put(skb, fraglen - pagedlen);
skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
skb->transport_header = (skb->network_header +
@@ -1478,7 +1487,7 @@ static int __ip6_append_data(struct sock *sk,
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
--
2.17.0.484.g0c8726318c-goog
next prev parent reply other threads:[~2018-04-17 20:01 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-17 20:00 [PATCH RFC net-next 00/11] udp gso Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 01/11] udp: expose inet cork to udp Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 02/11] udp: add gso Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 03/11] udp: better wmem accounting on gso Willem de Bruijn
2018-04-17 20:00 ` Willem de Bruijn [this message]
2018-04-17 20:00 ` [PATCH RFC net-next 05/11] udp: add gso segment cmsg Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 06/11] udp: add gso support to virtual devices Willem de Bruijn
2018-04-18 0:43 ` Dimitris Michailidis
2018-04-18 3:27 ` Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 07/11] udp: zerocopy Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 08/11] selftests: udp gso Willem de Bruijn
2018-04-17 20:00 ` [PATCH RFC net-next 09/11] selftests: udp gso with connected sockets Willem de Bruijn
2018-04-17 20:15 ` [PATCH RFC net-next 00/11] udp gso Sowmini Varadhan
2018-04-17 20:23 ` Willem de Bruijn
2018-04-17 20:48 ` Sowmini Varadhan
2018-04-17 21:07 ` Willem de Bruijn
2018-04-18 2:25 ` Samudrala, Sridhar
2018-04-18 3:33 ` Willem de Bruijn
2018-04-18 12:31 ` Sowmini Varadhan
2018-04-18 13:35 ` Eric Dumazet
2018-04-18 13:47 ` Sowmini Varadhan
2018-04-18 13:51 ` Willem de Bruijn
2018-04-18 15:08 ` Samudrala, Sridhar
2018-04-18 17:40 ` David Miller
2018-04-18 17:34 ` David Miller
2018-04-18 13:59 ` Willem de Bruijn
2018-04-18 14:28 ` Willem de Bruijn
2018-04-18 17:28 ` David Miller
2018-04-18 18:12 ` Alexander Duyck
2018-04-18 18:22 ` Willem de Bruijn
2018-04-20 17:38 ` Alexander Duyck
2018-04-20 21:58 ` Willem de Bruijn
2018-04-21 2:08 ` Alexander Duyck
2018-04-18 19:33 ` David Miller
2018-04-20 18:27 ` Tushar Dave
2018-04-20 20:08 ` Alexander Duyck
2018-04-21 3:11 ` Tushar Dave
2018-08-31 9:09 ` Paolo Abeni
2018-08-31 10:09 ` Eric Dumazet
2018-08-31 13:08 ` Willem de Bruijn
2018-08-31 13:44 ` Paolo Abeni
2018-08-31 15:11 ` Willem de Bruijn
2018-09-03 8:02 ` Steffen Klassert
2018-09-03 11:45 ` Sowmini Varadhan
2018-04-18 11:17 ` Paolo Abeni
2018-04-18 13:49 ` Willem de Bruijn
2018-05-24 0:02 ` Marcelo Ricardo Leitner
2018-05-24 1:15 ` Willem de Bruijn
2018-04-18 17:24 ` David Miller
2018-04-18 17:50 ` David Miller
2018-04-18 18:12 ` Willem de Bruijn
2018-04-19 17:45 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180417200059.30154-5-willemdebruijn.kernel@gmail.com \
--to=willemdebruijn.kernel@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).