From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: Willem de Bruijn <willemb@google.com>
Subject: [PATCH RFC v2 09/12] udp: enable sendmsg zerocopy
Date: Wed, 22 Feb 2017 11:38:58 -0500 [thread overview]
Message-ID: <20170222163901.90834-10-willemdebruijn.kernel@gmail.com> (raw)
In-Reply-To: <20170222163901.90834-1-willemdebruijn.kernel@gmail.com>
From: Willem de Bruijn <willemb@google.com>
Add MSG_ZEROCOPY support to inet/dgram. This includes udplite.
Tested:
The loopback test snd_zerocopy_lo produces the following results
without zerocopy (-u):
rx=173940 (10854 MB) tx=173940 txc=0
rx=367026 (22904 MB) tx=367026 txc=0
rx=564078 (35201 MB) tx=564078 txc=0
rx=756588 (47214 MB) tx=756588 txc=0
with zerocopy (-u -z):
rx=377994 (23588 MB) tx=377994 txc=377980
rx=792654 (49465 MB) tx=792654 txc=792632
rx=1209582 (75483 MB) tx=1209582 txc=1209552
rx=1628376 (101618 MB) tx=1628376 txc=1628338
The loopback test currently fails with corking, because
CHECKSUM_PARTIAL is disabled with UDP_CORK since commit
d749c9cbffd6 ("ipv4: no CHECKSUM_PARTIAL on MSG_MORE corked sockets").
I will propose allowing it for NETIF_F_LOOPBACK devices.
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
include/linux/skbuff.h | 5 +++++
net/ipv4/ip_output.c | 34 +++++++++++++++++++++++++++++-----
2 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6ad1724ceb60..9e7386f3f7a8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -424,6 +424,11 @@ struct ubuf_info {
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+#define sock_can_zerocopy(sk, rt, csummode) \
+ ((rt->dst.dev->features & NETIF_F_SG) && \
+ ((sk->sk_type == SOCK_RAW) || \
+ (sk->sk_type == SOCK_DGRAM && csummode & CHECKSUM_UNNECESSARY)))
+
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 737ce826d7ec..9e0110d8a429 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -919,7 +919,7 @@ static int __ip_append_data(struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
-
+ struct ubuf_info *uarg = NULL;
struct ip_options *opt = cork->opt;
int hh_len;
int exthdrlen;
@@ -963,9 +963,16 @@ static int __ip_append_data(struct sock *sk,
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length &&
+ sock_can_zerocopy(sk, rt, skb ? skb->ip_summed : csummode)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ }
+
cork->length += length;
if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
+ (sk->sk_protocol == IPPROTO_UDP) && !uarg &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
@@ -1017,6 +1024,8 @@ static int __ip_append_data(struct sock *sk,
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
+ else if (uarg)
+ alloclen = min_t(int, fraglen, MAX_HEADER);
else
alloclen = fraglen;
@@ -1059,11 +1068,12 @@ static int __ip_append_data(struct sock *sk,
cork->tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
+ skb_zcopy_set(skb, uarg);
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, alloclen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1079,7 +1089,9 @@ static int __ip_append_data(struct sock *sk,
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = min(datalen,
+ alloclen - exthdrlen - fragheaderlen);
+ copy -= transhdrlen - fraggap;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1087,7 +1099,7 @@ static int __ip_append_data(struct sock *sk,
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1115,6 +1127,17 @@ static int __ip_append_data(struct sock *sk,
err = -EFAULT;
goto error;
}
+ } else if (uarg) {
+ struct iov_iter *iter;
+
+ if (sk->sk_type == SOCK_RAW)
+ iter = &((struct msghdr **)from)[0]->msg_iter;
+ else
+ iter = &((struct msghdr *)from)->msg_iter;
+ err = skb_zerocopy_add_frags_iter(sk, skb, iter, copy, uarg);
+ if (err < 0)
+ goto error;
+ copy = err;
} else {
int i = skb_shinfo(skb)->nr_frags;
@@ -1155,6 +1178,7 @@ static int __ip_append_data(struct sock *sk,
error_efault:
err = -EFAULT;
error:
+ sock_zerocopy_put_abort(uarg);
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;
--
2.11.0.483.g087da7b7c-goog
next prev parent reply other threads:[~2017-02-22 16:39 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-02-22 16:38 [PATCH RFC v2 00/12] socket sendmsg MSG_ZEROCOPY Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 01/12] sock: allocate skbs from optmem Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 02/12] sock: skb_copy_ubufs support for compound pages Willem de Bruijn
2017-02-22 20:33 ` Eric Dumazet
2017-02-23 1:51 ` Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 03/12] sock: add generic socket zerocopy Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 04/12] sock: enable sendmsg zerocopy Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 05/12] sock: sendmsg zerocopy notification coalescing Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 06/12] sock: sendmsg zerocopy ulimit Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 07/12] sock: sendmsg zerocopy limit bytes per notification Willem de Bruijn
2017-02-22 16:38 ` [PATCH RFC v2 08/12] tcp: enable sendmsg zerocopy Willem de Bruijn
2017-02-22 16:38 ` Willem de Bruijn [this message]
2017-02-22 16:38 ` [PATCH RFC v2 10/12] raw: enable sendmsg zerocopy with IP_HDRINCL Willem de Bruijn
2017-02-22 16:39 ` [PATCH RFC v2 11/12] packet: enable sendmsg zerocopy Willem de Bruijn
2017-02-22 16:39 ` [PATCH RFC v2 12/12] test: add sendmsg zerocopy tests Willem de Bruijn
2017-02-23 15:45 ` [PATCH RFC v2 00/12] socket sendmsg MSG_ZEROCOPY David Miller
2017-02-24 23:03 ` Alexei Starovoitov
2017-02-25 0:25 ` Willem de Bruijn
2017-02-27 18:57 ` Michael Kerrisk
2017-02-28 19:46 ` Andy Lutomirski
2017-02-28 20:43 ` Willem de Bruijn
[not found] ` <CAF=yD-K_0zO3pMeXf-UKGTsD4sNOdyN9KJkUb5MnCO_J5pisrA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-28 21:06 ` Andy Lutomirski
2017-03-01 3:28 ` David Miller
2017-03-01 3:43 ` Eric Dumazet
2017-03-02 19:26 ` Andy Lutomirski
2017-02-28 21:09 ` Andy Lutomirski
2017-02-28 21:28 ` Willem de Bruijn
2017-02-28 21:47 ` Eric Dumazet
[not found] ` <1488318476.9415.270.camel-XN9IlZ5yJG9HTL0Zs8A6p+yfmBU6pStAUsxypvmhUTTZJqsBc5GL+g@public.gmane.org>
2017-02-28 22:25 ` Andy Lutomirski
[not found] ` <CALCETrVQj1AEsLEGGkWW1zApGz6_x2rDmE0wz4ft+O5h07f_Ug-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-28 22:40 ` Eric Dumazet
2017-02-28 22:52 ` Andy Lutomirski
2017-02-28 23:22 ` Eric Dumazet
[not found] ` <1488324131.9415.278.camel-XN9IlZ5yJG9HTL0Zs8A6p+yfmBU6pStAUsxypvmhUTTZJqsBc5GL+g@public.gmane.org>
2017-03-01 0:28 ` Tom Herbert
[not found] ` <CALx6S357ssnbEu7CMrczEjiX25QYBJh3WG=w8KuAoxGQS4aKLA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-01 0:37 ` Eric Dumazet
2017-03-01 0:58 ` Willem de Bruijn
2017-03-01 1:50 ` Tom Herbert
2017-03-01 3:25 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170222163901.90834-10-willemdebruijn.kernel@gmail.com \
--to=willemdebruijn.kernel@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).