From: Prashant Bhole <prashantbhole.linux@gmail.com>
To: "David S . Miller" <davem@davemloft.net>,
"Michael S . Tsirkin" <mst@redhat.com>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Jesper Dangaard Brouer <hawk@kernel.org>
Cc: Prashant Bhole <prashantbhole.linux@gmail.com>,
Jason Wang <jasowang@redhat.com>, David Ahern <dsahern@gmail.com>,
Jakub Kicinski <jakub.kicinski@netronome.com>,
John Fastabend <john.fastabend@gmail.com>,
Toshiaki Makita <toshiaki.makita1@gmail.com>,
Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
Yonghong Song <yhs@fb.com>, Andrii Nakryiko <andriin@fb.com>,
netdev@vger.kernel.org
Subject: [RFC net-next 11/14] tun: run XDP program in tx path
Date: Wed, 18 Dec 2019 17:10:47 +0900 [thread overview]
Message-ID: <20191218081050.10170-12-prashantbhole.linux@gmail.com> (raw)
In-Reply-To: <20191218081050.10170-1-prashantbhole.linux@gmail.com>
Run the XDP program as soon as a packet is removed from the ptr
ring. Since this is XDP in the tx path, the traditional handling of
the XDP_TX/XDP_REDIRECT actions isn't valid. For this reason we call
do_xdp_generic_core() instead of do_xdp_generic(). do_xdp_generic_core()
just runs the program and leaves the action handling to us.
Signed-off-by: Prashant Bhole <prashantbhole.linux@gmail.com>
---
drivers/net/tun.c | 149 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 146 insertions(+), 3 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8aee7abd53a2..1afded9252f5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -131,6 +131,7 @@ struct tap_filter {
/* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal
* to max number of VCPUs in guest. */
#define MAX_TAP_QUEUES 256
+/* Largest number of tx ring entries tun_consume_packets() takes in one call */
+#define MAX_TAP_BATCH 64
#define MAX_TAP_FLOWS 4096
#define TUN_FLOW_EXPIRE (3 * HZ)
@@ -2173,6 +2174,109 @@ static ssize_t tun_put_user(struct tun_struct *tun,
return total;
}
+/* Hand back an skb that is neither shared nor cloned.
+ *
+ * If @skb is already exclusive it is returned unchanged. Otherwise a copy
+ * is made with skb_copy(GFP_ATOMIC) and the reference on the original is
+ * dropped with consume_skb(); the copy (NULL if skb_copy() failed) is
+ * returned. In the copy path the caller must not touch @skb afterwards.
+ */
+static struct sk_buff *tun_prepare_xdp_skb(struct sk_buff *skb)
+{
+	struct sk_buff *copy;
+
+	/* Fast path: nobody else references the skb */
+	if (!skb_shared(skb) && !skb_cloned(skb))
+		return skb;
+
+	copy = skb_copy(skb, GFP_ATOMIC);
+	consume_skb(skb);
+
+	return copy;
+}
+
+/* Run the tx path XDP program (tun->xdp_tx_prog) on an skb taken from the
+ * tx ring and act on the verdict.
+ *
+ * Returns the XDP action: XDP_PASS when no program is attached, XDP_DROP
+ * when a private copy of the skb could not be obtained, otherwise the value
+ * returned by do_xdp_generic_core(). tx_dropped is bumped for every verdict
+ * other than XDP_PASS and XDP_TX.
+ *
+ * Dereferences tun->xdp_tx_prog with rcu_dereference(), so the caller is
+ * expected to be inside an RCU read-side critical section.
+ */
+static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
+				 struct sk_buff *skb)
+{
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 act = XDP_PASS;
+
+	xdp_prog = rcu_dereference(tun->xdp_tx_prog);
+	if (!xdp_prog)
+		return act;
+
+	/* NOTE(review): this may swap skb for a private copy and release the
+	 * original, while the caller still holds the original pointer -
+	 * confirm the caller does not reuse it on XDP_PASS.
+	 */
+	skb = tun_prepare_xdp_skb(skb);
+	if (!skb) {
+		/* The copy failed and tun_prepare_xdp_skb() has already
+		 * released the original skb, so there is nothing to free.
+		 */
+		act = XDP_DROP;
+		goto drop;
+	}
+
+	act = do_xdp_generic_core(skb, &xdp, xdp_prog);
+	switch (act) {
+	case XDP_TX:
+		/* Rx path generic XDP will be called in this path
+		 */
+		local_bh_disable();
+		netif_receive_skb(skb);
+		local_bh_enable();
+		break;
+	case XDP_PASS:
+		break;
+	case XDP_REDIRECT:
+		/* Since we are not handling this case yet, let's free
+		 * skb here. In case of XDP_DROP/XDP_ABORTED, the skb
+		 * was already freed in do_xdp_generic_core()
+		 */
+		kfree_skb(skb);
+		/* fall through */
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(tun->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+		goto drop;
+	}
+
+	return act;
+drop:
+	this_cpu_inc(tun->pcpu_stats->tx_dropped);
+	return act;
+}
+
+/* Run the tx path XDP program (tun->xdp_tx_prog) on an xdp_frame taken from
+ * the tx ring.
+ *
+ * Returns the XDP action: XDP_PASS when no program is attached, otherwise
+ * the program's verdict. For any verdict other than XDP_PASS the frame is
+ * returned with xdp_return_frame_rx_napi() and must not be used by the
+ * caller. XDP_TX and XDP_REDIRECT are not supported here and are treated
+ * like an invalid action (warning + exception trace + frame release).
+ *
+ * @tfile is currently unused.
+ *
+ * Dereferences tun->xdp_tx_prog with rcu_dereference(), so the caller is
+ * expected to be inside an RCU read-side critical section.
+ */
+static u32 tun_do_xdp_tx(struct tun_struct *tun, struct tun_file *tfile,
+			 struct xdp_frame *frame)
+{
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 act = XDP_PASS;
+
+	xdp_prog = rcu_dereference(tun->xdp_tx_prog);
+	if (!xdp_prog)
+		return act;
+
+	/* Describe the frame's buffer to the program through an xdp_buff.
+	 * NOTE(review): only these four fields are set; any other xdp_buff
+	 * member the program can reach (e.g. rx queue info) is left as
+	 * uninitialised stack - confirm whether it must be filled in.
+	 */
+	xdp.data_hard_start = frame->data - frame->headroom;
+	xdp.data = frame->data;
+	xdp.data_end = xdp.data + frame->len;
+	xdp.data_meta = xdp.data - frame->metasize;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		/* NOTE(review): changes the program made to xdp.data /
+		 * xdp.data_end are not written back to the frame - confirm
+		 * this is intended.
+		 */
+		break;
+	case XDP_TX:
+	case XDP_REDIRECT:
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(tun->dev, xdp_prog, act);
+		/* fall through */
+	case XDP_DROP:
+		xdp_return_frame_rx_napi(frame);
+		break;
+	}
+
+	return act;
+}
+
static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
{
DECLARE_WAITQUEUE(wait, current);
@@ -2590,6 +2694,47 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return ret;
}
+/* Consume up to @n entries (capped at MAX_TAP_BATCH) from the tx ring of
+ * @tfile, run the tx path XDP handling on each one, and store the entries
+ * whose verdict is XDP_PASS in @ptr_array.
+ *
+ * Returns the number of entries written to @ptr_array; 0 if @tfile is NULL
+ * or no tun device is attached to it.
+ */
+static int tun_consume_packets(struct tun_file *tfile, void **ptr_array, int n)
+{
+	void *batch[MAX_TAP_BATCH];
+	struct tun_struct *tun;
+	int consumed, idx;
+	int passed = 0;
+	u32 verdict;
+
+	if (unlikely(!tfile))
+		return 0;
+
+	/* batch[] holds at most MAX_TAP_BATCH entries */
+	if (n > MAX_TAP_BATCH)
+		n = MAX_TAP_BATCH;
+
+	rcu_read_lock();
+	tun = rcu_dereference(tfile->tun);
+	if (unlikely(!tun))
+		goto unlock;
+
+	consumed = ptr_ring_consume_batched(&tfile->tx_ring, batch, n);
+	for (idx = 0; idx < consumed; idx++) {
+		void *entry = batch[idx];
+
+		/* Ring entries are either xdp_frames or skbs.
+		 * NOTE(review): tun_do_xdp_tx_generic() may replace the skb
+		 * with a private copy and release the original (see
+		 * tun_prepare_xdp_skb()); entry would then be stale when it
+		 * is stored below - confirm.
+		 */
+		if (tun_is_xdp_frame(entry))
+			verdict = tun_do_xdp_tx(tun, tfile,
+						tun_ptr_to_xdp(entry));
+		else
+			verdict = tun_do_xdp_tx_generic(tun, entry);
+
+		/* Anything but XDP_PASS was consumed by the handlers */
+		if (verdict != XDP_PASS)
+			continue;
+
+		ptr_array[passed++] = entry;
+	}
+
+unlock:
+	rcu_read_unlock();
+	return passed;
+}
+
static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
int flags)
{
@@ -2610,9 +2755,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
ptr = ctl->ptr;
break;
case TUN_MSG_CONSUME_PKTS:
- ret = ptr_ring_consume_batched(&tfile->tx_ring,
- ctl->ptr,
- ctl->num);
+ ret = tun_consume_packets(tfile, ctl->ptr, ctl->num);
goto out;
case TUN_MSG_UNCONSUME_PKTS:
ptr_ring_unconsume(&tfile->tx_ring, ctl->ptr,
--
2.21.0
next prev parent reply other threads:[~2019-12-18 8:12 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-18 8:10 [RFC net-next 00/14] XDP in tx path Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 01/14] net: add tx path XDP support Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 02/14] tools: sync kernel uapi/linux/if_link.h header Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 03/14] libbpf: API for tx path XDP support Prashant Bhole
2019-12-18 18:20 ` Alexei Starovoitov
2019-12-18 8:10 ` [RFC net-next 04/14] samples/bpf: xdp1, add XDP tx support Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 05/14] net: core: rename netif_receive_generic_xdp() to do_generic_xdp_core() Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 06/14] net: core: export do_xdp_generic_core() Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 07/14] tuntap: check tun_msg_ctl type at necessary places Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 08/14] vhost_net: user tap recvmsg api to access ptr ring Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 09/14] tuntap: remove usage of ptr ring in vhost_net Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 10/14] tun: set tx path XDP program Prashant Bhole
2019-12-18 8:10 ` Prashant Bhole [this message]
2019-12-18 10:07 ` [RFC net-next 11/14] tun: run XDP program in tx path Jesper Dangaard Brouer
2019-12-18 11:48 ` Toke Høiland-Jørgensen
2019-12-18 16:33 ` David Ahern
2019-12-19 2:44 ` Jason Wang
2019-12-18 18:19 ` Alexei Starovoitov
2019-12-19 2:34 ` Prashant Bhole
2019-12-19 10:15 ` Toke Høiland-Jørgensen
2019-12-20 0:07 ` Prashant Bhole
2019-12-20 3:24 ` Jason Wang
2019-12-20 4:46 ` Prashant Bhole
2019-12-20 7:36 ` Jason Wang
2019-12-20 10:11 ` Toke Høiland-Jørgensen
2019-12-20 16:11 ` David Ahern
2019-12-20 22:17 ` Prashant Bhole
2019-12-23 6:05 ` Jason Wang
2019-12-23 8:09 ` Prashant Bhole
2019-12-23 8:34 ` Jason Wang
2019-12-23 11:06 ` Prashant Bhole
2019-12-18 16:29 ` David Ahern
2019-12-19 1:47 ` Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 12/14] tun: add a way to inject tx path packet into Rx path Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 13/14] tun: handle XDP_TX action of tx path XDP program Prashant Bhole
2019-12-18 8:10 ` [RFC net-next 14/14] tun: run xdp prog when tun is read from file interface Prashant Bhole
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20191218081050.10170-12-prashantbhole.linux@gmail.com \
--to=prashantbhole.linux@gmail.com \
--cc=andriin@fb.com \
--cc=ast@kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=hawk@kernel.org \
--cc=jakub.kicinski@netronome.com \
--cc=jasowang@redhat.com \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=songliubraving@fb.com \
--cc=toshiaki.makita1@gmail.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.