From: "Michael S. Tsirkin" <mst@redhat.com>
To: David Ahern <dsahern@kernel.org>
Cc: netdev@vger.kernel.org, prashantbhole.linux@gmail.com,
jasowang@redhat.com, davem@davemloft.net,
jakub.kicinski@netronome.com, jbrouer@redhat.com,
toke@redhat.com, toshiaki.makita1@gmail.com,
daniel@iogearbox.net, john.fastabend@gmail.com, ast@kernel.org,
kafai@fb.com, songliubraving@fb.com, yhs@fb.com, andriin@fb.com,
dsahern@gmail.com
Subject: Re: [PATCH bpf-next 10/12] tun: run XDP program in tx path
Date: Thu, 23 Jan 2020 03:23:22 -0500 [thread overview]
Message-ID: <20200123032154-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <20200123014210.38412-11-dsahern@kernel.org>
On Wed, Jan 22, 2020 at 06:42:08PM -0700, David Ahern wrote:
> From: Prashant Bhole <prashantbhole.linux@gmail.com>
>
> Run the XDP program as soon as packet is removed from the ptr
> ring. Since this is XDP in tx path, the traditional handling of
> XDP actions XDP_TX/REDIRECT isn't valid. For this reason we call
> do_xdp_generic_core instead of do_xdp_generic. do_xdp_generic_core
> just runs the program and leaves the action handling to us.
>
> Signed-off-by: Prashant Bhole <prashantbhole.linux@gmail.com>
> ---
> drivers/net/tun.c | 153 +++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 150 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index b6bac773f2a0..71bcd4ec2571 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -130,6 +130,7 @@ struct tap_filter {
> /* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal
> * to max number of VCPUs in guest. */
> #define MAX_TAP_QUEUES 256
> +#define MAX_TAP_BATCH 64
> #define MAX_TAP_FLOWS 4096
>
> #define TUN_FLOW_EXPIRE (3 * HZ)
> @@ -175,6 +176,7 @@ struct tun_file {
> struct tun_struct *detached;
> struct ptr_ring tx_ring;
> struct xdp_rxq_info xdp_rxq;
> + void *pkt_ptrs[MAX_TAP_BATCH];
> };
>
> struct tun_page {
> @@ -2140,6 +2142,107 @@ static ssize_t tun_put_user(struct tun_struct *tun,
> return total;
> }
>
> +static struct sk_buff *tun_prepare_xdp_skb(struct sk_buff *skb)
> +{
> + struct sk_buff *nskb;
> +
> + if (skb_shared(skb) || skb_cloned(skb)) {
> + nskb = skb_copy(skb, GFP_ATOMIC);
> + consume_skb(skb);
> + return nskb;
> + }
> +
> + return skb;
> +}
> +
> +static u32 tun_do_xdp_tx_generic(struct tun_struct *tun,
> + struct sk_buff *skb)
> +{
> + struct bpf_prog *xdp_prog;
> + struct xdp_buff xdp;
> + u32 act = XDP_PASS;
> +
> + xdp_prog = rcu_dereference(tun->xdp_egress_prog);
> + if (xdp_prog) {
> + skb = tun_prepare_xdp_skb(skb);
> + if (!skb) {
> + act = XDP_DROP;
> + kfree_skb(skb);
> + goto drop;
> + }
> +
> + act = do_xdp_generic_core(skb, &xdp, xdp_prog);
> + switch (act) {
> + case XDP_TX:
> + /* Rx path generic XDP will be called in this path
> + */
> + local_bh_disable();
> + netif_receive_skb(skb);
> + local_bh_enable();
> + break;
> + case XDP_PASS:
> + break;
> + case XDP_REDIRECT:
> + /* Since we are not handling this case yet, let's free
> + * skb here. In case of XDP_DROP/XDP_ABORTED, the skb
> + * was already freed in do_xdp_generic_core()
> + */
> + kfree_skb(skb);
> + /* fall through */
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + /* fall through */
> + case XDP_ABORTED:
> + trace_xdp_exception(tun->dev, xdp_prog, act);
> + /* fall through */
> + case XDP_DROP:
> + goto drop;
> + }
> + }
> +
> + return act;
> +drop:
> + this_cpu_inc(tun->pcpu_stats->tx_dropped);
> + return act;
> +}
> +
> +static u32 tun_do_xdp_tx(struct tun_struct *tun, struct tun_file *tfile,
> + struct xdp_frame *frame)
> +{
> + struct bpf_prog *xdp_prog;
> + struct xdp_buff xdp;
> + u32 act = XDP_PASS;
> +
> + xdp_prog = rcu_dereference(tun->xdp_egress_prog);
> + if (xdp_prog) {
> + xdp.data_hard_start = frame->data - frame->headroom;
> + xdp.data = frame->data;
> + xdp.data_end = xdp.data + frame->len;
> + xdp.data_meta = xdp.data - frame->metasize;
> +
> + act = bpf_prog_run_xdp(xdp_prog, &xdp);
> + switch (act) {
> + case XDP_PASS:
> + break;
> + case XDP_TX:
> + /* fall through */
> + case XDP_REDIRECT:
> + /* fall through */
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + /* fall through */
> + case XDP_ABORTED:
> + trace_xdp_exception(tun->dev, xdp_prog, act);
> + /* fall through */
> + case XDP_DROP:
> + xdp_return_frame_rx_napi(frame);
> + break;
> + }
> + }
> +
> + return act;
> +}
> +
> static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
> {
> DECLARE_WAITQUEUE(wait, current);
> @@ -2557,6 +2660,52 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> return ret;
> }
>
> +static int tun_consume_packets(struct tun_file *tfile, void **ptr_array, int n)
> +{
> + void **pkts = tfile->pkt_ptrs;
> + struct xdp_frame *frame;
> + struct tun_struct *tun;
> + int i, num_ptrs;
> + int pkt_cnt = 0;
> + void *ptr;
> + u32 act;
> + int batchsz;
> +
> + if (unlikely(!tfile))
> + return 0;
> +
> + rcu_read_lock();
> + tun = rcu_dereference(tfile->tun);
> + if (unlikely(!tun)) {
> + rcu_read_unlock();
> + return 0;
> + }
> +
> + while (n) {
> + batchsz = (n > MAX_TAP_BATCH) ? MAX_TAP_BATCH : n;
> + n -= batchsz;
> + num_ptrs = ptr_ring_consume_batched(&tfile->tx_ring, pkts,
> + batchsz);
> + if (!num_ptrs)
> + break;
Can't we avoid looping over the packets in the current case,
where there are no XDP programs attached at all?
> + for (i = 0; i < num_ptrs; i++) {
> + ptr = pkts[i];
> + if (tun_is_xdp_frame(ptr)) {
> + frame = tun_ptr_to_xdp(ptr);
> + act = tun_do_xdp_tx(tun, tfile, frame);
> + } else {
> + act = tun_do_xdp_tx_generic(tun, ptr);
> + }
> +
> + if (act == XDP_PASS)
> + ptr_array[pkt_cnt++] = ptr;
> + }
> + }
> +
> + rcu_read_unlock();
> + return pkt_cnt;
> +}
> +
> static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
> int flags)
> {
> @@ -2577,9 +2726,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
> ptr = ctl->ptr;
> break;
> case TUN_MSG_CONSUME_PKTS:
> - ret = ptr_ring_consume_batched(&tfile->tx_ring,
> - ctl->ptr,
> - ctl->num);
> + ret = tun_consume_packets(tfile, ctl->ptr, ctl->num);
> goto out;
> case TUN_MSG_UNCONSUME_PKTS:
> ptr_ring_unconsume(&tfile->tx_ring, ctl->ptr,
> --
> 2.21.1 (Apple Git-122.3)
next prev parent reply other threads:[~2020-01-23 8:23 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-23 1:41 [PATCH bpf-next 00/12] Add support for XDP in egress path David Ahern
2020-01-23 1:41 ` [PATCH bpf-next 01/12] net: Add new XDP setup and query commands David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 02/12] net: Add BPF_XDP_EGRESS as a bpf_attach_type David Ahern
2020-01-23 11:34 ` Toke Høiland-Jørgensen
2020-01-23 21:32 ` David Ahern
2020-01-24 9:49 ` Toke Høiland-Jørgensen
2020-01-24 7:33 ` Martin Lau
2020-01-23 1:42 ` [PATCH bpf-next 03/12] net: Add IFLA_XDP_EGRESS for XDP programs in the egress path David Ahern
2020-01-23 11:35 ` Toke Høiland-Jørgensen
2020-01-23 21:33 ` David Ahern
2020-01-24 15:21 ` Jakub Kicinski
2020-01-24 15:36 ` Toke Høiland-Jørgensen
2020-01-26 1:43 ` David Ahern
2020-01-26 4:54 ` Alexei Starovoitov
2020-02-02 17:59 ` David Ahern
2020-01-26 12:49 ` Jesper Dangaard Brouer
2020-01-26 16:38 ` David Ahern
2020-01-26 22:17 ` Jakub Kicinski
2020-01-28 14:13 ` Jesper Dangaard Brouer
2020-01-30 14:45 ` Jakub Kicinski
2020-02-01 16:03 ` Toke Høiland-Jørgensen
2020-02-02 17:48 ` David Ahern
2020-01-26 22:11 ` Jakub Kicinski
2020-01-27 4:03 ` David Ahern
2020-01-27 14:16 ` Jakub Kicinski
2020-01-28 3:43 ` David Ahern
2020-01-28 13:57 ` Jakub Kicinski
2020-02-01 16:24 ` Toke Høiland-Jørgensen
2020-02-01 17:08 ` Jakub Kicinski
2020-02-01 20:05 ` Toke Høiland-Jørgensen
2020-02-02 4:15 ` Jakub Kicinski
2020-02-03 19:56 ` Toke Høiland-Jørgensen
2020-02-03 20:13 ` Toke Høiland-Jørgensen
2020-02-03 22:15 ` Jesper Dangaard Brouer
2020-02-04 11:00 ` Toke Høiland-Jørgensen
2020-02-04 17:09 ` Jakub Kicinski
2020-02-05 15:30 ` Toke Høiland-Jørgensen
2020-02-02 17:45 ` David Ahern
2020-02-02 19:12 ` Jakub Kicinski
2020-02-02 17:43 ` David Ahern
2020-02-02 19:31 ` Jakub Kicinski
2020-02-02 21:51 ` David Ahern
2020-02-01 15:59 ` Toke Høiland-Jørgensen
2020-02-02 17:54 ` David Ahern
2020-02-03 20:09 ` Toke Høiland-Jørgensen
2020-01-23 1:42 ` [PATCH bpf-next 04/12] net: core: rename netif_receive_generic_xdp() to do_generic_xdp_core() David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 05/12] tuntap: check tun_msg_ctl type at necessary places David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 06/12] tun: move shared functions to if_tun.h David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 07/12] vhost_net: user tap recvmsg api to access ptr ring David Ahern
2020-01-23 8:26 ` Michael S. Tsirkin
2020-01-23 1:42 ` [PATCH bpf-next 08/12] tuntap: remove usage of ptr ring in vhost_net David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 09/12] tun: set egress XDP program David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 10/12] tun: run XDP program in tx path David Ahern
2020-01-23 8:23 ` Michael S. Tsirkin [this message]
2020-01-24 13:36 ` Prashant Bhole
2020-01-24 13:44 ` Prashant Bhole
2020-01-23 1:42 ` [PATCH bpf-next 11/12] libbpf: Add egress XDP support David Ahern
2020-01-23 1:42 ` [PATCH bpf-next 12/12] samples/bpf: xdp1, add " David Ahern
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200123032154-mutt-send-email-mst@kernel.org \
--to=mst@redhat.com \
--cc=andriin@fb.com \
--cc=ast@kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=dsahern@kernel.org \
--cc=jakub.kicinski@netronome.com \
--cc=jasowang@redhat.com \
--cc=jbrouer@redhat.com \
--cc=john.fastabend@gmail.com \
--cc=kafai@fb.com \
--cc=netdev@vger.kernel.org \
--cc=prashantbhole.linux@gmail.com \
--cc=songliubraving@fb.com \
--cc=toke@redhat.com \
--cc=toshiaki.makita1@gmail.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.