From: "Björn Töpel" <bjorn.topel@gmail.com>
To: bjorn.topel@gmail.com, magnus.karlsson@intel.com,
alexander.h.duyck@intel.com, alexander.duyck@gmail.com,
john.fastabend@gmail.com, ast@fb.com, brouer@redhat.com,
willemdebruijn.kernel@gmail.com, daniel@iogearbox.net,
mst@redhat.com, netdev@vger.kernel.org
Cc: michael.lundkvist@ericsson.com, jesse.brandeburg@intel.com,
anjali.singhai@intel.com, qi.z.zhang@intel.com
Subject: [PATCH bpf-next v2 13/15] xsk: support for Tx
Date: Fri, 27 Apr 2018 14:17:26 +0200 [thread overview]
Message-ID: <20180427121728.18512-14-bjorn.topel@gmail.com> (raw)
In-Reply-To: <20180427121728.18512-1-bjorn.topel@gmail.com>
From: Magnus Karlsson <magnus.karlsson@intel.com>
Here, Tx support is added. The user fills the Tx queue with frames to
be sent by the kernel, and let's the kernel know using the sendmsg
syscall.
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
---
net/xdp/xsk.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++++--
net/xdp/xsk_queue.h | 93 ++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 200 insertions(+), 4 deletions(-)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 2d7b0c90d996..b33c535c7996 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -36,6 +36,8 @@
#include "xsk_queue.h"
#include "xdp_umem.h"
+#define TX_BATCH_SIZE 16
+
static struct xdp_sock *xdp_sk(struct sock *sk)
{
return (struct xdp_sock *)sk;
@@ -101,6 +103,108 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return err;
}
+static void xsk_destruct_skb(struct sk_buff *skb)
+{
+ u32 id = (u32)(long)skb_shinfo(skb)->destructor_arg;
+ struct xdp_sock *xs = xdp_sk(skb->sk);
+
+ WARN_ON_ONCE(xskq_produce_id(xs->umem->cq, id));
+
+ sock_wfree(skb);
+}
+
+static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
+ size_t total_len)
+{
+ bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
+ u32 max_batch = TX_BATCH_SIZE;
+ struct xdp_sock *xs = xdp_sk(sk);
+ bool sent_frame = false;
+ struct xdp_desc desc;
+ struct sk_buff *skb;
+ int err = 0;
+
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
+ if (need_wait)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&xs->mutex);
+
+ while (xskq_peek_desc(xs->tx, &desc)) {
+ char *buffer;
+ u32 id, len;
+
+ if (max_batch-- == 0) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ if (xskq_reserve_id(xs->umem->cq)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ len = desc.len;
+ if (unlikely(len > xs->dev->mtu)) {
+ err = -EMSGSIZE;
+ goto out;
+ }
+
+ skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
+ if (unlikely(!skb)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ skb_put(skb, len);
+ id = desc.idx;
+ buffer = xdp_umem_get_data(xs->umem, id) + desc.offset;
+ err = skb_store_bits(skb, 0, buffer, len);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ goto out;
+ }
+
+ skb->dev = xs->dev;
+ skb->priority = sk->sk_priority;
+ skb->mark = sk->sk_mark;
+ skb_shinfo(skb)->destructor_arg = (void *)(long)id;
+ skb->destructor = xsk_destruct_skb;
+
+ err = dev_direct_xmit(skb, xs->queue_id);
+ /* Ignore NET_XMIT_CN as packet might have been sent */
+ if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
+ err = -EAGAIN;
+ /* SKB consumed by dev_direct_xmit() */
+ goto out;
+ }
+
+ sent_frame = true;
+ xskq_discard_desc(xs->tx);
+ }
+
+out:
+ if (sent_frame)
+ sk->sk_write_space(sk);
+
+ mutex_unlock(&xs->mutex);
+ return err;
+}
+
+static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
+{
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ if (unlikely(!xs->dev))
+ return -ENXIO;
+ if (unlikely(!(xs->dev->flags & IFF_UP)))
+ return -ENETDOWN;
+
+ return xsk_generic_xmit(sk, m, total_len);
+}
+
static unsigned int xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
@@ -110,6 +214,8 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
if (xs->rx && !xskq_empty_desc(xs->rx))
mask |= POLLIN | POLLRDNORM;
+ if (xs->tx && !xskq_full_desc(xs->tx))
+ mask |= POLLOUT | POLLWRNORM;
return mask;
}
@@ -270,6 +376,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->queue_id = sxdp->sxdp_queue_id;
xskq_set_umem(xs->rx, &xs->umem->props);
+ xskq_set_umem(xs->tx, &xs->umem->props);
out_unlock:
if (err)
@@ -383,8 +490,6 @@ static int xsk_mmap(struct file *file, struct socket *sock,
q = xs->umem->fq;
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
q = xs->umem->cq;
- else
- return -EINVAL;
}
if (!q)
@@ -420,7 +525,7 @@ static const struct proto_ops xsk_proto_ops = {
.shutdown = sock_no_shutdown,
.setsockopt = xsk_setsockopt,
.getsockopt = sock_no_getsockopt,
- .sendmsg = sock_no_sendmsg,
+ .sendmsg = xsk_sendmsg,
.recvmsg = sock_no_recvmsg,
.mmap = xsk_mmap,
.sendpage = sock_no_sendpage,
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 0a9b92b4f93a..3497e8808608 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -111,7 +111,93 @@ static inline void xskq_discard_id(struct xsk_queue *q)
(void)xskq_validate_id(q);
}
-/* Rx queue */
+static inline int xskq_produce_id(struct xsk_queue *q, u32 id)
+{
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+ ring->desc[q->prod_tail++ & q->ring_mask] = id;
+
+ /* Order producer and data */
+ smp_wmb();
+
+ WRITE_ONCE(q->ring->producer, q->prod_tail);
+ return 0;
+}
+
+static inline int xskq_reserve_id(struct xsk_queue *q)
+{
+ if (xskq_nb_free(q, q->prod_head, 1) == 0)
+ return -ENOSPC;
+
+ q->prod_head++;
+ return 0;
+}
+
+/* Rx/Tx queue */
+
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+{
+ u32 buff_len;
+
+ if (unlikely(d->idx >= q->umem_props.nframes)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ buff_len = q->umem_props.frame_size;
+ if (unlikely(d->len > buff_len || d->len == 0 ||
+ d->offset > buff_len || d->offset + d->len > buff_len)) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
+}
+
+static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
+ struct xdp_desc *desc)
+{
+ while (q->cons_tail != q->cons_head) {
+ struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+ unsigned int idx = q->cons_tail & q->ring_mask;
+
+ if (xskq_is_valid_desc(q, &ring->desc[idx])) {
+ if (desc)
+ *desc = ring->desc[idx];
+ return desc;
+ }
+
+ q->cons_tail++;
+ }
+
+ return NULL;
+}
+
+static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
+ struct xdp_desc *desc)
+{
+ struct xdp_rxtx_ring *ring;
+
+ if (q->cons_tail == q->cons_head) {
+ WRITE_ONCE(q->ring->consumer, q->cons_tail);
+ q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
+
+ /* Order consumer and data */
+ smp_rmb();
+
+ return xskq_validate_desc(q, desc);
+ }
+
+ ring = (struct xdp_rxtx_ring *)q->ring;
+ *desc = ring->desc[q->cons_tail & q->ring_mask];
+ return desc;
+}
+
+static inline void xskq_discard_desc(struct xsk_queue *q)
+{
+ q->cons_tail++;
+ (void)xskq_validate_desc(q, NULL);
+}
static inline int xskq_produce_batch_desc(struct xsk_queue *q,
u32 id, u32 len, u16 offset)
@@ -139,6 +225,11 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)
WRITE_ONCE(q->ring->producer, q->prod_tail);
}
+static inline bool xskq_full_desc(struct xsk_queue *q)
+{
+ return (xskq_nb_avail(q, q->nentries) == q->nentries);
+}
+
static inline bool xskq_empty_desc(struct xsk_queue *q)
{
return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries);
--
2.14.1
next prev parent reply other threads:[~2018-04-27 12:18 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-27 12:17 [PATCH bpf-next v2 00/15] Introducing AF_XDP support Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 01/15] net: initial AF_XDP skeleton Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 02/15] xsk: add user memory registration support sockopt Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 03/15] xsk: add umem fill queue support and mmap Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 04/15] xsk: add Rx queue setup and mmap support Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 05/15] xsk: add support for bind for Rx Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 06/15] xsk: add Rx receive functions and poll support Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 07/15] bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 08/15] xsk: wire up XDP_DRV side of AF_XDP Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 09/15] xsk: wire up XDP_SKB " Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 10/15] xsk: add umem completion queue support and mmap Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 11/15] xsk: add Tx queue setup and mmap support Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 12/15] dev: packet: make packet_direct_xmit a common function Björn Töpel
2018-04-27 12:17 ` Björn Töpel [this message]
2018-04-27 12:17 ` [PATCH bpf-next v2 14/15] xsk: statistics support Björn Töpel
2018-04-27 12:17 ` [PATCH bpf-next v2 15/15] samples/bpf: sample application and documentation for AF_XDP sockets Björn Töpel
2018-04-27 12:21 ` [PATCH bpf-next v2 00/15] Introducing AF_XDP support Björn Töpel
2018-04-27 17:16 ` Willem de Bruijn
2018-04-27 18:12 ` Björn Töpel
2018-04-27 19:06 ` Willem de Bruijn
2018-04-27 19:24 ` Björn Töpel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180427121728.18512-14-bjorn.topel@gmail.com \
--to=bjorn.topel@gmail.com \
--cc=alexander.duyck@gmail.com \
--cc=alexander.h.duyck@intel.com \
--cc=anjali.singhai@intel.com \
--cc=ast@fb.com \
--cc=brouer@redhat.com \
--cc=daniel@iogearbox.net \
--cc=jesse.brandeburg@intel.com \
--cc=john.fastabend@gmail.com \
--cc=magnus.karlsson@intel.com \
--cc=michael.lundkvist@ericsson.com \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=qi.z.zhang@intel.com \
--cc=willemdebruijn.kernel@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).