From: Jonathan Lemon <jonathan.lemon@gmail.com>
To: <netdev@vger.kernel.org>
Cc: <kernel-team@fb.com>
Subject: [RFC PATCH v2 20/21] mlx5e: hook up the netgpu functions
Date: Mon, 27 Jul 2020 15:44:43 -0700 [thread overview]
Message-ID: <20200727224444.2987641-21-jonathan.lemon@gmail.com> (raw)
In-Reply-To: <20200727224444.2987641-1-jonathan.lemon@gmail.com>
From: Jonathan Lemon <bsd@fb.com>
Hook up all the netgpu functions to the mlx5e driver.
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +-
.../net/ethernet/mellanox/mlx5/core/en/txrx.h | 3 +
.../net/ethernet/mellanox/mlx5/core/en_main.c | 36 ++++++++++++
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 58 ++++++++++++++++---
.../net/ethernet/mellanox/mlx5/core/en_tx.c | 19 ++++++
.../net/ethernet/mellanox/mlx5/core/en_txrx.c | 16 ++++-
6 files changed, 125 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ae555c6be847..f6d63e99a6b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -297,7 +297,8 @@ struct mlx5e_cq_decomp {
enum mlx5e_dma_map_type {
MLX5E_DMA_MAP_SINGLE,
- MLX5E_DMA_MAP_PAGE
+ MLX5E_DMA_MAP_PAGE,
+ MLX5E_DMA_MAP_FIXED
};
struct mlx5e_sq_dma {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index cf425a60cddc..eb5dbcbc0f58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -253,6 +253,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
case MLX5E_DMA_MAP_PAGE:
dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
break;
+ case MLX5E_DMA_MAP_FIXED:
+ /* DMA mappings are fixed, or managed elsewhere. */
+ break;
default:
WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d75f22471357..36afe73faa0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -62,6 +62,7 @@
#include "en/xsk/setup.h"
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
+#include "en/netgpu/setup.h"
#include "en/hv_vhca_stats.h"
#include "en/devlink.h"
#include "lib/mlx5.h"
@@ -1970,6 +1971,24 @@ mlx5e_xsk_optional_open(struct mlx5e_priv *priv, int ix,
return err;
}
+static int
+mlx5e_netgpu_optional_open(struct mlx5e_priv *priv, int ix,
+ struct mlx5e_params *params,
+ struct mlx5e_channel_param *cparam, /* not referenced in this body */
+ struct mlx5e_channel *c)
+{
+ struct netgpu_ifq *ifq;
+ int err = 0;
+ /* Fetch the netgpu ifq for channel index ix (lookup semantics live in the helper). */
+ ifq = mlx5e_netgpu_get_ifq(params, params->xsk, ix);
+ /* NULL ifq: skip the open and return 0; otherwise return mlx5e_open_netgpu()'s result. */
+ if (ifq)
+ err = mlx5e_open_netgpu(priv, params, ifq, c);
+
+ return err;
+}
+
+
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam,
@@ -2017,6 +2036,11 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
goto err_close_queues;
}
+ /* This opens a second set of shadow queues for netgpu */
+ err = mlx5e_netgpu_optional_open(priv, ix, params, cparam, c);
+ if (unlikely(err))
+ goto err_close_queues;
+
*cp = c;
return 0;
@@ -2053,6 +2077,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_deactivate_xsk(c);
+ if (test_bit(MLX5E_CHANNEL_STATE_NETGPU, c->state))
+ mlx5e_deactivate_netgpu(c);
+
mlx5e_deactivate_rq(&c->rq);
mlx5e_deactivate_icosq(&c->async_icosq);
mlx5e_deactivate_icosq(&c->icosq);
@@ -2064,6 +2091,10 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
{
if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
mlx5e_close_xsk(c);
+
+ if (test_bit(MLX5E_CHANNEL_STATE_NETGPU, c->state))
+ mlx5e_close_netgpu(c);
+
mlx5e_close_queues(c);
netif_napi_del(&c->napi);
@@ -3042,11 +3073,13 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
+ mlx5e_netgpu_redirect_rqts_to_channels(priv, &priv->channels);
}
void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
+ mlx5e_netgpu_redirect_rqts_to_drop(priv, &priv->channels);
mlx5e_redirect_rqts_to_drop(priv);
@@ -4581,6 +4614,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
case XDP_SETUP_XSK_UMEM:
return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem,
xdp->xsk.queue_id);
+ case XDP_SETUP_NETGPU:
+ return mlx5e_netgpu_setup_ifq(dev, xdp->netgpu.ifq,
+ &xdp->netgpu.queue_id);
default:
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 74860f3827b1..746fbb417c3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -50,6 +50,7 @@
#include "en/xdp.h"
#include "en/xsk/rx.h"
#include "en/health.h"
+#include "en/netgpu/setup.h"
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
@@ -266,8 +267,11 @@ static inline int mlx5e_page_alloc(struct mlx5e_rq *rq,
{
if (rq->umem)
return mlx5e_xsk_page_alloc_umem(rq, dma_info);
- else
- return mlx5e_page_alloc_pool(rq, dma_info);
+
+ if (dma_info->netgpu_source)
+ return mlx5e_netgpu_get_page(rq, dma_info);
+
+ return mlx5e_page_alloc_pool(rq, dma_info);
}
void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info)
@@ -279,6 +283,9 @@ void mlx5e_page_release_dynamic(struct mlx5e_rq *rq,
struct mlx5e_dma_info *dma_info,
bool recycle)
{
+ if (dma_info->netgpu_source)
+ return mlx5e_netgpu_put_page(rq, dma_info, recycle);
+
if (likely(recycle)) {
if (mlx5e_rx_cache_put(rq, dma_info))
return;
@@ -394,6 +401,9 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
return -ENOMEM;
}
+ if (rq->netgpu && !mlx5e_netgpu_avail(rq, wqe_bulk))
+ return -ENOMEM;
+
for (i = 0; i < wqe_bulk; i++) {
struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i);
@@ -402,6 +412,9 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
goto free_wqes;
}
+ if (rq->netgpu)
+ mlx5e_netgpu_taken(rq);
+
return 0;
free_wqes:
@@ -416,12 +429,18 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
struct mlx5e_dma_info *di, u32 frag_offset, u32 len,
unsigned int truesize)
{
- dma_sync_single_for_cpu(rq->pdev,
- di->addr + frag_offset,
- len, DMA_FROM_DEVICE);
- page_ref_inc(di->page);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
di->page, frag_offset, len, truesize);
+
+ if (skb->zc_netgpu) {
+ di->page = NULL;
+ } else {
+ page_ref_inc(di->page);
+
+ dma_sync_single_for_cpu(rq->pdev,
+ di->addr + frag_offset,
+ len, DMA_FROM_DEVICE);
+ }
}
static inline void
@@ -1152,16 +1171,26 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
{
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
struct mlx5e_wqe_frag_info *head_wi = wi;
- u16 headlen = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
+ bool hd_split = rq->netgpu;
+ u16 header_len = hd_split ? TOTAL_HEADERS : MLX5E_RX_MAX_HEAD;
+ u16 headlen = min_t(u32, header_len, cqe_bcnt);
u16 frag_headlen = headlen;
u16 byte_cnt = cqe_bcnt - headlen;
struct sk_buff *skb;
+ /* RST packets may have short headers (74) and no payload */
+ if (hd_split && headlen != TOTAL_HEADERS && byte_cnt) {
+ /* XXX add drop counter */
+ pr_warn_once("BAD hd_split: headlen %d != %d\n",
+ headlen, TOTAL_HEADERS);
+ return NULL;
+ }
+
/* XDP is not supported in this configuration, as incoming packets
* might spread among multiple pages.
*/
skb = napi_alloc_skb(rq->cq.napi,
- ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
+ ALIGN(header_len, sizeof(long)));
if (unlikely(!skb)) {
rq->stats->buff_alloc_err++;
return NULL;
@@ -1169,6 +1198,19 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
prefetchw(skb->data);
+ if (hd_split) {
+ /* The first frag holds only headers: skip this frag and
+ * assume that all of the headers are already copied to the
+ * skb inline data.
+ */
+ frag_info++;
+ frag_headlen = 0;
+ wi++;
+
+ skb->zc_netgpu = 1;
+ skb_shinfo(skb)->destructor_arg = rq->netgpu;
+ }
+
while (byte_cnt) {
u16 frag_consumed_bytes =
min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index da596de3abba..4a5f884771e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -39,6 +39,7 @@
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
#include "lib/clock.h"
+#include "en/netgpu/setup.h"
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
@@ -207,6 +208,24 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
dseg++;
}
+ if (skb_netdma(skb)) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ int fsz = skb_frag_size(frag);
+
+ dma_addr = mlx5e_netgpu_get_dma(skb, frag);
+
+ dseg->addr = cpu_to_be64(dma_addr);
+ dseg->lkey = sq->mkey_be;
+ dseg->byte_count = cpu_to_be32(fsz);
+
+ mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_FIXED);
+ num_dma++;
+ dseg++;
+ }
+ return num_dma;
+ }
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
int fsz = skb_frag_size(frag);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index e3dbab2a294c..383289e85b01 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -122,6 +122,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ bool netgpu_open = test_bit(MLX5E_CHANNEL_STATE_NETGPU, c->state);
bool aff_change = false;
bool busy_xsk = false;
bool busy = false;
@@ -139,7 +140,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq);
if (likely(budget)) { /* budget=0 means: don't poll rx rings */
- if (xsk_open)
+ if (xsk_open || netgpu_open)
work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget);
if (likely(budget - work_done))
@@ -159,6 +160,14 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_post_rx_mpwqes,
mlx5e_post_rx_wqes,
rq);
+
+ if (netgpu_open) {
+ busy_xsk |= INDIRECT_CALL_2(xskrq->post_wqes,
+ mlx5e_post_rx_mpwqes,
+ mlx5e_post_rx_wqes,
+ xskrq);
+ }
+
if (xsk_open) {
busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq);
busy_xsk |= mlx5e_napi_xsk_post(xsksq, xskrq);
@@ -192,6 +201,11 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&c->async_icosq.cq);
mlx5e_cq_arm(&c->xdpsq.cq);
+ if (netgpu_open) {
+ mlx5e_handle_rx_dim(xskrq);
+ mlx5e_cq_arm(&xskrq->cq);
+ }
+
if (xsk_open) {
mlx5e_handle_rx_dim(xskrq);
mlx5e_cq_arm(&xsksq->cq);
--
2.24.1
next prev parent reply other threads:[~2020-07-27 22:46 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-27 22:44 [RFC PATCH v2 00/21] netgpu: networking between NIC and GPU/CPU Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 01/21] linux/log2.h: enclose macro arg in parens Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 02/21] mm/memory_hotplug: add {add|release}_memory_pages Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 03/21] mm: Allow DMA mapping of pages which are not online Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 04/21] kernel/user: export free_uid Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 05/21] uapi/misc: add shqueue.h for shared queues Jonathan Lemon
2020-07-28 19:53 ` kernel test robot
2020-07-27 22:44 ` [RFC PATCH v2 06/21] include: add netgpu UAPI and kernel definitions Jonathan Lemon
2020-07-29 1:25 ` kernel test robot
2020-07-27 22:44 ` [RFC PATCH v2 07/21] netdevice: add SETUP_NETGPU to the netdev_bpf structure Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 08/21] skbuff: add a zc_netgpu bitflag Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 09/21] core/skbuff: use skb_zdata for testing whether skb is zerocopy Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 10/21] netgpu: add network/gpu/host dma module Jonathan Lemon
2020-07-28 16:26 ` Greg KH
2020-07-28 17:41 ` Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 11/21] core/skbuff: add page recycling logic for netgpu pages Jonathan Lemon
2020-07-28 16:28 ` Greg KH
2020-07-28 18:00 ` Jonathan Lemon
2020-07-28 18:26 ` Greg KH
2020-07-27 22:44 ` [RFC PATCH v2 12/21] lib: have __zerocopy_sg_from_iter get netgpu pages for a sk Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 13/21] net/tcp: Pad TCP options out to a fixed size for netgpu Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 14/21] net/tcp: add netgpu ioctl setting up zero copy RX queues Jonathan Lemon
2020-07-28 2:16 ` Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 15/21] net/tcp: add MSG_NETDMA flag for sendmsg() Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 16/21] mlx5: remove the umem parameter from mlx5e_open_channel Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 17/21] mlx5e: add header split ability Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 18/21] mlx5e: add netgpu entries to mlx5 structures Jonathan Lemon
2020-07-27 22:44 ` [RFC PATCH v2 19/21] mlx5e: add the netgpu driver functions Jonathan Lemon
2020-07-28 16:27 ` Greg KH
2020-07-27 22:44 ` Jonathan Lemon [this message]
2020-07-27 22:44 ` [RFC PATCH v2 21/21] netgpu/nvidia: add Nvidia plugin for netgpu Jonathan Lemon
2020-07-28 16:31 ` Greg KH
2020-07-28 17:18 ` Chris Mason
2020-07-28 17:27 ` Christoph Hellwig
2020-07-28 18:47 ` Chris Mason
2020-07-28 19:55 ` [RFC PATCH v2 00/21] netgpu: networking between NIC and GPU/CPU Stephen Hemminger
2020-07-28 20:43 ` Jonathan Lemon
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200727224444.2987641-21-jonathan.lemon@gmail.com \
--to=jonathan.lemon@gmail.com \
--cc=kernel-team@fb.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.