From: Haiyang Zhang <haiyangz@microsoft.com>
To: linux-hyperv@vger.kernel.org, netdev@vger.kernel.org
Cc: haiyangz@microsoft.com, decui@microsoft.com, kys@microsoft.com,
paulros@microsoft.com, olaf@aepfle.de, vkuznets@redhat.com,
davem@davemloft.net, wei.liu@kernel.org, edumazet@google.com,
kuba@kernel.org, pabeni@redhat.com, leon@kernel.org,
longli@microsoft.com, ssengar@linux.microsoft.com,
linux-rdma@vger.kernel.org, daniel@iogearbox.net,
john.fastabend@gmail.com, bpf@vger.kernel.org, ast@kernel.org,
sharmaajay@microsoft.com, hawk@kernel.org,
linux-kernel@vger.kernel.org
Subject: [PATCH V2,net-next, 2/3] net: mana: Enable RX path to handle various MTU sizes
Date: Fri, 7 Apr 2023 13:59:55 -0700 [thread overview]
Message-ID: <1680901196-20643-3-git-send-email-haiyangz@microsoft.com> (raw)
In-Reply-To: <1680901196-20643-1-git-send-email-haiyangz@microsoft.com>
Update the RX data path to allocate and use RX queue DMA buffers that are
properly sized for the configured MTU, which may vary, instead of assuming a
fixed page-sized buffer.
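Concretely, the buffer geometry for each RX queue is computed at queue
creation time from the MTU:

    datasize   = ALIGN(mtu + ETH_HLEN, 64)
    headroom   = XDP_PACKET_HEADROOM   if mtu <= MANA_XDP_MTU_MAX, else 0
    alloc_size = mtu + MANA_RXBUF_PAD + headroom

Buffers whose alloc_size still fits in a page are backed by dev_alloc_page();
larger buffers are allocated with napi_alloc_frag()/netdev_alloc_frag().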
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
V2:
Refactored into multiple patches for readability, as suggested by Yunsheng Lin.
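
As a rough reference for the resulting sizes (assuming 4K pages, a 256-byte
XDP_PACKET_HEADROOM, and a 64-byte-aligned skb_shared_info of 320 bytes, so
MANA_RXBUF_PAD is about 334 bytes and MANA_XDP_MTU_MAX about 3506 bytes;
exact values depend on the kernel configuration):

    MTU 1500: datasize = 1536, headroom = 256, alloc_size ~= 2090 -> dev_alloc_page()
    MTU 9000: datasize = 9024, headroom = 0,   alloc_size ~= 9334 -> napi_alloc_frag()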
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 188 +++++++++++-------
include/net/mana/mana.h | 13 +-
2 files changed, 124 insertions(+), 77 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 112c642dc89b..e5d5dea763f2 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1185,10 +1185,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}
-static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
- struct xdp_buff *xdp)
+static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
+ uint pkt_len, struct xdp_buff *xdp)
{
- struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE);
+ struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
if (!skb)
return NULL;
@@ -1196,11 +1196,12 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
if (xdp->data_hard_start) {
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
- } else {
- skb_reserve(skb, XDP_PACKET_HEADROOM);
- skb_put(skb, pkt_len);
+ return skb;
}
+ skb_reserve(skb, rxq->headroom);
+ skb_put(skb, pkt_len);
+
return skb;
}
@@ -1233,7 +1234,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;
- skb = mana_build_skb(buf_va, pkt_len, &xdp);
+ skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
if (!skb)
goto drop;
@@ -1282,14 +1283,72 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
u64_stats_update_end(&rx_stats->syncp);
drop:
- WARN_ON_ONCE(rxq->xdp_save_page);
- rxq->xdp_save_page = virt_to_page(buf_va);
+ WARN_ON_ONCE(rxq->xdp_save_va);
+ /* Save for reuse */
+ rxq->xdp_save_va = buf_va;
++ndev->stats.rx_dropped;
return;
}
+static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
+ dma_addr_t *da, bool is_napi)
+{
+ struct page *page;
+ void *va;
+
+ /* Reuse XDP dropped page if available */
+ if (rxq->xdp_save_va) {
+ va = rxq->xdp_save_va;
+ rxq->xdp_save_va = NULL;
+ } else if (rxq->alloc_size > PAGE_SIZE) {
+ if (is_napi)
+ va = napi_alloc_frag(rxq->alloc_size);
+ else
+ va = netdev_alloc_frag(rxq->alloc_size);
+
+ if (!va)
+ return NULL;
+ } else {
+ page = dev_alloc_page();
+ if (!page)
+ return NULL;
+
+ va = page_to_virt(page);
+ }
+
+ *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
+ DMA_FROM_DEVICE);
+
+ if (dma_mapping_error(dev, *da)) {
+ put_page(virt_to_head_page(va));
+ return NULL;
+ }
+
+ return va;
+}
+
+/* Allocate frag for rx buffer, and save the old buf */
+static void mana_refill_rxoob(struct device *dev, struct mana_rxq *rxq,
+ struct mana_recv_buf_oob *rxoob, void **old_buf)
+{
+ dma_addr_t da;
+ void *va;
+
+ va = mana_get_rxfrag(rxq, dev, &da, true);
+
+ if (!va)
+ return;
+
+ dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
+ DMA_FROM_DEVICE);
+ *old_buf = rxoob->buf_va;
+
+ rxoob->buf_va = va;
+ rxoob->sgl[0].address = da;
+}
+
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct gdma_comp *cqe)
{
@@ -1299,10 +1358,8 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct mana_recv_buf_oob *rxbuf_oob;
struct mana_port_context *apc;
struct device *dev = gc->dev;
- void *new_buf, *old_buf;
- struct page *new_page;
+ void *old_buf = NULL;
u32 curr, pktlen;
- dma_addr_t da;
apc = netdev_priv(ndev);
@@ -1345,40 +1402,11 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
- /* Reuse XDP dropped page if available */
- if (rxq->xdp_save_page) {
- new_page = rxq->xdp_save_page;
- rxq->xdp_save_page = NULL;
- } else {
- new_page = alloc_page(GFP_ATOMIC);
- }
-
- if (new_page) {
- da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize,
- DMA_FROM_DEVICE);
-
- if (dma_mapping_error(dev, da)) {
- __free_page(new_page);
- new_page = NULL;
- }
- }
-
- new_buf = new_page ? page_to_virt(new_page) : NULL;
-
- if (new_buf) {
- dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize,
- DMA_FROM_DEVICE);
-
- old_buf = rxbuf_oob->buf_va;
-
- /* refresh the rxbuf_oob with the new page */
- rxbuf_oob->buf_va = new_buf;
- rxbuf_oob->buf_dma_addr = da;
- rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr;
- } else {
- old_buf = NULL; /* drop the packet if no memory */
- }
+ mana_refill_rxoob(dev, rxq, rxbuf_oob, &old_buf);
+ /* Unsuccessful refill will have old_buf == NULL.
+ * In this case, mana_rx_skb() will drop the packet.
+ */
mana_rx_skb(old_buf, oob, rxq);
drop:
@@ -1659,8 +1687,8 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
mana_deinit_cq(apc, &rxq->rx_cq);
- if (rxq->xdp_save_page)
- __free_page(rxq->xdp_save_page);
+ if (rxq->xdp_save_va)
+ put_page(virt_to_head_page(rxq->xdp_save_va));
for (i = 0; i < rxq->num_rx_buf; i++) {
rx_oob = &rxq->rx_oobs[i];
@@ -1668,10 +1696,10 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
if (!rx_oob->buf_va)
continue;
- dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize,
- DMA_FROM_DEVICE);
+ dma_unmap_single(dev, rx_oob->sgl[0].address,
+ rx_oob->sgl[0].size, DMA_FROM_DEVICE);
- free_page((unsigned long)rx_oob->buf_va);
+ put_page(virt_to_head_page(rx_oob->buf_va));
rx_oob->buf_va = NULL;
}
@@ -1681,6 +1709,26 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
kfree(rxq);
}
+static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
+ struct mana_rxq *rxq, struct device *dev)
+{
+ dma_addr_t da;
+ void *va;
+
+ va = mana_get_rxfrag(rxq, dev, &da, false);
+
+ if (!va)
+ return -ENOMEM;
+
+ rx_oob->buf_va = va;
+
+ rx_oob->sgl[0].address = da;
+ rx_oob->sgl[0].size = rxq->datasize;
+ rx_oob->sgl[0].mem_key = mem_key;
+
+ return 0;
+}
+
#define MANA_WQE_HEADER_SIZE 16
#define MANA_WQE_SGE_SIZE 16
@@ -1690,11 +1738,10 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
- struct page *page;
- dma_addr_t da;
u32 buf_idx;
+ int ret;
- WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
+ WARN_ON(rxq->datasize == 0);
*rxq_size = 0;
*cq_size = 0;
@@ -1703,25 +1750,12 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
rx_oob = &rxq->rx_oobs[buf_idx];
memset(rx_oob, 0, sizeof(*rx_oob));
- page = alloc_page(GFP_KERNEL);
- if (!page)
- return -ENOMEM;
-
- da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize,
- DMA_FROM_DEVICE);
-
- if (dma_mapping_error(dev, da)) {
- __free_page(page);
- return -ENOMEM;
- }
-
- rx_oob->buf_va = page_to_virt(page);
- rx_oob->buf_dma_addr = da;
-
rx_oob->num_sge = 1;
- rx_oob->sgl[0].address = rx_oob->buf_dma_addr;
- rx_oob->sgl[0].size = rxq->datasize;
- rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey;
+
+ ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq,
+ dev);
+ if (ret)
+ return ret;
rx_oob->wqe_req.sgl = rx_oob->sgl;
rx_oob->wqe_req.num_sge = rx_oob->num_sge;
@@ -1764,6 +1798,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
struct mana_obj_spec wq_spec;
struct mana_obj_spec cq_spec;
struct gdma_queue_spec spec;
+ unsigned int mtu = ndev->mtu;
struct mana_cq *cq = NULL;
struct gdma_context *gc;
u32 cq_size, rq_size;
@@ -1780,9 +1815,18 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
rxq->ndev = ndev;
rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
rxq->rxq_idx = rxq_idx;
- rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64);
rxq->rxobj = INVALID_MANA_HANDLE;
+ rxq->datasize = ALIGN(mtu + ETH_HLEN, 64);
+
+ if (mtu > MANA_XDP_MTU_MAX) {
+ rxq->alloc_size = mtu + MANA_RXBUF_PAD;
+ rxq->headroom = 0;
+ } else {
+ rxq->alloc_size = mtu + MANA_RXBUF_PAD + XDP_PACKET_HEADROOM;
+ rxq->headroom = XDP_PACKET_HEADROOM;
+ }
+
err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
if (err)
goto out;
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index bb11a6535d80..fee99d704281 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -36,9 +36,6 @@ enum TRI_STATE {
#define COMP_ENTRY_SIZE 64
-#define ADAPTER_MTU_SIZE 1500
-#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
-
#define RX_BUFFERS_PER_QUEUE 512
#define MAX_SEND_BUFFERS_PER_QUEUE 256
@@ -282,7 +279,6 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
- dma_addr_t buf_dma_addr;
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@@ -295,6 +291,11 @@ struct mana_recv_buf_oob {
struct gdma_posted_wqe_info wqe_inf;
};
+#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ + ETH_HLEN)
+
+#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
+
struct mana_rxq {
struct gdma_queue *gdma_rq;
/* Cache the gdma receive queue id */
@@ -304,6 +305,8 @@ struct mana_rxq {
u32 rxq_idx;
u32 datasize;
+ u32 alloc_size;
+ u32 headroom;
mana_handle_t rxobj;
@@ -322,7 +325,7 @@ struct mana_rxq {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
- struct page *xdp_save_page;
+ void *xdp_save_va; /* for reusing */
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
--
2.25.1
Thread overview: 11+ messages
2023-04-07 20:59 [PATCH V2,net-next, 0/3] net: mana: Add support for jumbo frame Haiyang Zhang
2023-04-07 20:59 ` [PATCH V2,net-next, 1/3] net: mana: Use napi_build_skb in RX path Haiyang Zhang
2023-04-11 15:06 ` Jesse Brandeburg
2023-04-07 20:59 ` Haiyang Zhang [this message]
2023-04-11 15:06 ` [PATCH V2,net-next, 2/3] net: mana: Enable RX path to handle various MTU sizes Jesse Brandeburg
2023-04-11 23:10 ` Jacob Keller
2023-04-12 14:38 ` Haiyang Zhang
2023-04-07 20:59 ` [PATCH V2,net-next, 3/3] net: mana: Add support for jumbo frame Haiyang Zhang
2023-04-11 15:06 ` Jesse Brandeburg
2023-04-11 23:12 ` Jacob Keller
2023-04-12 14:36 ` Haiyang Zhang