From: Dipayaan Roy <dipayanroy@linux.microsoft.com>
To: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
decui@microsoft.com, andrew+netdev@lunn.ch, davem@davemloft.net,
edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
leon@kernel.org, longli@microsoft.com, kotaranov@microsoft.com,
horms@kernel.org, shradhagupta@linux.microsoft.com,
ssengar@linux.microsoft.com, ernis@linux.microsoft.com,
shirazsaleem@microsoft.com, linux-hyperv@vger.kernel.org,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-rdma@vger.kernel.org, stephen@networkplumber.org,
jacob.e.keller@intel.com, leitao@debian.org, kees@kernel.org,
john.fastabend@gmail.com, hawk@kernel.org, bpf@vger.kernel.org,
daniel@iogearbox.net, ast@kernel.org, sdf@fomichev.me,
dipayanroy@microsoft.com
Subject: [PATCH net-next v6 2/2] net: mana: force full-page RX buffers via ethtool private flag
Date: Tue, 7 Apr 2026 12:59:19 -0700 [thread overview]
Message-ID: <20260407200216.272659-3-dipayanroy@linux.microsoft.com> (raw)
In-Reply-To: <20260407200216.272659-1-dipayanroy@linux.microsoft.com>
On some ARM64 platforms with 4K PAGE_SIZE, page_pool fragment
allocation in the RX refill path can cause a 15-20% throughput
regression under high connection counts (>16 TCP streams).
Add an ethtool private flag "full-page-rx" that allows the user to
force one RX buffer per page, bypassing the page_pool fragment path.
This restores line-rate (180+ Gbps) performance on affected platforms.
Usage:
ethtool --set-priv-flags eth0 full-page-rx on
There is no behavioral change by default. The flag must be explicitly
enabled by the user or a udev rule.
The existing single-buffer-per-page logic for XDP and jumbo frames is
consolidated into a new helper mana_use_single_rxbuf_per_page() which
is now the single decision point for both the automatic and
user-controlled paths.
Signed-off-by: Dipayaan Roy <dipayanroy@linux.microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 22 ++++-
.../ethernet/microsoft/mana/mana_ethtool.c | 89 +++++++++++++++++++
include/net/mana/mana.h | 8 ++
3 files changed, 117 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 49c65cc1697c..59a1626c2be1 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -744,6 +744,25 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
return va;
}
+static bool
+mana_use_single_rxbuf_per_page(struct mana_port_context *apc, u32 mtu)
+{
+ /* On some platforms with 4K PAGE_SIZE, page_pool fragment allocation
+ * in the RX refill path (~2kB buffer) can cause significant throughput
+ * regression under high connection counts. Allow user to force one RX
+ * buffer per page via ethtool private flag to bypass the fragment
+ * path.
+ */
+ if (apc->priv_flags & BIT(MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF))
+ return true;
+
+ /* For xdp and jumbo frames make sure only one packet fits per page. */
+ if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc))
+ return true;
+
+ return false;
+}
+
/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
int mtu, u32 *datasize, u32 *alloc_size,
@@ -754,8 +773,7 @@ static void mana_get_rxbuf_cfg(struct mana_port_context *apc,
/* Calculate datasize first (consistent across all cases) */
*datasize = mtu + ETH_HLEN;
- /* For xdp and jumbo frames make sure only one packet fits per page */
- if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) {
+ if (mana_use_single_rxbuf_per_page(apc, mtu)) {
if (mana_xdp_get(apc)) {
*headroom = XDP_PACKET_HEADROOM;
*alloc_size = PAGE_SIZE;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a28ca461c135..0547c903f613 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -133,6 +133,10 @@ static const struct mana_stats_desc mana_phy_stats[] = {
{ "hc_tc7_tx_pause_phy", offsetof(struct mana_ethtool_phy_stats, tx_pause_tc7_phy) },
};
+static const char mana_priv_flags[MANA_PRIV_FLAG_MAX][ETH_GSTRING_LEN] = {
+ [MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF] = "full-page-rx"
+};
+
static int mana_get_sset_count(struct net_device *ndev, int stringset)
{
struct mana_port_context *apc = netdev_priv(ndev);
@@ -144,6 +148,10 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
ARRAY_SIZE(mana_phy_stats) +
ARRAY_SIZE(mana_hc_stats) +
num_queues * (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT);
+
+ case ETH_SS_PRIV_FLAGS:
+ return MANA_PRIV_FLAG_MAX;
+
default:
return -EINVAL;
}
@@ -192,6 +200,14 @@ static void mana_get_strings_stats(struct mana_port_context *apc, u8 **data)
}
}
+static void mana_get_strings_priv_flags(u8 **data)
+{
+ int i;
+
+ for (i = 0; i < MANA_PRIV_FLAG_MAX; i++)
+ ethtool_puts(data, mana_priv_flags[i]);
+}
+
static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
{
struct mana_port_context *apc = netdev_priv(ndev);
@@ -200,6 +216,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
case ETH_SS_STATS:
mana_get_strings_stats(apc, &data);
break;
+ case ETH_SS_PRIV_FLAGS:
+ mana_get_strings_priv_flags(&data);
+ break;
default:
break;
}
@@ -590,6 +609,74 @@ static int mana_get_link_ksettings(struct net_device *ndev,
return 0;
}
+static u32 mana_get_priv_flags(struct net_device *ndev)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+
+ return apc->priv_flags;
+}
+
+static int mana_set_priv_flags(struct net_device *ndev, u32 priv_flags)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+ u32 changed = apc->priv_flags ^ priv_flags;
+ u32 old_priv_flags = apc->priv_flags;
+ bool schedule_port_reset = false;
+ int err = 0;
+
+ if (!changed)
+ return 0;
+
+ /* Reject unknown bits */
+ if (priv_flags & ~GENMASK(MANA_PRIV_FLAG_MAX - 1, 0))
+ return -EINVAL;
+
+ if (changed & BIT(MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF)) {
+ apc->priv_flags = priv_flags;
+
+ if (!apc->port_is_up) {
+ /* Port is down, flag updated to apply on next up
+ * so just return.
+ */
+ return 0;
+ }
+
+ /* Pre-allocate buffers to prevent failure in mana_attach
+ * later
+ */
+ err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues);
+ if (err) {
+ netdev_err(ndev,
+ "Insufficient memory for new allocations\n");
+ apc->priv_flags = old_priv_flags;
+ return err;
+ }
+
+ err = mana_detach(ndev, false);
+ if (err) {
+ netdev_err(ndev, "mana_detach failed: %d\n", err);
+ apc->priv_flags = old_priv_flags;
+ goto out;
+ }
+
+ err = mana_attach(ndev);
+ if (err) {
+ netdev_err(ndev, "mana_attach failed: %d\n", err);
+ apc->priv_flags = old_priv_flags;
+ schedule_port_reset = true;
+ }
+ }
+
+out:
+ mana_pre_dealloc_rxbufs(apc);
+
+ if (err && schedule_port_reset)
+ queue_work(apc->ac->per_port_queue_reset_wq,
+ &apc->queue_reset_work);
+
+ return err;
+}
+
const struct ethtool_ops mana_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_RX_CQE_FRAMES,
.get_ethtool_stats = mana_get_ethtool_stats,
@@ -608,4 +695,6 @@ const struct ethtool_ops mana_ethtool_ops = {
.set_ringparam = mana_set_ringparam,
.get_link_ksettings = mana_get_link_ksettings,
.get_link = ethtool_op_get_link,
+ .get_priv_flags = mana_get_priv_flags,
+ .set_priv_flags = mana_set_priv_flags,
};
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 3336688fed5e..fd87e3d6c1f4 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -30,6 +30,12 @@ enum TRI_STATE {
TRI_STATE_TRUE = 1
};
+/* MANA ethtool private flag bit positions */
+enum mana_priv_flag_bits {
+ MANA_PRIV_FLAG_USE_FULL_PAGE_RXBUF = 0,
+ MANA_PRIV_FLAG_MAX,
+};
+
/* Number of entries for hardware indirection table must be in power of 2 */
#define MANA_INDIRECT_TABLE_MAX_SIZE 512
#define MANA_INDIRECT_TABLE_DEF_SIZE 64
@@ -531,6 +537,8 @@ struct mana_port_context {
u32 rxbpre_headroom;
u32 rxbpre_frag_count;
+ u32 priv_flags;
+
struct bpf_prog *bpf_prog;
/* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
--
2.43.0
next prev parent reply other threads:[~2026-04-07 20:02 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-07 19:59 [PATCH net-next v6 0/2] net: mana: add ethtool private flag for full-page RX buffers Dipayaan Roy
2026-04-07 19:59 ` [PATCH net-next v6 1/2] net: mana: refactor mana_get_strings() and mana_get_sset_count() to use switch Dipayaan Roy
2026-04-07 19:59 ` Dipayaan Roy [this message]
2026-04-10 1:35 ` [PATCH net-next v6 0/2] net: mana: add ethtool private flag for full-page RX buffers Jakub Kicinski
2026-04-12 19:59 ` Jakub Kicinski
2026-04-14 16:00 ` Dipayaan Roy
2026-04-16 15:31 ` Jakub Kicinski
2026-04-23 12:48 ` Dipayaan Roy
2026-04-23 16:33 ` Jakub Kicinski
2026-04-24 16:24 ` David Wei
2026-04-24 20:05 ` David Wei
2026-04-25 8:05 ` Dipayaan Roy
2026-04-27 16:01 ` David Wei
2026-04-27 20:17 ` Jakub Kicinski
2026-05-06 16:52 ` Dipayaan Roy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260407200216.272659-3-dipayanroy@linux.microsoft.com \
--to=dipayanroy@linux.microsoft.com \
--cc=andrew+netdev@lunn.ch \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=decui@microsoft.com \
--cc=dipayanroy@microsoft.com \
--cc=edumazet@google.com \
--cc=ernis@linux.microsoft.com \
--cc=haiyangz@microsoft.com \
--cc=hawk@kernel.org \
--cc=horms@kernel.org \
--cc=jacob.e.keller@intel.com \
--cc=john.fastabend@gmail.com \
--cc=kees@kernel.org \
--cc=kotaranov@microsoft.com \
--cc=kuba@kernel.org \
--cc=kys@microsoft.com \
--cc=leitao@debian.org \
--cc=leon@kernel.org \
--cc=linux-hyperv@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=longli@microsoft.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=sdf@fomichev.me \
--cc=shirazsaleem@microsoft.com \
--cc=shradhagupta@linux.microsoft.com \
--cc=ssengar@linux.microsoft.com \
--cc=stephen@networkplumber.org \
--cc=wei.liu@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.