* [PATCH net-next, 1/2] net: mana: Add support for coalesced RX packets on CQE
2026-01-02 21:35 [PATCH net-next, 0/2] net: mana: Add support for coalesced RX packets Haiyang Zhang
@ 2026-01-02 21:35 ` Haiyang Zhang
2026-01-03 0:11 ` Jakub Kicinski
2026-01-02 21:35 ` [PATCH net-next, 2/2] net: mana: Add ethtool counters for RX CQEs in coalesced type Haiyang Zhang
1 sibling, 1 reply; 4+ messages in thread
From: Haiyang Zhang @ 2026-01-02 21:35 UTC (permalink / raw)
To: linux-hyperv, netdev, K. Y. Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Long Li, Konstantin Taranov,
Simon Horman, Erni Sri Satya Vennela, Shradha Gupta,
Saurabh Sengar, Aditya Garg, Dipayaan Roy, Shiraz Saleem,
linux-kernel, linux-rdma
Cc: paulros
From: Haiyang Zhang <haiyangz@microsoft.com>
Our NIC can place up to 4 RX packets on a single CQE. To support this
feature, check for and process CQEs of the type CQE_RX_COALESCED_4. The
feature is disabled by default, to avoid a possible latency regression.
Also add an ethtool handler to toggle this feature. To turn it on, run:
ethtool -C <nic> rx-frames 4
To turn it off:
ethtool -C <nic> rx-frames 1
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 32 ++++++-----
.../ethernet/microsoft/mana/mana_ethtool.c | 55 +++++++++++++++++++
include/net/mana/mana.h | 2 +
3 files changed, 74 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 1ad154f9db1a..a46a1adf83bc 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1330,7 +1330,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
req->update_hashkey = update_key;
req->update_indir_tab = update_tab;
req->default_rxobj = apc->default_rxobj;
- req->cqe_coalescing_enable = 0;
+ req->cqe_coalescing_enable = apc->cqe_coalescing_enable;
if (update_key)
memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);
@@ -1864,11 +1864,12 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
}
static void mana_rx_skb(void *buf_va, bool from_pool,
- struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
+ struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq,
+ int i)
{
struct mana_stats_rx *rx_stats = &rxq->stats;
struct net_device *ndev = rxq->ndev;
- uint pkt_len = cqe->ppi[0].pkt_len;
+ uint pkt_len = cqe->ppi[i].pkt_len;
u16 rxq_idx = rxq->rxq_idx;
struct napi_struct *napi;
struct xdp_buff xdp = {};
@@ -1912,7 +1913,7 @@ static void mana_rx_skb(void *buf_va, bool from_pool,
}
if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
- hash_value = cqe->ppi[0].pkt_hash;
+ hash_value = cqe->ppi[i].pkt_hash;
if (cqe->rx_hashtype & MANA_HASH_L4)
skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
@@ -2047,9 +2048,11 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct mana_recv_buf_oob *rxbuf_oob;
struct mana_port_context *apc;
struct device *dev = gc->dev;
+ bool coalesced = false;
void *old_buf = NULL;
u32 curr, pktlen;
bool old_fp;
+ int i = 0;
apc = netdev_priv(ndev);
@@ -2064,9 +2067,8 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
goto drop;
case CQE_RX_COALESCED_4:
- netdev_err(ndev, "RX coalescing is unsupported\n");
- apc->eth_stats.rx_coalesced_err++;
- return;
+ coalesced = true;
+ break;
case CQE_RX_OBJECT_FENCE:
complete(&rxq->fence_event);
@@ -2079,14 +2081,10 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
return;
}
- pktlen = oob->ppi[0].pkt_len;
-
- if (pktlen == 0) {
- /* data packets should never have packetlength of zero */
- netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
- rxq->gdma_id, cq->gdma_id, rxq->rxobj);
+nextpkt:
+ pktlen = oob->ppi[i].pkt_len;
+ if (pktlen == 0)
return;
- }
curr = rxq->buf_index;
rxbuf_oob = &rxq->rx_oobs[curr];
@@ -2097,12 +2095,15 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
/* Unsuccessful refill will have old_buf == NULL.
* In this case, mana_rx_skb() will drop the packet.
*/
- mana_rx_skb(old_buf, old_fp, oob, rxq);
+ mana_rx_skb(old_buf, old_fp, oob, rxq, i);
drop:
mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
mana_post_pkt_rxq(rxq);
+
+ if (coalesced && (++i < MANA_RXCOMP_OOB_NUM_PPI))
+ goto nextpkt;
}
static void mana_poll_rx_cq(struct mana_cq *cq)
@@ -3276,6 +3277,7 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
apc->port_handle = INVALID_MANA_HANDLE;
apc->pf_filter_handle = INVALID_MANA_HANDLE;
apc->port_idx = port_idx;
+ apc->cqe_coalescing_enable = 0;
mutex_init(&apc->vport_mutex);
apc->vport_use_count = 0;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 0e2f4343ac67..1b9ed5c9bbff 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -397,6 +397,58 @@ static void mana_get_channels(struct net_device *ndev,
channel->combined_count = apc->num_queues;
}
+static int mana_get_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+
+ ec->rx_max_coalesced_frames =
+ apc->cqe_coalescing_enable ? MANA_RXCOMP_OOB_NUM_PPI : 1;
+
+ return 0;
+}
+
+static int mana_set_coalesce(struct net_device *ndev,
+ struct ethtool_coalesce *ec,
+ struct kernel_ethtool_coalesce *kernel_coal,
+ struct netlink_ext_ack *extack)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+ u8 saved_cqe_coalescing_enable;
+ int err;
+
+ if (ec->rx_max_coalesced_frames != 1 &&
+ ec->rx_max_coalesced_frames != MANA_RXCOMP_OOB_NUM_PPI) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "rx-frames must be 1 or %u, got %u",
+ MANA_RXCOMP_OOB_NUM_PPI,
+ ec->rx_max_coalesced_frames);
+ return -EINVAL;
+ }
+
+ saved_cqe_coalescing_enable = apc->cqe_coalescing_enable;
+ apc->cqe_coalescing_enable =
+ ec->rx_max_coalesced_frames == MANA_RXCOMP_OOB_NUM_PPI;
+
+ if (!apc->port_is_up)
+ return 0;
+
+ err = mana_config_rss(apc, TRI_STATE_TRUE, false, false);
+
+ if (err) {
+ netdev_err(ndev, "Set rx-frames to %u failed:%d\n",
+ ec->rx_max_coalesced_frames, err);
+ NL_SET_ERR_MSG_FMT(extack, "Set rx-frames to %u failed:%d\n",
+ ec->rx_max_coalesced_frames, err);
+
+ apc->cqe_coalescing_enable = saved_cqe_coalescing_enable;
+ }
+
+ return err;
+}
+
static int mana_set_channels(struct net_device *ndev,
struct ethtool_channels *channels)
{
@@ -517,6 +569,7 @@ static int mana_get_link_ksettings(struct net_device *ndev,
}
const struct ethtool_ops mana_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES,
.get_ethtool_stats = mana_get_ethtool_stats,
.get_sset_count = mana_get_sset_count,
.get_strings = mana_get_strings,
@@ -527,6 +580,8 @@ const struct ethtool_ops mana_ethtool_ops = {
.set_rxfh = mana_set_rxfh,
.get_channels = mana_get_channels,
.set_channels = mana_set_channels,
+ .get_coalesce = mana_get_coalesce,
+ .set_coalesce = mana_set_coalesce,
.get_ringparam = mana_get_ringparam,
.set_ringparam = mana_set_ringparam,
.get_link_ksettings = mana_get_link_ksettings,
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index d7e089c6b694..51d26ebeff6c 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -556,6 +556,8 @@ struct mana_port_context {
bool port_is_up;
bool port_st_save; /* Saved port state */
+ u8 cqe_coalescing_enable;
+
struct mana_ethtool_stats eth_stats;
struct mana_ethtool_phy_stats phy_stats;
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH net-next, 2/2] net: mana: Add ethtool counters for RX CQEs in coalesced type
2026-01-02 21:35 [PATCH net-next, 0/2] net: mana: Add support for coalesced RX packets Haiyang Zhang
2026-01-02 21:35 ` [PATCH net-next, 1/2] net: mana: Add support for coalesced RX packets on CQE Haiyang Zhang
@ 2026-01-02 21:35 ` Haiyang Zhang
1 sibling, 0 replies; 4+ messages in thread
From: Haiyang Zhang @ 2026-01-02 21:35 UTC (permalink / raw)
To: linux-hyperv, netdev, K. Y. Srinivasan, Haiyang Zhang, Wei Liu,
Dexuan Cui, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Long Li, Konstantin Taranov,
Simon Horman, Erni Sri Satya Vennela, Shradha Gupta,
Saurabh Sengar, Aditya Garg, Dipayaan Roy, Shiraz Saleem,
linux-kernel, linux-rdma
Cc: paulros
From: Haiyang Zhang <haiyangz@microsoft.com>
For RX CQEs of the type CQE_RX_COALESCED_4, to measure coalescing
efficiency, add counters that count how many CQEs contain 2, 3, and 4
packets, respectively.
Also, add a counter for the error case where the first packet has length == 0.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 25 +++++++++++++++++--
.../ethernet/microsoft/mana/mana_ethtool.c | 17 ++++++++++---
include/net/mana/mana.h | 10 +++++---
3 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a46a1adf83bc..78824567d80b 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -2083,8 +2083,22 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
nextpkt:
pktlen = oob->ppi[i].pkt_len;
- if (pktlen == 0)
+ if (pktlen == 0) {
+ /* Collect coalesced CQE count based on packets processed.
+ * Coalesced CQEs have at least 2 packets, so index is i - 2.
+ */
+ if (i > 1) {
+ u64_stats_update_begin(&rxq->stats.syncp);
+ rxq->stats.coalesced_cqe[i - 2]++;
+ u64_stats_update_end(&rxq->stats.syncp);
+ } else if (i == 0) {
+ /* Error case stat */
+ u64_stats_update_begin(&rxq->stats.syncp);
+ rxq->stats.pkt_len0_err++;
+ u64_stats_update_end(&rxq->stats.syncp);
+ }
return;
+ }
curr = rxq->buf_index;
rxbuf_oob = &rxq->rx_oobs[curr];
@@ -2102,8 +2116,15 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
mana_post_pkt_rxq(rxq);
- if (coalesced && (++i < MANA_RXCOMP_OOB_NUM_PPI))
+ if (!coalesced)
+ return;
+
+ if (++i < MANA_RXCOMP_OOB_NUM_PPI)
goto nextpkt;
+
+ u64_stats_update_begin(&rxq->stats.syncp);
+ rxq->stats.coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 2]++;
+ u64_stats_update_end(&rxq->stats.syncp);
}
static void mana_poll_rx_cq(struct mana_cq *cq)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index 1b9ed5c9bbff..773f50b1a4f4 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -20,8 +20,6 @@ static const struct mana_stats_desc mana_eth_stats[] = {
tx_cqe_unknown_type)},
{"tx_linear_pkt_cnt", offsetof(struct mana_ethtool_stats,
tx_linear_pkt_cnt)},
- {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
- rx_coalesced_err)},
{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
rx_cqe_unknown_type)},
};
@@ -151,7 +149,7 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
{
struct mana_port_context *apc = netdev_priv(ndev);
unsigned int num_queues = apc->num_queues;
- int i;
+ int i, j;
if (stringset != ETH_SS_STATS)
return;
@@ -170,6 +168,9 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
ethtool_sprintf(&data, "rx_%d_xdp_drop", i);
ethtool_sprintf(&data, "rx_%d_xdp_tx", i);
ethtool_sprintf(&data, "rx_%d_xdp_redirect", i);
+ ethtool_sprintf(&data, "rx_%d_pkt_len0_err", i);
+ for (j = 0; j < MANA_RXCOMP_OOB_NUM_PPI - 1; j++)
+ ethtool_sprintf(&data, "rx_%d_coalesced_cqe_%d", i, j + 2);
}
for (i = 0; i < num_queues; i++) {
@@ -203,6 +204,8 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
u64 xdp_xmit;
u64 xdp_drop;
u64 xdp_tx;
+ u64 pkt_len0_err;
+ u64 coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 1];
u64 tso_packets;
u64 tso_bytes;
u64 tso_inner_packets;
@@ -211,7 +214,7 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
u64 short_pkt_fmt;
u64 csum_partial;
u64 mana_map_err;
- int q, i = 0;
+ int q, i = 0, j;
if (!apc->port_is_up)
return;
@@ -241,6 +244,9 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
xdp_drop = rx_stats->xdp_drop;
xdp_tx = rx_stats->xdp_tx;
xdp_redirect = rx_stats->xdp_redirect;
+ pkt_len0_err = rx_stats->pkt_len0_err;
+ for (j = 0; j < MANA_RXCOMP_OOB_NUM_PPI - 1; j++)
+ coalesced_cqe[j] = rx_stats->coalesced_cqe[j];
} while (u64_stats_fetch_retry(&rx_stats->syncp, start));
data[i++] = packets;
@@ -248,6 +254,9 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
data[i++] = xdp_drop;
data[i++] = xdp_tx;
data[i++] = xdp_redirect;
+ data[i++] = pkt_len0_err;
+ for (j = 0; j < MANA_RXCOMP_OOB_NUM_PPI - 1; j++)
+ data[i++] = coalesced_cqe[j];
}
for (q = 0; q < num_queues; q++) {
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 51d26ebeff6c..f8dd19860103 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -61,8 +61,11 @@ enum TRI_STATE {
#define MAX_PORTS_IN_MANA_DEV 256
+/* Maximum number of packets per coalesced CQE */
+#define MANA_RXCOMP_OOB_NUM_PPI 4
+
/* Update this count whenever the respective structures are changed */
-#define MANA_STATS_RX_COUNT 5
+#define MANA_STATS_RX_COUNT (6 + MANA_RXCOMP_OOB_NUM_PPI - 1)
#define MANA_STATS_TX_COUNT 11
#define MANA_RX_FRAG_ALIGNMENT 64
@@ -73,6 +76,8 @@ struct mana_stats_rx {
u64 xdp_drop;
u64 xdp_tx;
u64 xdp_redirect;
+ u64 pkt_len0_err;
+ u64 coalesced_cqe[MANA_RXCOMP_OOB_NUM_PPI - 1];
struct u64_stats_sync syncp;
};
@@ -227,8 +232,6 @@ struct mana_rxcomp_perpkt_info {
u32 pkt_hash;
}; /* HW DATA */
-#define MANA_RXCOMP_OOB_NUM_PPI 4
-
/* Receive completion OOB */
struct mana_rxcomp_oob {
struct mana_cqe_header cqe_hdr;
@@ -378,7 +381,6 @@ struct mana_ethtool_stats {
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
u64 tx_linear_pkt_cnt;
- u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread