All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 0/3] Support PERF MGMT for RXE
@ 2026-04-08  0:09 zhenwei pi
  2026-04-08  0:09 ` [PATCH v5 1/3] RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT zhenwei pi
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: zhenwei pi @ 2026-04-08  0:09 UTC (permalink / raw)
  To: linux-kernel, linux-rdma; +Cc: zyjzyj2000, jgg, leon, zhenwei pi

v5:
- remove patch "RDMA/core: Fix memory free for GID table", it was
  applied by Jason separately.
- suggested by Yanjun, use 'skb_network_offset' to calculate the
  length of received packets.

v4:
- drop rxe_ib_device_get_netdev and RXE_PORT, use 1 instead
- avoid UAF to get skb length
- remove one-line wrapper rxe_counter_get, use atomic64_read instead
- fix memory free for GID table, this is a new patch in this series.

v3:
- merge 'RDMA/rxe: use rxe_counter_get' into previous commit
- zero *out* MAD memory
- return success with an error status rather than failure, to avoid
  hanging the upper layer

v2:
- Fix overflow for PMA counter *link_downed_counter*
- Use *rxe_counter_get* instead of *atomic64_read* for hw-counters

v1:
Support PERF MGMT for RXE, add sent/received bytes for RXE counters,
also improve coding style.

zhenwei pi (3):
  RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT
  RDMA/rxe: add SENT/RCVD bytes
  RDMA/rxe: support perf mgmt GET method

 drivers/infiniband/sw/rxe/Makefile          |   1 +
 drivers/infiniband/sw/rxe/rxe_hw_counters.c |   2 +
 drivers/infiniband/sw/rxe/rxe_hw_counters.h |   2 +
 drivers/infiniband/sw/rxe/rxe_loc.h         |   6 ++
 drivers/infiniband/sw/rxe/rxe_mad.c         | 101 ++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_mcast.c       |   4 +-
 drivers/infiniband/sw/rxe/rxe_net.c         |   9 +-
 drivers/infiniband/sw/rxe/rxe_recv.c        |   2 +
 drivers/infiniband/sw/rxe/rxe_verbs.c       |   5 +-
 drivers/infiniband/sw/rxe/rxe_verbs.h       |  10 +-
 10 files changed, 129 insertions(+), 13 deletions(-)
 create mode 100644 drivers/infiniband/sw/rxe/rxe_mad.c

-- 
2.43.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v5 1/3] RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT
  2026-04-08  0:09 [PATCH v5 0/3] Support PERF MGMT for RXE zhenwei pi
@ 2026-04-08  0:09 ` zhenwei pi
  2026-04-08  0:09 ` [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes zhenwei pi
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 9+ messages in thread
From: zhenwei pi @ 2026-04-08  0:09 UTC (permalink / raw)
  To: linux-kernel, linux-rdma; +Cc: zyjzyj2000, jgg, leon, zhenwei pi

Suggested by Leon, remove the rxe_ib_device_get_netdev() wrapper and
the RXE_PORT definition. These additions do not improve readability,
and RXE has always had only a single port.

Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
---
 drivers/infiniband/sw/rxe/rxe_mcast.c | 4 ++--
 drivers/infiniband/sw/rxe/rxe_net.c   | 7 +++----
 drivers/infiniband/sw/rxe/rxe_verbs.c | 4 ++--
 drivers/infiniband/sw/rxe/rxe_verbs.h | 6 ------
 4 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index 5cad72073eca..acd03bd87794 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -34,7 +34,7 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
 	struct net_device *ndev;
 	int ret;
 
-	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+	ndev = ib_device_get_netdev(&rxe->ib_dev, 1);
 	if (!ndev)
 		return -ENODEV;
 
@@ -59,7 +59,7 @@ static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
 	struct net_device *ndev;
 	int ret;
 
-	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+	ndev = ib_device_get_netdev(&rxe->ib_dev, 1);
 	if (!ndev)
 		return -ENODEV;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 211bd3000acc..6621d01ac32d 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -602,7 +602,7 @@ const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num)
 	struct net_device *ndev;
 	char *ndev_name;
 
-	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+	ndev = ib_device_get_netdev(&rxe->ib_dev, 1);
 	if (!ndev)
 		return NULL;
 	ndev_name = ndev->name;
@@ -646,12 +646,11 @@ static void rxe_sock_put(struct sock *sk,
 
 void rxe_net_del(struct ib_device *dev)
 {
-	struct rxe_dev *rxe = container_of(dev, struct rxe_dev, ib_dev);
 	struct net_device *ndev;
 	struct sock *sk;
 	struct net *net;
 
-	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+	ndev = ib_device_get_netdev(dev, 1);
 	if (!ndev)
 		return;
 
@@ -699,7 +698,7 @@ void rxe_set_port_state(struct rxe_dev *rxe)
 {
 	struct net_device *ndev;
 
-	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
+	ndev = ib_device_get_netdev(&rxe->ib_dev, 1);
 	if (!ndev)
 		return;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 4d4891dc2884..eb17b6086d5e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -50,7 +50,7 @@ static int rxe_query_port(struct ib_device *ibdev,
 		goto err_out;
 	}
 
-	ndev = rxe_ib_device_get_netdev(ibdev);
+	ndev = ib_device_get_netdev(ibdev, 1);
 	if (!ndev) {
 		err = -ENODEV;
 		goto err_out;
@@ -1441,7 +1441,7 @@ static int rxe_enable_driver(struct ib_device *ib_dev)
 	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
 	struct net_device *ndev;
 
-	ndev = rxe_ib_device_get_netdev(ib_dev);
+	ndev = ib_device_get_netdev(ib_dev, 1);
 	if (!ndev)
 		return -ENODEV;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index d92f80d16f78..e800545d1046 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -415,7 +415,6 @@ struct rxe_port {
 	u32			qp_gsi_index;
 };
 
-#define	RXE_PORT	1
 struct rxe_dev {
 	struct ib_device	ib_dev;
 	struct ib_device_attr	attr;
@@ -451,11 +450,6 @@ struct rxe_dev {
 	struct rxe_port		port;
 };
 
-static inline struct net_device *rxe_ib_device_get_netdev(struct ib_device *dev)
-{
-	return ib_device_get_netdev(dev, RXE_PORT);
-}
-
 static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index)
 {
 	atomic64_inc(&rxe->stats_counters[index]);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes
  2026-04-08  0:09 [PATCH v5 0/3] Support PERF MGMT for RXE zhenwei pi
  2026-04-08  0:09 ` [PATCH v5 1/3] RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT zhenwei pi
@ 2026-04-08  0:09 ` zhenwei pi
  2026-04-10 22:37   ` yanjun.zhu
  2026-04-08  0:09 ` [PATCH v5 3/3] RDMA/rxe: support perf mgmt GET method zhenwei pi
  2026-04-09  5:26 ` [PATCH v5 0/3] Support PERF MGMT for RXE Zhu Yanjun
  3 siblings, 1 reply; 9+ messages in thread
From: zhenwei pi @ 2026-04-08  0:09 UTC (permalink / raw)
  To: linux-kernel, linux-rdma; +Cc: zyjzyj2000, jgg, leon, zhenwei pi

RXE lacks counters for sent/received bytes; add them.

Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
---
 drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 ++
 drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2 ++
 drivers/infiniband/sw/rxe/rxe_net.c         | 2 ++
 drivers/infiniband/sw/rxe/rxe_recv.c        | 2 ++
 drivers/infiniband/sw/rxe/rxe_verbs.h       | 6 ++++++
 5 files changed, 14 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index 437917a7d8f2..17edaa9a9b9b 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -22,6 +22,8 @@ static const struct rdma_stat_desc rxe_counter_descs[] = {
 	[RXE_CNT_LINK_DOWNED].name         =  "link_downed",
 	[RXE_CNT_RDMA_SEND].name           =  "rdma_sends",
 	[RXE_CNT_RDMA_RECV].name           =  "rdma_recvs",
+	[RXE_CNT_SENT_BYTES].name          =  "sent_bytes",
+	[RXE_CNT_RCVD_BYTES].name          =  "rcvd_bytes",
 };
 
 int rxe_ib_get_hw_stats(struct ib_device *ibdev,
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index 051f9e1c3852..01b355103cbc 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -26,6 +26,8 @@ enum rxe_counters {
 	RXE_CNT_LINK_DOWNED,
 	RXE_CNT_RDMA_SEND,
 	RXE_CNT_RDMA_RECV,
+	RXE_CNT_SENT_BYTES,
+	RXE_CNT_RCVD_BYTES,
 	RXE_NUM_OF_COUNTERS
 };
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 6621d01ac32d..86660031ffa2 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -503,6 +503,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	int err;
 	int is_request = pkt->mask & RXE_REQ_MASK;
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+	unsigned int skblen = skb->len;
 	unsigned long flags;
 
 	spin_lock_irqsave(&qp->state_lock, flags);
@@ -526,6 +527,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	}
 
 	rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
+	rxe_counter_add(rxe, RXE_CNT_SENT_BYTES, skblen);
 	goto done;
 
 drop:
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 5861e4244049..e7bab89e7d8d 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -318,6 +318,7 @@ void rxe_rcv(struct sk_buff *skb)
 	int err;
 	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
 	struct rxe_dev *rxe = pkt->rxe;
+	unsigned int skblen = skb->len - skb_network_offset(skb);
 
 	if (unlikely(skb->len < RXE_BTH_BYTES))
 		goto drop;
@@ -341,6 +342,7 @@ void rxe_rcv(struct sk_buff *skb)
 	if (unlikely(err))
 		goto drop;
 
+	rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);
 	rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
 
 	if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index e800545d1046..0f5ffd94643f 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -455,6 +455,12 @@ static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index)
 	atomic64_inc(&rxe->stats_counters[index]);
 }
 
+static inline void rxe_counter_add(struct rxe_dev *rxe, enum rxe_counters index,
+				   s64 val)
+{
+	atomic64_add(val, &rxe->stats_counters[index]);
+}
+
 static inline struct rxe_dev *to_rdev(struct ib_device *dev)
 {
 	return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v5 3/3] RDMA/rxe: support perf mgmt GET method
  2026-04-08  0:09 [PATCH v5 0/3] Support PERF MGMT for RXE zhenwei pi
  2026-04-08  0:09 ` [PATCH v5 1/3] RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT zhenwei pi
  2026-04-08  0:09 ` [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes zhenwei pi
@ 2026-04-08  0:09 ` zhenwei pi
  2026-04-09  5:26 ` [PATCH v5 0/3] Support PERF MGMT for RXE Zhu Yanjun
  3 siblings, 0 replies; 9+ messages in thread
From: zhenwei pi @ 2026-04-08  0:09 UTC (permalink / raw)
  To: linux-kernel, linux-rdma; +Cc: zyjzyj2000, jgg, leon, zhenwei pi

In RXE, hardware counters are already supported, but not in a
standardized manner. For instance, user-space monitoring tools like
atop only read from the *counters* directory. Therefore, it is
necessary to add perf management support to RXE.

Counter values are read directly with atomic64_read() in the PMA handlers.

Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
---
 drivers/infiniband/sw/rxe/Makefile    |   1 +
 drivers/infiniband/sw/rxe/rxe_loc.h   |   6 ++
 drivers/infiniband/sw/rxe/rxe_mad.c   | 101 ++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_verbs.c |   1 +
 4 files changed, 109 insertions(+)
 create mode 100644 drivers/infiniband/sw/rxe/rxe_mad.c

diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 3977f4f13258..e097c1ca1874 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -23,6 +23,7 @@ rdma_rxe-y := \
 	rxe_task.o \
 	rxe_net.o \
 	rxe_hw_counters.o \
+	rxe_mad.o \
 	rxe_ns.o
 
 rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index e095c12699cb..64d636bf80fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -242,4 +242,10 @@ static inline int rxe_ib_advise_mr(struct ib_pd *pd,
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
+/* rxe_mad.c */
+int rxe_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad *in, struct ib_mad *out,
+		    size_t *out_mad_size, u16 *out_mad_pkey_index);
+
 #endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_mad.c b/drivers/infiniband/sw/rxe/rxe_mad.c
new file mode 100644
index 000000000000..7cf6d94e636e
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mad.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2026 zhenwei pi <zhenwei.pi@linux.dev>
+ */
+
+#include <rdma/ib_pma.h>
+#include "rxe.h"
+#include "rxe_hw_counters.h"
+
+static int rxe_get_pma_info(struct ib_mad *out)
+{
+	struct ib_class_port_info cpi = {};
+
+	cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+	memcpy((out->data + 40), &cpi, sizeof(cpi));
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int rxe_get_pma_counters(struct rxe_dev *rxe, struct ib_mad *out)
+{
+	struct ib_pma_portcounters *pma_cnt = (struct ib_pma_portcounters *)(out->data + 40);
+	s64 val;
+
+	/* IBA release 1.8, 16.1.3.5: During operation, instead of overflowing, they shall stop
+	 * at all ones.
+	 */
+	val = atomic64_read(&rxe->stats_counters[RXE_CNT_LINK_DOWNED]);
+	if (val > U8_MAX)
+		pma_cnt->link_downed_counter = U8_MAX;
+	else
+		pma_cnt->link_downed_counter = (u8)val;
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int rxe_get_pma_counters_ext(struct rxe_dev *rxe, struct ib_mad *out)
+{
+	struct ib_pma_portcounters_ext *pma_cnt_ext =
+		(struct ib_pma_portcounters_ext *)(out->data + 40);
+	s64 val;
+
+	val = atomic64_read(&rxe->stats_counters[RXE_CNT_SENT_BYTES]);
+	pma_cnt_ext->port_xmit_data = cpu_to_be64(val >> 2);
+
+	val = atomic64_read(&rxe->stats_counters[RXE_CNT_RCVD_BYTES]);
+	pma_cnt_ext->port_rcv_data = cpu_to_be64(val >> 2);
+
+	val = atomic64_read(&rxe->stats_counters[RXE_CNT_SENT_PKTS]);
+	pma_cnt_ext->port_xmit_packets = cpu_to_be64(val);
+
+	val = atomic64_read(&rxe->stats_counters[RXE_CNT_RCVD_PKTS]);
+	pma_cnt_ext->port_rcv_packets = cpu_to_be64(val);
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int rxe_get_perf_mgmt(struct rxe_dev *rxe, const struct ib_mad *in, struct ib_mad *out)
+{
+	switch (in->mad_hdr.attr_id) {
+	case IB_PMA_CLASS_PORT_INFO:
+		return rxe_get_pma_info(out);
+
+	case IB_PMA_PORT_COUNTERS:
+		return rxe_get_pma_counters(rxe, out);
+
+	case IB_PMA_PORT_COUNTERS_EXT:
+		return rxe_get_pma_counters_ext(rxe, out);
+
+	default:
+		out->mad_hdr.status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+	}
+}
+
+int rxe_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad *in, struct ib_mad *out,
+		    size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+	struct rxe_dev *rxe = to_rdev(ibdev);
+	u8 mgmt_class = in->mad_hdr.mgmt_class;
+	u8 method = in->mad_hdr.method;
+
+	if (port_num != 1)
+		return IB_MAD_RESULT_FAILURE;
+
+	memset(out, 0, sizeof(*out));
+	switch (mgmt_class) {
+	case IB_MGMT_CLASS_PERF_MGMT:
+		if (method == IB_MGMT_METHOD_GET)
+			return rxe_get_perf_mgmt(rxe, in, out);
+		break;
+
+	default:
+		break;
+	}
+
+	out->mad_hdr.status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD);
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index eb17b6086d5e..8edd4dd1f031 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1496,6 +1496,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.post_recv = rxe_post_recv,
 	.post_send = rxe_post_send,
 	.post_srq_recv = rxe_post_srq_recv,
+	.process_mad = rxe_process_mad,
 	.query_ah = rxe_query_ah,
 	.query_device = rxe_query_device,
 	.query_pkey = rxe_query_pkey,
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v5 0/3] Support PERF MGMT for RXE
  2026-04-08  0:09 [PATCH v5 0/3] Support PERF MGMT for RXE zhenwei pi
                   ` (2 preceding siblings ...)
  2026-04-08  0:09 ` [PATCH v5 3/3] RDMA/rxe: support perf mgmt GET method zhenwei pi
@ 2026-04-09  5:26 ` Zhu Yanjun
  3 siblings, 0 replies; 9+ messages in thread
From: Zhu Yanjun @ 2026-04-09  5:26 UTC (permalink / raw)
  To: zhenwei pi, linux-kernel, linux-rdma, yanjun.zhu@linux.dev
  Cc: zyjzyj2000, jgg, leon

在 2026/4/7 17:09, zhenwei pi 写道:
> v5:
> - remove patch "RDMA/core: Fix memory free for GID table", it was
>    applied by Jason separately.
> - suggested by Yanjun, use 'skb_network_offset' to calculate the
>    length of received packets.
> 

I am not sure whether you would prefer to add a test case under 
tools/testing/selftests/rdma or in rdma-core to verify this feature.

If it is possible to include a test case in either selftests or 
rdma-core, it would be very helpful for the supporters to validate this 
patch set.

Just my two cents.

Zhu Yanjun

> v4:
> - drop rxe_ib_device_get_netdev and RXE_PORT, use 1 instead
> - avoid UAF to get skb length
> - remove one-line wrapper rxe_counter_get, use atomic64_read instead
> - fix memory free for GID table, this is a new patch in this series.
> 
> v3:
> - merge 'RDMA/rxe: use rxe_counter_get' into previous commit
> - zero *out* MAD memory
> - return success with error status rather than failure to avoid
>    uplayer hang
> 
> v2:
> - Fix overflow for PMA counter *link_downed_counter*
> - Use *rxe_counter_get* instead of *atomic64_read* for hw-counters
> 
> v1:
> Support PERF MGMT for RXE, add sent/received bytes for RXE counters,
> also improve coding style.
> 
> zhenwei pi (3):
>    RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT
>    RDMA/rxe: add SENT/RCVD bytes
>    RDMA/rxe: support perf mgmt GET method
> 
>   drivers/infiniband/sw/rxe/Makefile          |   1 +
>   drivers/infiniband/sw/rxe/rxe_hw_counters.c |   2 +
>   drivers/infiniband/sw/rxe/rxe_hw_counters.h |   2 +
>   drivers/infiniband/sw/rxe/rxe_loc.h         |   6 ++
>   drivers/infiniband/sw/rxe/rxe_mad.c         | 101 ++++++++++++++++++++
>   drivers/infiniband/sw/rxe/rxe_mcast.c       |   4 +-
>   drivers/infiniband/sw/rxe/rxe_net.c         |   9 +-
>   drivers/infiniband/sw/rxe/rxe_recv.c        |   2 +
>   drivers/infiniband/sw/rxe/rxe_verbs.c       |   5 +-
>   drivers/infiniband/sw/rxe/rxe_verbs.h       |  10 +-
>   10 files changed, 129 insertions(+), 13 deletions(-)
>   create mode 100644 drivers/infiniband/sw/rxe/rxe_mad.c
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes
  2026-04-08  0:09 ` [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes zhenwei pi
@ 2026-04-10 22:37   ` yanjun.zhu
  2026-04-11  1:45     ` Zhu Yanjun
  0 siblings, 1 reply; 9+ messages in thread
From: yanjun.zhu @ 2026-04-10 22:37 UTC (permalink / raw)
  To: zhenwei pi, linux-kernel, linux-rdma, Zhu Yanjun; +Cc: zyjzyj2000, jgg, leon

On 4/7/26 5:09 PM, zhenwei pi wrote:
> There is a lack of sent/received counter in bytes.
> 
> Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
> ---
>   drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 ++
>   drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2 ++
>   drivers/infiniband/sw/rxe/rxe_net.c         | 2 ++
>   drivers/infiniband/sw/rxe/rxe_recv.c        | 2 ++
>   drivers/infiniband/sw/rxe/rxe_verbs.h       | 6 ++++++
>   5 files changed, 14 insertions(+)
> 
> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
> index 437917a7d8f2..17edaa9a9b9b 100644
> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
> @@ -22,6 +22,8 @@ static const struct rdma_stat_desc rxe_counter_descs[] = {
>   	[RXE_CNT_LINK_DOWNED].name         =  "link_downed",
>   	[RXE_CNT_RDMA_SEND].name           =  "rdma_sends",
>   	[RXE_CNT_RDMA_RECV].name           =  "rdma_recvs",
> +	[RXE_CNT_SENT_BYTES].name          =  "sent_bytes",
> +	[RXE_CNT_RCVD_BYTES].name          =  "rcvd_bytes",
>   };
>   
>   int rxe_ib_get_hw_stats(struct ib_device *ibdev,
> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
> index 051f9e1c3852..01b355103cbc 100644
> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
> @@ -26,6 +26,8 @@ enum rxe_counters {
>   	RXE_CNT_LINK_DOWNED,
>   	RXE_CNT_RDMA_SEND,
>   	RXE_CNT_RDMA_RECV,
> +	RXE_CNT_SENT_BYTES,
> +	RXE_CNT_RCVD_BYTES,
>   	RXE_NUM_OF_COUNTERS
>   };
>   
> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
> index 6621d01ac32d..86660031ffa2 100644
> --- a/drivers/infiniband/sw/rxe/rxe_net.c
> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
> @@ -503,6 +503,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>   	int err;
>   	int is_request = pkt->mask & RXE_REQ_MASK;
>   	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
> +	unsigned int skblen = skb->len;
>   	unsigned long flags;
>   
>   	spin_lock_irqsave(&qp->state_lock, flags);
> @@ -526,6 +527,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>   	}
>   
>   	rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
> +	rxe_counter_add(rxe, RXE_CNT_SENT_BYTES, skblen);
>   	goto done;
>   
>   drop:
> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
> index 5861e4244049..e7bab89e7d8d 100644
> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
> @@ -318,6 +318,7 @@ void rxe_rcv(struct sk_buff *skb)
>   	int err;
>   	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>   	struct rxe_dev *rxe = pkt->rxe;
> +	unsigned int skblen = skb->len - skb_network_offset(skb);
>   
>   	if (unlikely(skb->len < RXE_BTH_BYTES))
>   		goto drop;
> @@ -341,6 +342,7 @@ void rxe_rcv(struct sk_buff *skb)
>   	if (unlikely(err))
>   		goto drop;
>   
> +	rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);
>   	rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
>   
>   	if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))

int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
                     struct sk_buff *skb)
{
         int err;
         int is_request = pkt->mask & RXE_REQ_MASK;
         struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
         unsigned long flags;

skb->len is printed here, that is len1
...
         if (pkt->mask & RXE_LOOPBACK_MASK)
                 err = rxe_loopback(skb, pkt);
         else
                 err = rxe_send(skb, pkt);
...
}

In the following function

static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
{
...
         if (skb->protocol == htons(ETH_P_IP))
                 skb_pull(skb, sizeof(struct iphdr));
         else
                 skb_pull(skb, sizeof(struct ipv6hdr));

...
         /* remove udp header */
         skb_pull(skb, sizeof(struct udphdr));

print skb->len here, that is len2

         rxe_rcv(skb);

...
}

Does len1 equal len2?

If not, the transmitted length appears to differ from the received 
length when using loopback.

I am not sure whether this is expected behavior.

The same observation also applies to the non-loopback case.

Zhu Yanjun

> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
> index e800545d1046..0f5ffd94643f 100644
> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
> @@ -455,6 +455,12 @@ static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index)
>   	atomic64_inc(&rxe->stats_counters[index]);
>   }
>   
> +static inline void rxe_counter_add(struct rxe_dev *rxe, enum rxe_counters index,
> +				   s64 val)
> +{
> +	atomic64_add(val, &rxe->stats_counters[index]);
> +}
> +
>   static inline struct rxe_dev *to_rdev(struct ib_device *dev)
>   {
>   	return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes
  2026-04-10 22:37   ` yanjun.zhu
@ 2026-04-11  1:45     ` Zhu Yanjun
  2026-04-11  7:24       ` zhenwei pi
  0 siblings, 1 reply; 9+ messages in thread
From: Zhu Yanjun @ 2026-04-11  1:45 UTC (permalink / raw)
  To: yanjun.zhu, zhenwei pi, linux-kernel, linux-rdma; +Cc: zyjzyj2000, jgg, leon


在 2026/4/10 15:37, yanjun.zhu 写道:
> On 4/7/26 5:09 PM, zhenwei pi wrote:
>> There is a lack of sent/received counter in bytes.
>>
>> Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
>> ---
>>   drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 ++
>>   drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2 ++
>>   drivers/infiniband/sw/rxe/rxe_net.c         | 2 ++
>>   drivers/infiniband/sw/rxe/rxe_recv.c        | 2 ++
>>   drivers/infiniband/sw/rxe/rxe_verbs.h       | 6 ++++++
>>   5 files changed, 14 insertions(+)
>>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c 
>> b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>> index 437917a7d8f2..17edaa9a9b9b 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>> @@ -22,6 +22,8 @@ static const struct rdma_stat_desc 
>> rxe_counter_descs[] = {
>>       [RXE_CNT_LINK_DOWNED].name         =  "link_downed",
>>       [RXE_CNT_RDMA_SEND].name           =  "rdma_sends",
>>       [RXE_CNT_RDMA_RECV].name           =  "rdma_recvs",
>> +    [RXE_CNT_SENT_BYTES].name          =  "sent_bytes",
>> +    [RXE_CNT_RCVD_BYTES].name          =  "rcvd_bytes",
>>   };
>>     int rxe_ib_get_hw_stats(struct ib_device *ibdev,
>> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h 
>> b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>> index 051f9e1c3852..01b355103cbc 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>> @@ -26,6 +26,8 @@ enum rxe_counters {
>>       RXE_CNT_LINK_DOWNED,
>>       RXE_CNT_RDMA_SEND,
>>       RXE_CNT_RDMA_RECV,
>> +    RXE_CNT_SENT_BYTES,
>> +    RXE_CNT_RCVD_BYTES,
>>       RXE_NUM_OF_COUNTERS
>>   };
>>   diff --git a/drivers/infiniband/sw/rxe/rxe_net.c 
>> b/drivers/infiniband/sw/rxe/rxe_net.c
>> index 6621d01ac32d..86660031ffa2 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>> @@ -503,6 +503,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct 
>> rxe_pkt_info *pkt,
>>       int err;
>>       int is_request = pkt->mask & RXE_REQ_MASK;
>>       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>> +    unsigned int skblen = skb->len;
>>       unsigned long flags;
>>         spin_lock_irqsave(&qp->state_lock, flags);
>> @@ -526,6 +527,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct 
>> rxe_pkt_info *pkt,
>>       }
>>         rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
>> +    rxe_counter_add(rxe, RXE_CNT_SENT_BYTES, skblen);
>>       goto done;
>>     drop:
>> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c 
>> b/drivers/infiniband/sw/rxe/rxe_recv.c
>> index 5861e4244049..e7bab89e7d8d 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
>> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
>> @@ -318,6 +318,7 @@ void rxe_rcv(struct sk_buff *skb)
>>       int err;
>>       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>>       struct rxe_dev *rxe = pkt->rxe;
>> +    unsigned int skblen = skb->len - skb_network_offset(skb);
>>         if (unlikely(skb->len < RXE_BTH_BYTES))
>>           goto drop;
>> @@ -341,6 +342,7 @@ void rxe_rcv(struct sk_buff *skb)
>>       if (unlikely(err))
>>           goto drop;
>>   +    rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);
>>       rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
>>         if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
>
> int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>                     struct sk_buff *skb)
> {
>         int err;
>         int is_request = pkt->mask & RXE_REQ_MASK;
>         struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>         unsigned long flags;
>
> skb->len is printed here, that is len1
> ...
>         if (pkt->mask & RXE_LOOPBACK_MASK)
>                 err = rxe_loopback(skb, pkt);
>         else
>                 err = rxe_send(skb, pkt);
> ...
> }
>
> In the following function
>
> static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
> {
> ...
>         if (skb->protocol == htons(ETH_P_IP))
>                 skb_pull(skb, sizeof(struct iphdr));
>         else
>                 skb_pull(skb, sizeof(struct ipv6hdr));
>
> ...
>         /* remove udp header */
>         skb_pull(skb, sizeof(struct udphdr));
>
> print skb->len here, that is len2
>
>         rxe_rcv(skb);
>
> ...
> }
>
> Does len1 equal to len2?

I have run tests. The difference between len1 and len2 is 28 bytes.

That should be the IPv4 header (20 bytes) plus the UDP header (8 bytes), 
since I used IPv4 addresses for the tests.

I am not sure whether the received byte count should equal the 
transmitted byte count.


Zhu Yanjun

>
> If not, the transmitted length appears to differ from the received 
> length when using loopback.
>
> I am not sure whether this is expected behavior.
>
> The same observation also applies to the non-loopback case.
>
> Zhu Yanjun
>
>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h 
>> b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> index e800545d1046..0f5ffd94643f 100644
>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>> @@ -455,6 +455,12 @@ static inline void rxe_counter_inc(struct 
>> rxe_dev *rxe, enum rxe_counters index)
>>       atomic64_inc(&rxe->stats_counters[index]);
>>   }
>>   +static inline void rxe_counter_add(struct rxe_dev *rxe, enum 
>> rxe_counters index,
>> +                   s64 val)
>> +{
>> +    atomic64_add(val, &rxe->stats_counters[index]);
>> +}
>> +
>>   static inline struct rxe_dev *to_rdev(struct ib_device *dev)
>>   {
>>       return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;

-- 
Best Regards,
Yanjun.Zhu


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes
  2026-04-11  1:45     ` Zhu Yanjun
@ 2026-04-11  7:24       ` zhenwei pi
  2026-04-11 18:57         ` Zhu Yanjun
  0 siblings, 1 reply; 9+ messages in thread
From: zhenwei pi @ 2026-04-11  7:24 UTC (permalink / raw)
  To: Zhu Yanjun, linux-kernel, linux-rdma; +Cc: zyjzyj2000, jgg, leon



On 4/11/26 09:45, Zhu Yanjun wrote:
> 
> 在 2026/4/10 15:37, yanjun.zhu 写道:
>> On 4/7/26 5:09 PM, zhenwei pi wrote:
>>> There is a lack of sent/received counter in bytes.
>>>
>>> Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
>>> ---
>>>   drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 ++
>>>   drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2 ++
>>>   drivers/infiniband/sw/rxe/rxe_net.c         | 2 ++
>>>   drivers/infiniband/sw/rxe/rxe_recv.c        | 2 ++
>>>   drivers/infiniband/sw/rxe/rxe_verbs.h       | 6 ++++++
>>>   5 files changed, 14 insertions(+)
>>>
>>> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/ 
>>> infiniband/sw/rxe/rxe_hw_counters.c
>>> index 437917a7d8f2..17edaa9a9b9b 100644
>>> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>>> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>>> @@ -22,6 +22,8 @@ static const struct rdma_stat_desc 
>>> rxe_counter_descs[] = {
>>>       [RXE_CNT_LINK_DOWNED].name         =  "link_downed",
>>>       [RXE_CNT_RDMA_SEND].name           =  "rdma_sends",
>>>       [RXE_CNT_RDMA_RECV].name           =  "rdma_recvs",
>>> +    [RXE_CNT_SENT_BYTES].name          =  "sent_bytes",
>>> +    [RXE_CNT_RCVD_BYTES].name          =  "rcvd_bytes",
>>>   };
>>>     int rxe_ib_get_hw_stats(struct ib_device *ibdev,
>>> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/ 
>>> infiniband/sw/rxe/rxe_hw_counters.h
>>> index 051f9e1c3852..01b355103cbc 100644
>>> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>>> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>>> @@ -26,6 +26,8 @@ enum rxe_counters {
>>>       RXE_CNT_LINK_DOWNED,
>>>       RXE_CNT_RDMA_SEND,
>>>       RXE_CNT_RDMA_RECV,
>>> +    RXE_CNT_SENT_BYTES,
>>> +    RXE_CNT_RCVD_BYTES,
>>>       RXE_NUM_OF_COUNTERS
>>>   };
>>>   diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/ 
>>> infiniband/sw/rxe/rxe_net.c
>>> index 6621d01ac32d..86660031ffa2 100644
>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>>> @@ -503,6 +503,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct 
>>> rxe_pkt_info *pkt,
>>>       int err;
>>>       int is_request = pkt->mask & RXE_REQ_MASK;
>>>       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>>> +    unsigned int skblen = skb->len;
>>>       unsigned long flags;
>>>         spin_lock_irqsave(&qp->state_lock, flags);
>>> @@ -526,6 +527,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct 
>>> rxe_pkt_info *pkt,
>>>       }
>>>         rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
>>> +    rxe_counter_add(rxe, RXE_CNT_SENT_BYTES, skblen);
>>>       goto done;
>>>     drop:
>>> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/ 
>>> infiniband/sw/rxe/rxe_recv.c
>>> index 5861e4244049..e7bab89e7d8d 100644
>>> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
>>> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
>>> @@ -318,6 +318,7 @@ void rxe_rcv(struct sk_buff *skb)
>>>       int err;
>>>       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>>>       struct rxe_dev *rxe = pkt->rxe;
>>> +    unsigned int skblen = skb->len - skb_network_offset(skb);
>>>         if (unlikely(skb->len < RXE_BTH_BYTES))
>>>           goto drop;
>>> @@ -341,6 +342,7 @@ void rxe_rcv(struct sk_buff *skb)
>>>       if (unlikely(err))
>>>           goto drop;
>>>   +    rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);
>>>       rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
>>>         if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
>>
>> int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>>                     struct sk_buff *skb)
>> {
>>         int err;
>>         int is_request = pkt->mask & RXE_REQ_MASK;
>>         struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>>         unsigned long flags;
>>
>> skb->len is printed here, that is len1
>> ...
>>         if (pkt->mask & RXE_LOOPBACK_MASK)
>>                 err = rxe_loopback(skb, pkt);
>>         else
>>                 err = rxe_send(skb, pkt);
>> ...
>> }
>>
>> In the following function
>>
>> static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
>> {
>> ...
>>         if (skb->protocol == htons(ETH_P_IP))
>>                 skb_pull(skb, sizeof(struct iphdr));
>>         else
>>                 skb_pull(skb, sizeof(struct ipv6hdr));
>>
>> ...
>>         /* remove udp header */
>>         skb_pull(skb, sizeof(struct udphdr));
>>
>> print skb->len here, that is len2
>>
>>         rxe_rcv(skb);
>>
>> ...
>> }
>>
>> Does len1 equal to len2?
> 
> I have made tests. The difference between len1 and len2 is 28.
> 
> It should be the total of ipv4 header + udp header because I use ipv4 
> address to make tests.
> 
> I am not sure if the bytes of recv data should equal to the bytes of 
> xmit data.
> 
> 
> Zhu Yanjun
> 

Yes, I saw the same 28-byte difference while working on v4, so I tried
the following logic to recalculate the length:

+	unsigned int skblen = skb->len + sizeof(struct udphdr);
...
+	if (skb->protocol == htons(ETH_P_IP))
+		skblen += sizeof(struct iphdr);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		skblen += sizeof(struct ipv6hdr);
+	rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);

I then switched to 'unsigned int skblen = skb->len - skb_network_offset(skb);' in v5.

>>
>> If not, the transmitted length appears to differ from the received 
>> length when using loopback.
>>
>> I am not sure whether this is expected behavior.
>>
>> The same observation also applies to the non-loopback case.
>>
>> Zhu Yanjun
>>
>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/ 
>>> infiniband/sw/rxe/rxe_verbs.h
>>> index e800545d1046..0f5ffd94643f 100644
>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>> @@ -455,6 +455,12 @@ static inline void rxe_counter_inc(struct 
>>> rxe_dev *rxe, enum rxe_counters index)
>>>       atomic64_inc(&rxe->stats_counters[index]);
>>>   }
>>>   +static inline void rxe_counter_add(struct rxe_dev *rxe, enum 
>>> rxe_counters index,
>>> +                   s64 val)
>>> +{
>>> +    atomic64_add(val, &rxe->stats_counters[index]);
>>> +}
>>> +
>>>   static inline struct rxe_dev *to_rdev(struct ib_device *dev)
>>>   {
>>>       return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes
  2026-04-11  7:24       ` zhenwei pi
@ 2026-04-11 18:57         ` Zhu Yanjun
  0 siblings, 0 replies; 9+ messages in thread
From: Zhu Yanjun @ 2026-04-11 18:57 UTC (permalink / raw)
  To: zhenwei pi, linux-kernel, linux-rdma, yanjun.zhu@linux.dev
  Cc: zyjzyj2000, jgg, leon

在 2026/4/11 0:24, zhenwei pi 写道:
> 
> 
> On 4/11/26 09:45, Zhu Yanjun wrote:
>>
>> 在 2026/4/10 15:37, yanjun.zhu 写道:
>>> On 4/7/26 5:09 PM, zhenwei pi wrote:
>>>> There is a lack of sent/received counter in bytes.
>>>>
>>>> Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
>>>> ---
>>>>   drivers/infiniband/sw/rxe/rxe_hw_counters.c | 2 ++
>>>>   drivers/infiniband/sw/rxe/rxe_hw_counters.h | 2 ++
>>>>   drivers/infiniband/sw/rxe/rxe_net.c         | 2 ++
>>>>   drivers/infiniband/sw/rxe/rxe_recv.c        | 2 ++
>>>>   drivers/infiniband/sw/rxe/rxe_verbs.h       | 6 ++++++
>>>>   5 files changed, 14 insertions(+)
>>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/ 
>>>> infiniband/sw/rxe/rxe_hw_counters.c
>>>> index 437917a7d8f2..17edaa9a9b9b 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
>>>> @@ -22,6 +22,8 @@ static const struct rdma_stat_desc 
>>>> rxe_counter_descs[] = {
>>>>       [RXE_CNT_LINK_DOWNED].name         =  "link_downed",
>>>>       [RXE_CNT_RDMA_SEND].name           =  "rdma_sends",
>>>>       [RXE_CNT_RDMA_RECV].name           =  "rdma_recvs",
>>>> +    [RXE_CNT_SENT_BYTES].name          =  "sent_bytes",
>>>> +    [RXE_CNT_RCVD_BYTES].name          =  "rcvd_bytes",
>>>>   };
>>>>     int rxe_ib_get_hw_stats(struct ib_device *ibdev,
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/ 
>>>> infiniband/sw/rxe/rxe_hw_counters.h
>>>> index 051f9e1c3852..01b355103cbc 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
>>>> @@ -26,6 +26,8 @@ enum rxe_counters {
>>>>       RXE_CNT_LINK_DOWNED,
>>>>       RXE_CNT_RDMA_SEND,
>>>>       RXE_CNT_RDMA_RECV,
>>>> +    RXE_CNT_SENT_BYTES,
>>>> +    RXE_CNT_RCVD_BYTES,
>>>>       RXE_NUM_OF_COUNTERS
>>>>   };
>>>>   diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/ 
>>>> infiniband/sw/rxe/rxe_net.c
>>>> index 6621d01ac32d..86660031ffa2 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_net.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_net.c
>>>> @@ -503,6 +503,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct 
>>>> rxe_pkt_info *pkt,
>>>>       int err;
>>>>       int is_request = pkt->mask & RXE_REQ_MASK;
>>>>       struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>>>> +    unsigned int skblen = skb->len;
>>>>       unsigned long flags;
>>>>         spin_lock_irqsave(&qp->state_lock, flags);
>>>> @@ -526,6 +527,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct 
>>>> rxe_pkt_info *pkt,
>>>>       }
>>>>         rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
>>>> +    rxe_counter_add(rxe, RXE_CNT_SENT_BYTES, skblen);
>>>>       goto done;
>>>>     drop:
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/ 
>>>> infiniband/sw/rxe/rxe_recv.c
>>>> index 5861e4244049..e7bab89e7d8d 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_recv.c
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_recv.c
>>>> @@ -318,6 +318,7 @@ void rxe_rcv(struct sk_buff *skb)
>>>>       int err;
>>>>       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
>>>>       struct rxe_dev *rxe = pkt->rxe;
>>>> +    unsigned int skblen = skb->len - skb_network_offset(skb);
>>>>         if (unlikely(skb->len < RXE_BTH_BYTES))
>>>>           goto drop;
>>>> @@ -341,6 +342,7 @@ void rxe_rcv(struct sk_buff *skb)
>>>>       if (unlikely(err))
>>>>           goto drop;
>>>>   +    rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);
>>>>       rxe_counter_inc(rxe, RXE_CNT_RCVD_PKTS);
>>>>         if (unlikely(bth_qpn(pkt) == IB_MULTICAST_QPN))
>>>
>>> int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
>>>                     struct sk_buff *skb)
>>> {
>>>         int err;
>>>         int is_request = pkt->mask & RXE_REQ_MASK;
>>>         struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
>>>         unsigned long flags;
>>>
>>> skb->len is printed here, that is len1
>>> ...
>>>         if (pkt->mask & RXE_LOOPBACK_MASK)
>>>                 err = rxe_loopback(skb, pkt);
>>>         else
>>>                 err = rxe_send(skb, pkt);
>>> ...
>>> }
>>>
>>> In the following function
>>>
>>> static int rxe_loopback(struct sk_buff *skb, struct rxe_pkt_info *pkt)
>>> {
>>> ...
>>>         if (skb->protocol == htons(ETH_P_IP))
>>>                 skb_pull(skb, sizeof(struct iphdr));
>>>         else
>>>                 skb_pull(skb, sizeof(struct ipv6hdr));
>>>
>>> ...
>>>         /* remove udp header */
>>>         skb_pull(skb, sizeof(struct udphdr));
>>>
>>> print skb->len here, that is len2
>>>
>>>         rxe_rcv(skb);
>>>
>>> ...
>>> }
>>>
>>> Does len1 equal to len2?
>>
>> I have made tests. The difference between len1 and len2 is 28.
>>
>> It should be the total of ipv4 header + udp header because I use ipv4 
>> address to make tests.
>>
>> I am not sure if the bytes of recv data should equal to the bytes of 
>> xmit data.
>>
>>
>> Zhu Yanjun
>>
> 
> Yes, I got the same result of 28 bytes difference during v4 version. So 
> I try to use such logic to recalculate the length:
> 
> +    unsigned int skblen = skb->len + sizeof(struct udphdr);
> ...
> +    if (skb->protocol == htons(ETH_P_IP))
> +        skblen += sizeof(struct iphdr);
> +    else if (skb->protocol == htons(ETH_P_IPV6))
> +        skblen += sizeof(struct ipv6hdr);
> +    rxe_counter_add(rxe, RXE_CNT_RCVD_BYTES, skblen);
> 
> then use 'unsigned int skblen = skb->len - skb_network_offset(skb);' in v5.

Thanks. It is not necessary to use 'unsigned int skblen = skb->len -
skb_network_offset(skb);'.

The approach above, which adds the udphdr and iphdr/ipv6hdr sizes, should be OK.

Thanks a lot.
Zhu Yanjun

> 
>>>
>>> If not, the transmitted length appears to differ from the received 
>>> length when using loopback.
>>>
>>> I am not sure whether this is expected behavior.
>>>
>>> The same observation also applies to the non-loopback case.
>>>
>>> Zhu Yanjun
>>>
>>>> diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/ 
>>>> infiniband/sw/rxe/rxe_verbs.h
>>>> index e800545d1046..0f5ffd94643f 100644
>>>> --- a/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
>>>> @@ -455,6 +455,12 @@ static inline void rxe_counter_inc(struct 
>>>> rxe_dev *rxe, enum rxe_counters index)
>>>>       atomic64_inc(&rxe->stats_counters[index]);
>>>>   }
>>>>   +static inline void rxe_counter_add(struct rxe_dev *rxe, enum 
>>>> rxe_counters index,
>>>> +                   s64 val)
>>>> +{
>>>> +    atomic64_add(val, &rxe->stats_counters[index]);
>>>> +}
>>>> +
>>>>   static inline struct rxe_dev *to_rdev(struct ib_device *dev)
>>>>   {
>>>>       return dev ? container_of(dev, struct rxe_dev, ib_dev) : NULL;
>>
> 


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2026-04-11 18:57 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2026-04-08  0:09 [PATCH v5 0/3] Support PERF MGMT for RXE zhenwei pi
2026-04-08  0:09 ` [PATCH v5 1/3] RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT zhenwei pi
2026-04-08  0:09 ` [PATCH v5 2/3] RDMA/rxe: add SENT/RCVD bytes zhenwei pi
2026-04-10 22:37   ` yanjun.zhu
2026-04-11  1:45     ` Zhu Yanjun
2026-04-11  7:24       ` zhenwei pi
2026-04-11 18:57         ` Zhu Yanjun
2026-04-08  0:09 ` [PATCH v5 3/3] RDMA/rxe: support perf mgmt GET method zhenwei pi
2026-04-09  5:26 ` [PATCH v5 0/3] Support PERF MGMT for RXE Zhu Yanjun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.