From: zhenwei pi <zhenwei.pi@linux.dev>
To: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org
Cc: zyjzyj2000@gmail.com, jgg@ziepe.ca, leon@kernel.org,
zhenwei pi <zhenwei.pi@linux.dev>
Subject: [PATCH v4 4/4] RDMA/rxe: support perf mgmt GET method
Date: Mon, 6 Apr 2026 21:28:29 +0800 [thread overview]
Message-ID: <20260406132830.435381-5-zhenwei.pi@linux.dev> (raw)
In-Reply-To: <20260406132830.435381-1-zhenwei.pi@linux.dev>
RXE already supports hardware counters, but does not expose them in a
standardized manner. For instance, user-space monitoring tools such as
atop read only from the *counters* directory. Therefore, add perf
management (GET method) support to RXE.
Also use rxe_counter_get instead of raw atomic64_read in hw-counters.
Signed-off-by: zhenwei pi <zhenwei.pi@linux.dev>
---
drivers/infiniband/sw/rxe/Makefile | 1 +
drivers/infiniband/sw/rxe/rxe_loc.h | 6 ++
drivers/infiniband/sw/rxe/rxe_mad.c | 101 ++++++++++++++++++++++++++
drivers/infiniband/sw/rxe/rxe_verbs.c | 1 +
4 files changed, 109 insertions(+)
create mode 100644 drivers/infiniband/sw/rxe/rxe_mad.c
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 3977f4f13258..e097c1ca1874 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -23,6 +23,7 @@ rdma_rxe-y := \
rxe_task.o \
rxe_net.o \
rxe_hw_counters.o \
+ rxe_mad.o \
rxe_ns.o
rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index e095c12699cb..64d636bf80fd 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -242,4 +242,10 @@ static inline int rxe_ib_advise_mr(struct ib_pd *pd,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+/* rxe_mad.c */
+int rxe_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+
#endif /* RXE_LOC_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_mad.c b/drivers/infiniband/sw/rxe/rxe_mad.c
new file mode 100644
index 000000000000..7cf6d94e636e
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_mad.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2026 zhenwei pi <zhenwei.pi@linux.dev>
+ */
+
+#include <rdma/ib_pma.h>
+#include "rxe.h"
+#include "rxe_hw_counters.h"
+
+static int rxe_get_pma_info(struct ib_mad *out)
+{
+ struct ib_class_port_info cpi = {};
+
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out->data + 40), &cpi, sizeof(cpi));
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int rxe_get_pma_counters(struct rxe_dev *rxe, struct ib_mad *out)
+{
+ struct ib_pma_portcounters *pma_cnt = (struct ib_pma_portcounters *)(out->data + 40);
+ s64 val;
+
+ /* IBA release 1.8, 16.1.3.5: during operation the counters shall not overflow;
+ * they stop at all ones, so clamp the 64-bit count to the 8-bit field.
+ */
+ val = atomic64_read(&rxe->stats_counters[RXE_CNT_LINK_DOWNED]);
+ if (val > U8_MAX)
+ pma_cnt->link_downed_counter = U8_MAX;
+ else
+ pma_cnt->link_downed_counter = (u8)val;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int rxe_get_pma_counters_ext(struct rxe_dev *rxe, struct ib_mad *out)
+{
+ struct ib_pma_portcounters_ext *pma_cnt_ext =
+ (struct ib_pma_portcounters_ext *)(out->data + 40);
+ s64 val;
+
+ val = atomic64_read(&rxe->stats_counters[RXE_CNT_SENT_BYTES]);
+ pma_cnt_ext->port_xmit_data = cpu_to_be64(val >> 2);
+
+ val = atomic64_read(&rxe->stats_counters[RXE_CNT_RCVD_BYTES]);
+ pma_cnt_ext->port_rcv_data = cpu_to_be64(val >> 2);
+
+ val = atomic64_read(&rxe->stats_counters[RXE_CNT_SENT_PKTS]);
+ pma_cnt_ext->port_xmit_packets = cpu_to_be64(val);
+
+ val = atomic64_read(&rxe->stats_counters[RXE_CNT_RCVD_PKTS]);
+ pma_cnt_ext->port_rcv_packets = cpu_to_be64(val);
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+static int rxe_get_perf_mgmt(struct rxe_dev *rxe, const struct ib_mad *in, struct ib_mad *out)
+{
+ switch (in->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ return rxe_get_pma_info(out);
+
+ case IB_PMA_PORT_COUNTERS:
+ return rxe_get_pma_counters(rxe, out);
+
+ case IB_PMA_PORT_COUNTERS_EXT:
+ return rxe_get_pma_counters_ext(rxe, out);
+
+ default:
+ out->mad_hdr.status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ }
+}
+
+int rxe_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+ struct rxe_dev *rxe = to_rdev(ibdev);
+ u8 mgmt_class = in->mad_hdr.mgmt_class;
+ u8 method = in->mad_hdr.method;
+
+ if (port_num != 1)
+ return IB_MAD_RESULT_FAILURE;
+
+ memset(out, 0, sizeof(*out));
+ switch (mgmt_class) {
+ case IB_MGMT_CLASS_PERF_MGMT:
+ if (method == IB_MGMT_METHOD_GET)
+ return rxe_get_perf_mgmt(rxe, in, out);
+ break;
+
+ default:
+ break;
+ }
+
+ out->mad_hdr.status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD);
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index d3b2d610ca37..1ef5cddf620a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1505,6 +1505,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.post_recv = rxe_post_recv,
.post_send = rxe_post_send,
.post_srq_recv = rxe_post_srq_recv,
+ .process_mad = rxe_process_mad,
.query_ah = rxe_query_ah,
.query_device = rxe_query_device,
.query_pkey = rxe_query_pkey,
--
2.43.0
prev parent reply other threads:[~2026-04-06 13:29 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-06 13:28 [PATCH v4 0/4] Support PERF MGMT for RXE zhenwei pi
2026-04-06 13:28 ` [PATCH v4 1/4] RDMA/core: Fix memory free for GID table zhenwei pi
2026-04-07 14:51 ` Jason Gunthorpe
2026-04-06 13:28 ` [PATCH v4 2/4] RDMA/rxe: remove rxe_ib_device_get_netdev() and RXE_PORT zhenwei pi
2026-04-06 13:28 ` [PATCH v4 3/4] RDMA/rxe: add SENT/RCVD bytes zhenwei pi
2026-04-06 14:55 ` Zhu Yanjun
2026-04-07 0:58 ` zhenwei pi
2026-04-06 13:28 ` zhenwei pi [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260406132830.435381-5-zhenwei.pi@linux.dev \
--to=zhenwei.pi@linux.dev \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=zyjzyj2000@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.