* [PATCH for-next v6 1/5] RDMA/core: Add Completion Counters support
2026-06-04 11:46 [PATCH for-next v6 0/5] Introduce Completion Counters Michael Margolin
@ 2026-06-04 11:46 ` Michael Margolin
2026-06-04 11:46 ` [PATCH for-next v6 2/5] RDMA/core: Prevent destroying in-use completion counters Michael Margolin
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Michael Margolin @ 2026-06-04 11:46 UTC (permalink / raw)
To: jgg, leon, linux-rdma; +Cc: sleybo, matua, gal.pressman, Yonatan Nachum
Add core infrastructure for Completion Counters, a light-weight
alternative to polling CQ for tracking operation completions.
Define the UVERBS_OBJECT_COMP_CNTR ioctl object with create, destroy,
modify and read methods for both success and error counters. Add a QP
attach method on the QP object to associate a completion counter with a
queue pair.
Add the ib_comp_cntr struct, the ib_qp_attach_comp_cntr_attr struct, the
device ops, and DECLARE_RDMA_OBJ_SIZE for driver object allocation.
Only userspace Completion Counters are supported at this stage.
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
---
drivers/infiniband/core/Makefile | 1 +
drivers/infiniband/core/device.c | 6 +
drivers/infiniband/core/rdma_core.h | 1 +
drivers/infiniband/core/uverbs_cmd.c | 1 +
.../core/uverbs_std_types_comp_cntr.c | 173 ++++++++++++++++++
drivers/infiniband/core/uverbs_std_types_qp.c | 48 ++++-
drivers/infiniband/core/uverbs_uapi.c | 1 +
include/rdma/ib_verbs.h | 40 ++++
include/uapi/rdma/ib_user_ioctl_cmds.h | 38 ++++
include/uapi/rdma/ib_user_ioctl_verbs.h | 19 ++
include/uapi/rdma/ib_user_verbs.h | 2 +-
11 files changed, 328 insertions(+), 2 deletions(-)
create mode 100644 drivers/infiniband/core/uverbs_std_types_comp_cntr.c
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ab7a2197bc86..47ef6b0afd29 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -38,6 +38,7 @@ ib_umad-y := user_mad.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types.o uverbs_ioctl.o \
uverbs_std_types_cq.o \
+ uverbs_std_types_comp_cntr.o \
uverbs_std_types_dmabuf.o \
uverbs_std_types_dmah.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 21ada0fe9059..75ab0069eeac 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2743,6 +2743,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_ah);
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
+ SET_DEVICE_OP(dev_ops, create_comp_cntr);
SET_DEVICE_OP(dev_ops, create_user_cq);
SET_DEVICE_OP(dev_ops, create_flow);
SET_DEVICE_OP(dev_ops, create_qp);
@@ -2763,6 +2764,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, destroy_ah);
SET_DEVICE_OP(dev_ops, destroy_counters);
SET_DEVICE_OP(dev_ops, destroy_cq);
+ SET_DEVICE_OP(dev_ops, destroy_comp_cntr);
SET_DEVICE_OP(dev_ops, destroy_flow);
SET_DEVICE_OP(dev_ops, destroy_flow_action);
SET_DEVICE_OP(dev_ops, destroy_qp);
@@ -2814,6 +2816,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
+ SET_DEVICE_OP(dev_ops, qp_attach_comp_cntr);
SET_DEVICE_OP(dev_ops, modify_srq);
SET_DEVICE_OP(dev_ops, modify_wq);
SET_DEVICE_OP(dev_ops, peek_cq);
@@ -2837,12 +2840,14 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, query_ucontext);
SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
SET_DEVICE_OP(dev_ops, read_counters);
+ SET_DEVICE_OP(dev_ops, read_comp_cntr);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
SET_DEVICE_OP(dev_ops, resize_user_cq);
+ SET_DEVICE_OP(dev_ops, modify_comp_cntr);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
@@ -2851,6 +2856,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_comp_cntr);
SET_OBJ_SIZE(dev_ops, ib_dmah);
SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index b626d3d24d08..64ef97165fd9 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -152,6 +152,7 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
extern const struct uapi_definition uverbs_def_obj_async_fd[];
extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_comp_cntr[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
extern const struct uapi_definition uverbs_def_obj_dm[];
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 10bd7cafd976..e7cd91d522e7 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3603,6 +3603,7 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
resp.max_dm_size = attr.max_dm_size;
+ resp.max_comp_cntr = attr.max_comp_cntr;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
return uverbs_response(attrs, &resp, sizeof(resp));
diff --git a/drivers/infiniband/core/uverbs_std_types_comp_cntr.c b/drivers/infiniband/core/uverbs_std_types_comp_cntr.c
new file mode 100644
index 000000000000..91ad54b270cf
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_comp_cntr.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_comp_cntr(struct ib_uobject *uobject, enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_comp_cntr *cc = uobject->object;
+ int ret;
+
+ ret = cc->device->ops.destroy_comp_cntr(cc);
+ if (ret)
+ return ret;
+
+ kfree(cc);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_CREATE)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE);
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_comp_cntr *cc;
+ int ret;
+
+ if (!ib_dev->ops.create_comp_cntr ||
+ !ib_dev->ops.destroy_comp_cntr ||
+ !ib_dev->ops.qp_attach_comp_cntr)
+ return -EOPNOTSUPP;
+
+ cc = rdma_zalloc_drv_obj(ib_dev, ib_comp_cntr);
+ if (!cc)
+ return -ENOMEM;
+
+ cc->device = ib_dev;
+ cc->uobject = uobj;
+
+ ret = ib_dev->ops.create_comp_cntr(cc, attrs);
+ if (ret)
+ goto err_free;
+
+ uobj->object = cc;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_COMP_CNTR_RESP_COUNT_MAX_VALUE,
+ &cc->comp_count_max_value, sizeof(cc->comp_count_max_value));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_COMP_CNTR_RESP_ERR_COUNT_MAX_VALUE,
+ &cc->err_count_max_value, sizeof(cc->err_count_max_value));
+ return ret;
+
+err_free:
+ kfree(cc);
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_MODIFY)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_comp_cntr *cc = uverbs_attr_get_obj(attrs, UVERBS_ATTR_MODIFY_COMP_CNTR_HANDLE);
+ enum ib_comp_cntr_modify_op op;
+ enum ib_comp_cntr_entry entry;
+ u64 value;
+ int ret;
+
+ if (!cc->device->ops.modify_comp_cntr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&entry, attrs, UVERBS_ATTR_MODIFY_COMP_CNTR_ENTRY);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&op, attrs, UVERBS_ATTR_MODIFY_COMP_CNTR_OP);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&value, attrs, UVERBS_ATTR_MODIFY_COMP_CNTR_VALUE);
+ if (ret)
+ return ret;
+
+ return cc->device->ops.modify_comp_cntr(cc, entry, op, value);
+}
+
+/*
+ * Read one entry of a completion counter and return it to userspace.
+ *
+ * The entry to read is taken from UVERBS_ATTR_READ_COMP_CNTR_ENTRY, the value
+ * is fetched through the driver's read_comp_cntr op and written to
+ * UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE.
+ *
+ * Returns -EOPNOTSUPP when the driver does not implement read_comp_cntr,
+ * otherwise the error from attribute parsing, from the driver or from the
+ * copy to userspace, or 0 on success.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_READ)(struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cc = uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COMP_CNTR_HANDLE);
+	enum ib_comp_cntr_entry entry;
+	/*
+	 * Zero-initialised so that a driver which reports success without
+	 * storing a result cannot cause stale kernel stack to be copied out.
+	 */
+	u64 value = 0;
+	int ret;
+
+	if (!cc->device->ops.read_comp_cntr)
+		return -EOPNOTSUPP;
+
+	ret = uverbs_get_const(&entry, attrs, UVERBS_ATTR_READ_COMP_CNTR_ENTRY);
+	if (ret)
+		return ret;
+
+	ret = cc->device->ops.read_comp_cntr(cc, entry, &value);
+	if (ret)
+		return ret;
+
+	return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE, &value, sizeof(value));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_COMP_CNTR_RESP_COUNT_MAX_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_COMP_CNTR_RESP_ERR_COUNT_MAX_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_COMP_CNTR_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_MODIFY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_MODIFY_COMP_CNTR_ENTRY,
+ enum ib_uverbs_comp_cntr_entry,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_MODIFY_COMP_CNTR_OP,
+ enum ib_uverbs_comp_cntr_modify_op,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_MODIFY_COMP_CNTR_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_READ,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_READ_COMP_CNTR_ENTRY,
+ enum ib_uverbs_comp_cntr_entry,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_comp_cntr),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_MODIFY),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_READ));
+
+const struct uapi_definition uverbs_def_obj_comp_cntr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CNTR,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_comp_cntr)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
index e44974abc6b5..9962155f905a 100644
--- a/drivers/infiniband/core/uverbs_std_types_qp.c
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -373,11 +373,57 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp),
UA_MANDATORY));
+/*
+ * Attach a completion counter to a QP for a set of operation types.
+ *
+ * The QP and the counter are looked up from their handle attributes and the
+ * operation mask is read from UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK, limited
+ * to the IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_* bits listed below. The attach
+ * itself is delegated to the driver's qp_attach_comp_cntr op.
+ *
+ * Returns -EOPNOTSUPP when the driver does not implement qp_attach_comp_cntr,
+ * -EINVAL when the mask selects no operation, otherwise the error from
+ * uverbs_get_flags32() or from the driver, or 0 on success.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_ATTACH_COMP_CNTR)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *qp_uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_QP_ATTACH_COMP_CNTR_HANDLE);
+	struct ib_comp_cntr *cc = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_QP_ATTACH_COMP_CNTR_CNTR_HANDLE);
+	struct ib_qp_attach_comp_cntr_attr attr = {};
+	struct ib_qp *qp = qp_uobj->object;
+	int ret;
+
+	/* The op is invoked through the QP's device, so check that ops table. */
+	if (!qp->device->ops.qp_attach_comp_cntr)
+		return -EOPNOTSUPP;
+
+	ret = uverbs_get_flags32(&attr.op_mask, attrs,
+				 UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK,
+				 IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_SEND |
+				 IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RECV |
+				 IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RDMA_READ |
+				 IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_READ |
+				 IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RDMA_WRITE |
+				 IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_WRITE);
+	if (ret)
+		return ret;
+
+	/* An empty mask would attach the counter to nothing; reject it. */
+	if (!attr.op_mask)
+		return -EINVAL;
+
+	return qp->device->ops.qp_attach_comp_cntr(qp, cc, &attr);
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_ATTACH_COMP_CNTR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QP_ATTACH_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QP_ATTACH_COMP_CNTR_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK,
+ enum ib_uverbs_qp_attach_comp_cntr_op,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_QP,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp),
&UVERBS_METHOD(UVERBS_METHOD_QP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY));
+ &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_ATTACH_COMP_CNTR));
const struct uapi_definition uverbs_def_obj_qp[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 4e2e556c8119..d150099b99d2 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -628,6 +628,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi)
static const struct uapi_definition uverbs_core_api[] = {
UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_comp_cntr),
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
UAPI_DEF_CHAIN(uverbs_def_obj_device),
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0daa5089d539..4d3441bf7328 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -454,6 +454,7 @@ struct ib_device_attr {
u64 max_dm_size;
/* Max entries for sgl for optimized performance per READ */
u32 max_sgl_rd;
+ u32 max_comp_cntr;
};
enum ib_mtu {
@@ -1746,6 +1747,36 @@ struct ib_cq {
struct rdma_restrack_entry res;
};
+struct ib_comp_cntr {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ u64 comp_count_max_value;
+ u64 err_count_max_value;
+};
+
+enum ib_comp_cntr_entry {
+ IB_COMP_CNTR_ENTRY_COMP = IB_UVERBS_COMP_CNTR_ENTRY_COMP,
+ IB_COMP_CNTR_ENTRY_ERR = IB_UVERBS_COMP_CNTR_ENTRY_ERR,
+};
+
+enum ib_comp_cntr_modify_op {
+ IB_COMP_CNTR_MODIFY_OP_SET = IB_UVERBS_COMP_CNTR_MODIFY_OP_SET,
+ IB_COMP_CNTR_MODIFY_OP_INC = IB_UVERBS_COMP_CNTR_MODIFY_OP_INC,
+};
+
+enum ib_qp_attach_comp_cntr_op {
+ IB_QP_ATTACH_COMP_CNTR_OP_SEND = IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_SEND,
+ IB_QP_ATTACH_COMP_CNTR_OP_RECV = IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RECV,
+ IB_QP_ATTACH_COMP_CNTR_OP_RDMA_READ = IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RDMA_READ,
+ IB_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_READ = IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_READ,
+ IB_QP_ATTACH_COMP_CNTR_OP_RDMA_WRITE = IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RDMA_WRITE,
+ IB_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_WRITE = IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_WRITE,
+};
+
+struct ib_qp_attach_comp_cntr_attr {
+ u32 op_mask;
+};
+
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
@@ -2624,6 +2655,8 @@ struct ib_device_ops {
struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
+ int (*qp_attach_comp_cntr)(struct ib_qp *qp, struct ib_comp_cntr *cc,
+ struct ib_qp_attach_comp_cntr_attr *attr);
int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
@@ -2645,6 +2678,12 @@ struct ib_device_ops {
* post_destroy_cq - Free all kernel resources
*/
void (*post_destroy_cq)(struct ib_cq *cq);
+ int (*create_comp_cntr)(struct ib_comp_cntr *cc,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_comp_cntr)(struct ib_comp_cntr *cc);
+ int (*modify_comp_cntr)(struct ib_comp_cntr *cc, enum ib_comp_cntr_entry entry,
+ enum ib_comp_cntr_modify_op op, u64 value);
+ int (*read_comp_cntr)(struct ib_comp_cntr *cc, enum ib_comp_cntr_entry entry, u64 *value);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
@@ -2878,6 +2917,7 @@ struct ib_device_ops {
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_comp_cntr);
DECLARE_RDMA_OBJ_SIZE(ib_dmah);
DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 839835bd4b23..1fd537ebb69e 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -57,6 +57,7 @@ enum uverbs_default_objects {
UVERBS_OBJECT_ASYNC_EVENT,
UVERBS_OBJECT_DMAH,
UVERBS_OBJECT_DMABUF,
+ UVERBS_OBJECT_COMP_CNTR,
};
enum {
@@ -169,9 +170,16 @@ enum uverbs_attrs_destroy_qp_cmd_attr_ids {
UVERBS_ATTR_DESTROY_QP_RESP,
};
+enum uverbs_attrs_qp_attach_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_QP_ATTACH_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_QP_ATTACH_COMP_CNTR_CNTR_HANDLE,
+ UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK,
+};
+
enum uverbs_methods_qp {
UVERBS_METHOD_QP_CREATE,
UVERBS_METHOD_QP_DESTROY,
+ UVERBS_METHOD_QP_ATTACH_COMP_CNTR,
};
enum uverbs_attrs_create_srq_cmd_attr_ids {
@@ -438,4 +446,34 @@ enum uverbs_attrs_query_gid_entry_cmd_attr_ids {
UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
};
+enum uverbs_methods_comp_cntr {
+ UVERBS_METHOD_COMP_CNTR_CREATE,
+ UVERBS_METHOD_COMP_CNTR_DESTROY,
+ UVERBS_METHOD_COMP_CNTR_MODIFY,
+ UVERBS_METHOD_COMP_CNTR_READ,
+};
+
+enum uverbs_attrs_create_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_CREATE_COMP_CNTR_RESP_COUNT_MAX_VALUE,
+ UVERBS_ATTR_CREATE_COMP_CNTR_RESP_ERR_COUNT_MAX_VALUE,
+};
+
+enum uverbs_attrs_destroy_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_DESTROY_COMP_CNTR_HANDLE,
+};
+
+enum uverbs_attrs_modify_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_MODIFY_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_MODIFY_COMP_CNTR_ENTRY,
+ UVERBS_ATTR_MODIFY_COMP_CNTR_OP,
+ UVERBS_ATTR_MODIFY_COMP_CNTR_VALUE,
+};
+
+enum uverbs_attrs_read_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_READ_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_READ_COMP_CNTR_ENTRY,
+ UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 51030c27d479..21f86cc7bb1f 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -300,4 +300,23 @@ struct ib_uverbs_buffer_desc {
__aligned_u64 length;
};
+enum ib_uverbs_comp_cntr_entry {
+ IB_UVERBS_COMP_CNTR_ENTRY_COMP,
+ IB_UVERBS_COMP_CNTR_ENTRY_ERR,
+};
+
+enum ib_uverbs_comp_cntr_modify_op {
+ IB_UVERBS_COMP_CNTR_MODIFY_OP_SET,
+ IB_UVERBS_COMP_CNTR_MODIFY_OP_INC,
+};
+
+enum ib_uverbs_qp_attach_comp_cntr_op {
+ IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_SEND = 1 << 0,
+ IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RECV = 1 << 1,
+ IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RDMA_READ = 1 << 2,
+ IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_READ = 1 << 3,
+ IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_RDMA_WRITE = 1 << 4,
+ IB_UVERBS_QP_ATTACH_COMP_CNTR_OP_REMOTE_RDMA_WRITE = 1 << 5,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index d2aeadb6d2f9..d212bb470a4a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -299,7 +299,7 @@ struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_cq_moderation_caps cq_moderation_caps;
__aligned_u64 max_dm_size;
__u32 xrc_odp_caps;
- __u32 reserved;
+ __u32 max_comp_cntr;
};
struct ib_uverbs_query_port {
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH for-next v6 4/5] RDMA/efa: Update device interface
2026-06-04 11:46 [PATCH for-next v6 0/5] Introduce Completion Counters Michael Margolin
` (2 preceding siblings ...)
2026-06-04 11:46 ` [PATCH for-next v6 3/5] RDMA/core: Add Completion Counters to resource tracking Michael Margolin
@ 2026-06-04 11:46 ` Michael Margolin
3 siblings, 0 replies; 5+ messages in thread
From: Michael Margolin @ 2026-06-04 11:46 UTC (permalink / raw)
To: jgg, leon, linux-rdma
Cc: sleybo, matua, gal.pressman, Daniel Kinsbursky, Yonatan Nachum
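Align the EFA device interface definitions with the device specification:
add the service and event counter admin commands and capabilities, the
128-byte Tx WQE layout, Tx processing hints, and the remote feature
mismatch completion status.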
Reviewed-by: Daniel Kinsbursky <dkinsb@amazon.com>
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
---
.../infiniband/hw/efa/efa_admin_cmds_defs.h | 185 +++++++++++++++++-
drivers/infiniband/hw/efa/efa_io_defs.h | 63 +++++-
2 files changed, 242 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 826790ca9d83..a305585dee3c 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -28,7 +28,12 @@ enum efa_admin_aq_opcode {
EFA_ADMIN_CREATE_EQ = 18,
EFA_ADMIN_DESTROY_EQ = 19,
EFA_ADMIN_ALLOC_MR = 20,
- EFA_ADMIN_MAX_OPCODE = 20,
+ EFA_ADMIN_SERVICE = 21,
+ EFA_ADMIN_CREATE_EVENT_COUNTER = 25,
+ EFA_ADMIN_DESTROY_EVENT_COUNTER = 26,
+ EFA_ADMIN_ATTACH_EVENT_COUNTER = 27,
+ EFA_ADMIN_MODIFY_EVENT_COUNTER = 28,
+ EFA_ADMIN_MAX_OPCODE = 28,
};
enum efa_admin_aq_feature_id {
@@ -722,7 +727,9 @@ struct efa_admin_feature_device_attr_desc {
* on TX queues
* 4 : unsolicited_write_recv - If set, unsolicited
* write with imm. receive is supported
- * 31:5 : reserved - MBZ
+ * 5 : event_counters - If set, event counters are
+ * supported
+ * 31:6 : reserved - MBZ
*/
u32 device_caps;
@@ -811,6 +818,34 @@ struct efa_admin_feature_queue_attr_desc_1 {
struct efa_admin_feature_queue_attr_desc_2 {
/* Maximum size of data that can be sent inline in a Send WQE */
u16 inline_buf_size_ex;
+
+ /* MBZ */
+ u8 reserved[6];
+
+ /*
+ * Supported counter QP events
+ * 0 : send_comp
+ * 1 : send_comp_err
+ * 2 : recv_comp
+ * 3 : recv_comp_err
+ * 4 : read_comp
+ * 5 : read_comp_err
+ * 6 : write_comp
+ * 7 : write_comp_err
+ * 8 : remote_read_comp
+ * 9 : remote_write_comp
+ * 31:10 : reserved - MBZ
+ */
+ u32 supported_event_counter_qp_events;
+
+ /* Maximum number of counters */
+ u32 max_event_counters;
+
+ /*
+ * Maximum counter value, counter wraps around to 0 after reaching
+ * this value
+ */
+ u64 event_counter_max_val;
};
struct efa_admin_event_queue_attr_desc {
@@ -1089,6 +1124,127 @@ struct efa_admin_host_info {
u32 flags;
};
+struct efa_admin_service_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ u8 buffer[60];
+};
+
+struct efa_admin_service_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ u8 buffer[56];
+};
+
+/* Create Counter command */
+struct efa_admin_create_event_counter_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* UAR number */
+ u16 uar;
+
+ /* MBZ */
+ u16 reserved;
+
+ /* Counter physical address */
+ u64 paddr;
+};
+
+struct efa_admin_create_event_counter_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* Counter handle */
+ u32 cntr_handle;
+
+ /* MBZ */
+ u32 reserved;
+};
+
+struct efa_admin_destroy_event_counter_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* Counter handle */
+ u32 cntr_handle;
+};
+
+struct efa_admin_destroy_event_counter_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+enum efa_admin_event_counter_attach_type {
+ EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS = 0,
+};
+
+struct efa_admin_event_counter_attach_qp_events {
+ /* QP handle */
+ u32 qp_handle;
+
+ /*
+ * Bitmask of counter QP events
+ * 0 : send_comp
+ * 1 : send_comp_err
+ * 2 : recv_comp
+ * 3 : recv_comp_err
+ * 4 : read_comp
+ * 5 : read_comp_err
+ * 6 : write_comp
+ * 7 : write_comp_err
+ * 8 : remote_read_comp
+ * 9 : remote_write_comp
+ * 31:10 : reserved - MBZ
+ */
+ u32 events;
+};
+
+struct efa_admin_attach_event_counter_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* Counter handle */
+ u32 cntr_handle;
+
+ /* efa_admin_event_counter_attach_type */
+ u8 attach_type;
+
+ /* MBZ */
+ u8 reserved[3];
+
+ union {
+ struct efa_admin_event_counter_attach_qp_events qp_events;
+ } u;
+};
+
+struct efa_admin_attach_event_counter_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/* Counter modify operations */
+enum efa_admin_event_counter_modify_ops {
+ /* Set counter value */
+ EFA_ADMIN_EVENT_COUNTER_MODIFY_SET = 0,
+ /* Add to counter value */
+ EFA_ADMIN_EVENT_COUNTER_MODIFY_ADD = 1,
+};
+
+struct efa_admin_modify_event_counter_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* Counter handle */
+ u32 cntr_handle;
+
+ /* Counter operation type (efa_admin_event_counter_modify_ops) */
+ u8 operation;
+
+ /* MBZ */
+ u8 reserved[7];
+
+ /* Value for SET or ADD */
+ u64 value;
+};
+
+struct efa_admin_modify_event_counter_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
/* create_qp_cmd */
#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
@@ -1129,6 +1285,19 @@ struct efa_admin_host_info {
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV_MASK BIT(4)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_EVENT_COUNTERS_MASK BIT(5)
+
+/* feature_queue_attr_desc_2 */
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_SEND_COMP_MASK BIT(0)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_SEND_COMP_ERR_MASK BIT(1)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_RECV_COMP_MASK BIT(2)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_RECV_COMP_ERR_MASK BIT(3)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_READ_COMP_MASK BIT(4)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_READ_COMP_ERR_MASK BIT(5)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_WRITE_COMP_MASK BIT(6)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_WRITE_COMP_ERR_MASK BIT(7)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_REMOTE_READ_COMP_MASK BIT(8)
+#define EFA_ADMIN_FEATURE_QUEUE_ATTR_DESC_2_REMOTE_WRITE_COMP_MASK BIT(9)
/* create_eq_cmd */
#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
@@ -1147,4 +1316,16 @@ struct efa_admin_host_info {
#define EFA_ADMIN_HOST_INFO_INTREE_MASK BIT(0)
#define EFA_ADMIN_HOST_INFO_GDR_MASK BIT(1)
+/* event_counter_attach_qp_events */
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_SEND_COMP_MASK BIT(0)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_SEND_COMP_ERR_MASK BIT(1)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_RECV_COMP_MASK BIT(2)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_RECV_COMP_ERR_MASK BIT(3)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_READ_COMP_MASK BIT(4)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_READ_COMP_ERR_MASK BIT(5)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_WRITE_COMP_MASK BIT(6)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_WRITE_COMP_ERR_MASK BIT(7)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_REMOTE_READ_COMP_MASK BIT(8)
+#define EFA_ADMIN_EVENT_COUNTER_ATTACH_QP_EVENTS_REMOTE_WRITE_COMP_MASK BIT(9)
+
#endif /* _EFA_ADMIN_CMDS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
index a4c9fd33da38..ede4b27eb951 100644
--- a/drivers/infiniband/hw/efa/efa_io_defs.h
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2026 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_IO_H_
@@ -9,6 +9,7 @@
#define EFA_IO_TX_DESC_NUM_BUFS 2
#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
+#define EFA_IO_TX_DESC_INLINE_MAX_SIZE_128 80
#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
#define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1
@@ -65,6 +66,8 @@ enum efa_io_comp_status {
EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
/* Unreachable remote - never received a response */
EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
+ /* Remote feature mismatch */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_FEATURE_MISMATCH = 18,
};
enum efa_io_frwr_pbl_mode {
@@ -72,6 +75,11 @@ enum efa_io_frwr_pbl_mode {
EFA_IO_FRWR_DIRECT_PBL = 1,
};
+enum efa_io_processing_hint {
+ /* Optimize for throughput */
+ EFA_IO_PROCESSING_HINT_BURST_PPS_SENSITIVE = 1 << 0,
+};
+
struct efa_io_tx_meta_desc {
/* Verbs-generated Request ID */
u16 req_id;
@@ -121,7 +129,15 @@ struct efa_io_tx_meta_desc {
u16 ah;
- u16 reserved;
+ /*
+ * control flags
+ * 1:0 : processing_hints - Bitmask of enum
+ * efa_io_processing_hint
+ * 7:2 : reserved - MBZ
+ */
+ u8 ctrl3;
+
+ u8 reserved;
/* Queue key */
u32 qkey;
@@ -172,6 +188,19 @@ struct efa_io_rdma_req {
struct efa_io_tx_buf_desc local_mem[1];
};
+struct efa_io_rdma_req_128 {
+ /* Remote memory address */
+ struct efa_io_remote_mem_addr remote_mem;
+
+ union {
+ /* Local memory address */
+ struct efa_io_tx_buf_desc local_mem[1];
+
+ /* inline data for RDMA */
+ u8 inline_data[80];
+ };
+};
+
struct efa_io_fast_mr_reg_req {
/* Updated local key of the MR after lkey/rkey increment */
u32 lkey;
@@ -230,8 +259,8 @@ struct efa_io_fast_mr_inv_req {
};
/*
- * Tx WQE, composed of tx meta descriptors followed by either tx buffer
- * descriptors or inline data
+ * 64-byte Tx WQE, composed of tx meta descriptors followed by either tx
+ * buffer descriptors or inline data
*/
struct efa_io_tx_wqe {
/* TX meta */
@@ -254,6 +283,31 @@ struct efa_io_tx_wqe {
} data;
};
+/*
+ * 128-byte Tx WQE, composed of tx meta descriptors followed by either tx
+ * buffer descriptors or inline data
+ */
+struct efa_io_tx_wqe_128 {
+ /* TX meta */
+ struct efa_io_tx_meta_desc meta;
+
+ union {
+ /* Send buffer descriptors */
+ struct efa_io_tx_buf_desc sgl[2];
+
+ u8 inline_data[80];
+
+ /* RDMA local and remote memory addresses */
+ struct efa_io_rdma_req_128 rdma_req;
+
+ /* Fast registration */
+ struct efa_io_fast_mr_reg_req reg_mr_req;
+
+ /* Fast invalidation */
+ struct efa_io_fast_mr_inv_req inv_mr_req;
+ } data;
+};
+
/*
* Rx buffer descriptor; RX WQE is composed of one or more RX buffer
* descriptors.
@@ -365,6 +419,7 @@ struct efa_io_rx_cdesc_ex {
#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2)
#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3)
#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4)
+#define EFA_IO_TX_META_DESC_PROCESSING_HINTS_MASK GENMASK(1, 0)
/* tx_buf_desc */
#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
--
2.47.3
^ permalink raw reply related [flat|nested] 5+ messages in thread