* [PATCH for-next 1/4] RDMA/core: Add Completion Counters support
2026-04-07 11:54 [PATCH for-next 0/4] Introduce Completion Counters Michael Margolin
@ 2026-04-07 11:54 ` Michael Margolin
2026-04-07 14:17 ` Jason Gunthorpe
2026-04-07 11:54 ` [PATCH for-next 2/4] RDMA/core: Add Completion Counters to resource tracking Michael Margolin
` (2 subsequent siblings)
3 siblings, 1 reply; 14+ messages in thread
From: Michael Margolin @ 2026-04-07 11:54 UTC (permalink / raw)
To: jgg, leon, linux-rdma; +Cc: sleybo, matua, gal.pressman, Yonatan Nachum
Add core infrastructure for Completion Counters, a lightweight
alternative to polling a CQ for tracking operation completions.
Define the UVERBS_OBJECT_COMP_CNTR ioctl object with create, destroy,
set, inc and read methods for both success and error counters. Add a
QP attach method on the QP object to associate a completion counter
with a queue pair.
The create handler constructs a umem from a user-provided VA or dmabuf
for each counter, following the CQ buffer pattern. The set, inc and read
handlers pass through to driver callbacks. The QP attach handler
validates the operation mask flags and delegates to the driver.
Add ib_comp_cntr struct, ib_comp_cntr_attach_attr, device ops, and
DECLARE_RDMA_OBJ_SIZE for driver object allocation.
Only userspace Completion Counters are supported at this stage.
Reviewed-by: Yonatan Nachum <ynachum@amazon.com>
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
---
drivers/infiniband/core/Makefile | 1 +
drivers/infiniband/core/device.c | 10 +
drivers/infiniband/core/rdma_core.h | 1 +
drivers/infiniband/core/uverbs_cmd.c | 1 +
.../core/uverbs_std_types_comp_cntr.c | 373 ++++++++++++++++++
drivers/infiniband/core/uverbs_std_types_qp.c | 45 ++-
drivers/infiniband/core/uverbs_uapi.c | 1 +
include/rdma/ib_verbs.h | 26 ++
include/uapi/rdma/ib_user_ioctl_cmds.h | 65 +++
include/uapi/rdma/ib_user_ioctl_verbs.h | 9 +
include/uapi/rdma/ib_user_verbs.h | 2 +-
11 files changed, 532 insertions(+), 2 deletions(-)
create mode 100644 drivers/infiniband/core/uverbs_std_types_comp_cntr.c
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index dce798d8cfe6..4767339608a1 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -35,6 +35,7 @@ ib_umad-y := user_mad.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_std_types_cq.o \
+ uverbs_std_types_comp_cntr.o \
uverbs_std_types_dmabuf.o \
uverbs_std_types_dmah.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4c174f7f1070..c9b4a85d1a35 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2733,6 +2733,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_ah);
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
+ SET_DEVICE_OP(dev_ops, create_comp_cntr);
SET_DEVICE_OP(dev_ops, create_user_cq);
SET_DEVICE_OP(dev_ops, create_flow);
SET_DEVICE_OP(dev_ops, create_qp);
@@ -2753,6 +2754,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, destroy_ah);
SET_DEVICE_OP(dev_ops, destroy_counters);
SET_DEVICE_OP(dev_ops, destroy_cq);
+ SET_DEVICE_OP(dev_ops, destroy_comp_cntr);
SET_DEVICE_OP(dev_ops, destroy_flow);
SET_DEVICE_OP(dev_ops, destroy_flow_action);
SET_DEVICE_OP(dev_ops, destroy_qp);
@@ -2804,6 +2806,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
+ SET_DEVICE_OP(dev_ops, inc_comp_cntr);
+ SET_DEVICE_OP(dev_ops, inc_err_comp_cntr);
+ SET_DEVICE_OP(dev_ops, qp_attach_comp_cntr);
SET_DEVICE_OP(dev_ops, modify_srq);
SET_DEVICE_OP(dev_ops, modify_wq);
SET_DEVICE_OP(dev_ops, peek_cq);
@@ -2827,12 +2832,16 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, query_ucontext);
SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
SET_DEVICE_OP(dev_ops, read_counters);
+ SET_DEVICE_OP(dev_ops, read_comp_cntr);
+ SET_DEVICE_OP(dev_ops, read_err_comp_cntr);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
SET_DEVICE_OP(dev_ops, resize_user_cq);
+ SET_DEVICE_OP(dev_ops, set_comp_cntr);
+ SET_DEVICE_OP(dev_ops, set_err_comp_cntr);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
@@ -2841,6 +2850,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_comp_cntr);
SET_OBJ_SIZE(dev_ops, ib_dmah);
SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 269b393799ab..2569550e4c6d 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -156,6 +156,7 @@ uverbs_api_ioctl_handler_fn uverbs_get_handler_fn(struct ib_udata *udata);
extern const struct uapi_definition uverbs_def_obj_async_fd[];
extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_comp_cntr[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
extern const struct uapi_definition uverbs_def_obj_dm[];
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a768436ba468..4bc493b3b624 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3673,6 +3673,7 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
resp.max_dm_size = attr.max_dm_size;
+ resp.max_comp_cntr = attr.max_comp_cntr;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
return uverbs_response(attrs, &resp, sizeof(resp));
diff --git a/drivers/infiniband/core/uverbs_std_types_comp_cntr.c b/drivers/infiniband/core/uverbs_std_types_comp_cntr.c
new file mode 100644
index 000000000000..a62c20d4e5a4
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_comp_cntr.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_dmabuf.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+/*
+ * Tear down a completion counter uobject: ask the driver to destroy the
+ * counter first and, only when that succeeds, release both umems and free
+ * the object. A driver failure is returned with nothing released.
+ */
+static int uverbs_free_comp_cntr(struct ib_uobject *uobject,
+				 enum rdma_remove_reason why,
+				 struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cntr = uobject->object;
+	int err = cntr->device->ops.destroy_comp_cntr(cntr);
+
+	if (!err) {
+		ib_umem_release(cntr->comp_umem);
+		ib_umem_release(cntr->err_umem);
+		kfree(cntr);
+	}
+
+	return err;
+}
+
+/*
+ * Build the umem backing one counter from the user supplied attributes.
+ *
+ * The buffer is described either by a user VA (@va_attr) or by a dmabuf FD
+ * (@fd_attr) together with an offset (@offset_attr). In both cases a region
+ * of sizeof(u64) bytes is requested with IB_ACCESS_LOCAL_WRITE.
+ *
+ * Returns 0 on success with *@umem_out set to the new umem, or to NULL when
+ * none of the buffer attributes was supplied. Returns a negative errno on
+ * failure with *@umem_out left NULL. Combining the VA with the FD or the
+ * offset, or passing an offset without an FD, is rejected with -EINVAL.
+ */
+static int comp_cntr_get_umem(struct ib_device *ib_dev,
+			      struct uverbs_attr_bundle *attrs,
+			      int va_attr, int fd_attr, int offset_attr,
+			      struct ib_umem **umem_out)
+{
+	bool has_offset = uverbs_attr_is_valid(attrs, offset_attr);
+	bool has_va = uverbs_attr_is_valid(attrs, va_attr);
+	bool has_fd = uverbs_attr_is_valid(attrs, fd_attr);
+	struct ib_umem_dmabuf *umem_dmabuf;
+	u64 buffer_offset;
+	u64 buffer_va;
+	int buffer_fd;
+	int ret;
+
+	*umem_out = NULL;
+
+	if (has_va) {
+		/* A VA buffer excludes every dmabuf related attribute. */
+		if (has_fd || has_offset)
+			return -EINVAL;
+
+		ret = uverbs_copy_from(&buffer_va, attrs, va_attr);
+		if (ret)
+			return ret;
+
+		*umem_out = ib_umem_get(ib_dev, buffer_va, sizeof(u64),
+					IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(*umem_out)) {
+			ret = PTR_ERR(*umem_out);
+			/* Never hand an ERR_PTR back to the caller. */
+			*umem_out = NULL;
+			return ret;
+		}
+	} else if (has_fd) {
+		ret = uverbs_get_raw_fd(&buffer_fd, attrs, fd_attr);
+		if (ret)
+			return ret;
+
+		ret = uverbs_copy_from(&buffer_offset, attrs, offset_attr);
+		if (ret)
+			return ret;
+
+		umem_dmabuf = ib_umem_dmabuf_get_pinned(ib_dev, buffer_offset,
+							sizeof(u64), buffer_fd,
+							IB_ACCESS_LOCAL_WRITE);
+		if (IS_ERR(umem_dmabuf))
+			return PTR_ERR(umem_dmabuf);
+
+		*umem_out = &umem_dmabuf->umem;
+	} else if (has_offset) {
+		/* An offset has no meaning without the dmabuf FD it refers to. */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Create handler for a completion counter.
+ *
+ * Requires the driver to provide the create, destroy and QP attach ops.
+ * Allocates the driver object, builds the umems for the success and error
+ * counters from the buffer attributes, calls the driver's create op and
+ * then copies both counter max values to the response attributes.
+ *
+ * Any failure before the uobject is finalized releases what was acquired
+ * so far. A failure while copying the response is returned as is.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_CREATE)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_device *ib_dev = attrs->context->device;
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE);
+	const struct ib_device_ops *ops = &ib_dev->ops;
+	struct ib_comp_cntr *cc;
+	int ret;
+
+	if (!(ops->create_comp_cntr && ops->destroy_comp_cntr &&
+	      ops->qp_attach_comp_cntr))
+		return -EOPNOTSUPP;
+
+	cc = rdma_zalloc_drv_obj(ib_dev, ib_comp_cntr);
+	if (!cc)
+		return -ENOMEM;
+
+	cc->uobject = uobj;
+	cc->device = ib_dev;
+
+	/* Buffer for the success counter. */
+	ret = comp_cntr_get_umem(ib_dev, attrs,
+				 UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_VA,
+				 UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_FD,
+				 UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_OFFSET,
+				 &cc->comp_umem);
+	if (ret)
+		goto free_cc;
+
+	/* Buffer for the error counter. */
+	ret = comp_cntr_get_umem(ib_dev, attrs,
+				 UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_VA,
+				 UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_FD,
+				 UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_OFFSET,
+				 &cc->err_umem);
+	if (ret)
+		goto release_comp_umem;
+
+	ret = ops->create_comp_cntr(cc, attrs);
+	if (ret)
+		goto release_err_umem;
+
+	uobj->object = cc;
+	uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE);
+
+	ret = uverbs_copy_to(attrs,
+			     UVERBS_ATTR_CREATE_COMP_CNTR_RESP_COUNT_MAX_VALUE,
+			     &cc->comp_count_max_value,
+			     sizeof(cc->comp_count_max_value));
+	if (ret)
+		return ret;
+
+	return uverbs_copy_to(attrs,
+			      UVERBS_ATTR_CREATE_COMP_CNTR_RESP_ERR_COUNT_MAX_VALUE,
+			      &cc->err_count_max_value,
+			      sizeof(cc->err_count_max_value));
+
+release_err_umem:
+	ib_umem_release(cc->err_umem);
+release_comp_umem:
+	ib_umem_release(cc->comp_umem);
+free_cc:
+	kfree(cc);
+	return ret;
+}
+
+/*
+ * Set handler: read the u64 value attribute and pass it to the driver's
+ * set_comp_cntr op. Returns -EOPNOTSUPP when the driver lacks the op.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_SET)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cntr = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_SET_COMP_CNTR_HANDLE);
+	u64 new_value;
+	int err;
+
+	if (!cntr->device->ops.set_comp_cntr)
+		return -EOPNOTSUPP;
+
+	err = uverbs_copy_from(&new_value, attrs,
+			       UVERBS_ATTR_SET_COMP_CNTR_VALUE);
+
+	return err ? err : cntr->device->ops.set_comp_cntr(cntr, new_value);
+}
+
+/*
+ * Set-error handler: read the u64 value attribute and pass it to the
+ * driver's set_err_comp_cntr op. Returns -EOPNOTSUPP when the driver lacks
+ * the op.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_SET_ERR)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cntr = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_SET_ERR_COMP_CNTR_HANDLE);
+	u64 new_value;
+	int err;
+
+	if (!cntr->device->ops.set_err_comp_cntr)
+		return -EOPNOTSUPP;
+
+	err = uverbs_copy_from(&new_value, attrs,
+			       UVERBS_ATTR_SET_ERR_COMP_CNTR_VALUE);
+
+	return err ? err :
+		     cntr->device->ops.set_err_comp_cntr(cntr, new_value);
+}
+
+/*
+ * Increment handler: read the u64 amount attribute and pass it to the
+ * driver's inc_comp_cntr op. Returns -EOPNOTSUPP when the driver lacks the
+ * op.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_INC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cntr = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_INC_COMP_CNTR_HANDLE);
+	u64 delta;
+	int err;
+
+	if (!cntr->device->ops.inc_comp_cntr)
+		return -EOPNOTSUPP;
+
+	err = uverbs_copy_from(&delta, attrs, UVERBS_ATTR_INC_COMP_CNTR_VALUE);
+
+	return err ? err : cntr->device->ops.inc_comp_cntr(cntr, delta);
+}
+
+/*
+ * Increment-error handler: read the u64 amount attribute and pass it to
+ * the driver's inc_err_comp_cntr op. Returns -EOPNOTSUPP when the driver
+ * lacks the op.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_INC_ERR)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cntr = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_INC_ERR_COMP_CNTR_HANDLE);
+	u64 delta;
+	int err;
+
+	if (!cntr->device->ops.inc_err_comp_cntr)
+		return -EOPNOTSUPP;
+
+	err = uverbs_copy_from(&delta, attrs,
+			       UVERBS_ATTR_INC_ERR_COMP_CNTR_VALUE);
+
+	return err ? err : cntr->device->ops.inc_err_comp_cntr(cntr, delta);
+}
+
+/*
+ * Read handler: fetch the counter value through the driver's
+ * read_comp_cntr op and copy it to the response attribute. Returns
+ * -EOPNOTSUPP when the driver lacks the op, or the driver's error.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_READ)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cc = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_READ_COMP_CNTR_HANDLE);
+	/*
+	 * Zero-initialised so that an op which reports success without
+	 * storing a result cannot expose stack contents to userspace.
+	 */
+	u64 value = 0;
+	int ret;
+
+	if (!cc->device->ops.read_comp_cntr)
+		return -EOPNOTSUPP;
+
+	ret = cc->device->ops.read_comp_cntr(cc, &value);
+	if (ret)
+		return ret;
+
+	return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE,
+			      &value, sizeof(value));
+}
+
+/*
+ * Read-error handler: fetch the error counter value through the driver's
+ * read_err_comp_cntr op and copy it to the response attribute. Returns
+ * -EOPNOTSUPP when the driver lacks the op, or the driver's error.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_COMP_CNTR_READ_ERR)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_comp_cntr *cc = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_READ_ERR_COMP_CNTR_HANDLE);
+	/*
+	 * Zero-initialised so that an op which reports success without
+	 * storing a result cannot expose stack contents to userspace.
+	 */
+	u64 value = 0;
+	int ret;
+
+	if (!cc->device->ops.read_err_comp_cntr)
+		return -EOPNOTSUPP;
+
+	ret = cc->device->ops.read_err_comp_cntr(cc, &value);
+	if (ret)
+		return ret;
+
+	return uverbs_copy_to(attrs, UVERBS_ATTR_READ_ERR_COMP_CNTR_RESP_VALUE,
+			      &value, sizeof(value));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_VA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_RAW_FD(UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_VA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_RAW_FD(UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_COMP_CNTR_RESP_COUNT_MAX_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_COMP_CNTR_RESP_ERR_COUNT_MAX_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_COMP_CNTR_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_SET,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_SET_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_SET_COMP_CNTR_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_SET_ERR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_SET_ERR_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_SET_ERR_COMP_CNTR_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_INC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_INC_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_INC_COMP_CNTR_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_INC_ERR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_INC_ERR_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_INC_ERR_COMP_CNTR_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_READ,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COMP_CNTR_READ_ERR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_ERR_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_ERR_COMP_CNTR_RESP_VALUE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_comp_cntr),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_SET),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_SET_ERR),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_INC),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_INC_ERR),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_READ),
+ &UVERBS_METHOD(UVERBS_METHOD_COMP_CNTR_READ_ERR));
+
+const struct uapi_definition uverbs_def_obj_comp_cntr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CNTR,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_comp_cntr)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
index be0730e8509e..2c607b02d9d5 100644
--- a/drivers/infiniband/core/uverbs_std_types_qp.c
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -367,11 +367,54 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp),
UA_MANDATORY));
+/*
+ * Attach a completion counter to a QP.
+ *
+ * The op mask attribute is optional and attr starts out zeroed; only the
+ * IB_UVERBS_COMP_CNTR_ATTACH_OP_* bits listed below are accepted. The
+ * attach op is both looked up and invoked through the QP's device so that
+ * the capability check and the call always use the same ops table.
+ *
+ * Returns -EOPNOTSUPP when the driver lacks the op, the error from parsing
+ * the mask, or the driver's return value.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_ATTACH_COMP_CNTR)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *qp_uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_QP_ATTACH_COMP_CNTR_QP_HANDLE);
+	struct ib_comp_cntr *cc = uverbs_attr_get_obj(
+		attrs, UVERBS_ATTR_QP_ATTACH_COMP_CNTR_HANDLE);
+	struct ib_comp_cntr_attach_attr attr = {};
+	struct ib_qp *qp = qp_uobj->object;
+	int ret;
+
+	if (!qp->device->ops.qp_attach_comp_cntr)
+		return -EOPNOTSUPP;
+
+	ret = uverbs_get_flags32(&attr.op_mask, attrs,
+				 UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK,
+				 IB_UVERBS_COMP_CNTR_ATTACH_OP_SEND |
+				 IB_UVERBS_COMP_CNTR_ATTACH_OP_RECV |
+				 IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_READ |
+				 IB_UVERBS_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_READ |
+				 IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_WRITE |
+				 IB_UVERBS_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_WRITE);
+	if (ret)
+		return ret;
+
+	return qp->device->ops.qp_attach_comp_cntr(qp, cc, &attr);
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_ATTACH_COMP_CNTR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QP_ATTACH_COMP_CNTR_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QP_ATTACH_COMP_CNTR_HANDLE,
+ UVERBS_OBJECT_COMP_CNTR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK,
+ enum ib_uverbs_comp_cntr_attach_op,
+ UA_OPTIONAL));
+
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_QP,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp),
&UVERBS_METHOD(UVERBS_METHOD_QP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY));
+ &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_ATTACH_COMP_CNTR));
const struct uapi_definition uverbs_def_obj_qp[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 31b248295854..a3f42a50a14f 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -628,6 +628,7 @@ void uverbs_destroy_api(struct uverbs_api *uapi)
static const struct uapi_definition uverbs_core_api[] = {
UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_comp_cntr),
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
UAPI_DEF_CHAIN(uverbs_def_obj_device),
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9dd76f489a0b..76fa705389a4 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -453,6 +453,7 @@ struct ib_device_attr {
u64 max_dm_size;
/* Max entries for sgl for optimized performance per READ */
u32 max_sgl_rd;
+ u32 max_comp_cntr;
};
enum ib_mtu {
@@ -1746,6 +1747,19 @@ struct ib_cq {
struct rdma_restrack_entry res;
};
+struct ib_comp_cntr {
+ struct ib_device *device;
+ struct ib_uobject *uobject;
+ struct ib_umem *comp_umem;
+ struct ib_umem *err_umem;
+ u64 comp_count_max_value;
+ u64 err_count_max_value;
+};
+
+struct ib_comp_cntr_attach_attr {
+ u32 op_mask;
+};
+
struct ib_srq {
struct ib_device *device;
struct ib_pd *pd;
@@ -2624,6 +2638,8 @@ struct ib_device_ops {
struct ib_udata *udata);
int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
+ int (*qp_attach_comp_cntr)(struct ib_qp *qp, struct ib_comp_cntr *cc,
+ struct ib_comp_cntr_attach_attr *attr);
int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
@@ -2645,6 +2661,15 @@ struct ib_device_ops {
* post_destroy_cq - Free all kernel resources
*/
void (*post_destroy_cq)(struct ib_cq *cq);
+ int (*create_comp_cntr)(struct ib_comp_cntr *cc,
+ struct uverbs_attr_bundle *attrs);
+ int (*destroy_comp_cntr)(struct ib_comp_cntr *cc);
+ int (*set_comp_cntr)(struct ib_comp_cntr *cc, u64 value);
+ int (*set_err_comp_cntr)(struct ib_comp_cntr *cc, u64 value);
+ int (*inc_comp_cntr)(struct ib_comp_cntr *cc, u64 amount);
+ int (*inc_err_comp_cntr)(struct ib_comp_cntr *cc, u64 amount);
+ int (*read_comp_cntr)(struct ib_comp_cntr *cc, u64 *value);
+ int (*read_err_comp_cntr)(struct ib_comp_cntr *cc, u64 *value);
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
@@ -2878,6 +2903,7 @@ struct ib_device_ops {
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_comp_cntr);
DECLARE_RDMA_OBJ_SIZE(ib_dmah);
DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 72041c1b0ea5..f66ae6d44df4 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -57,6 +57,7 @@ enum uverbs_default_objects {
UVERBS_OBJECT_ASYNC_EVENT,
UVERBS_OBJECT_DMAH,
UVERBS_OBJECT_DMABUF,
+ UVERBS_OBJECT_COMP_CNTR,
};
enum {
@@ -168,6 +169,7 @@ enum uverbs_attrs_destroy_qp_cmd_attr_ids {
enum uverbs_methods_qp {
UVERBS_METHOD_QP_CREATE,
UVERBS_METHOD_QP_DESTROY,
+ UVERBS_METHOD_QP_ATTACH_COMP_CNTR,
};
enum uverbs_attrs_create_srq_cmd_attr_ids {
@@ -434,4 +436,67 @@ enum uverbs_attrs_query_gid_entry_cmd_attr_ids {
UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
};
+enum uverbs_methods_comp_cntr {
+ UVERBS_METHOD_COMP_CNTR_CREATE,
+ UVERBS_METHOD_COMP_CNTR_DESTROY,
+ UVERBS_METHOD_COMP_CNTR_SET,
+ UVERBS_METHOD_COMP_CNTR_SET_ERR,
+ UVERBS_METHOD_COMP_CNTR_INC,
+ UVERBS_METHOD_COMP_CNTR_INC_ERR,
+ UVERBS_METHOD_COMP_CNTR_READ,
+ UVERBS_METHOD_COMP_CNTR_READ_ERR,
+};
+
+enum uverbs_attrs_create_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_CREATE_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_VA,
+ UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_FD,
+ UVERBS_ATTR_CREATE_COMP_CNTR_BUFFER_OFFSET,
+ UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_VA,
+ UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_FD,
+ UVERBS_ATTR_CREATE_COMP_CNTR_ERR_BUFFER_OFFSET,
+ UVERBS_ATTR_CREATE_COMP_CNTR_RESP_COUNT_MAX_VALUE,
+ UVERBS_ATTR_CREATE_COMP_CNTR_RESP_ERR_COUNT_MAX_VALUE,
+};
+
+enum uverbs_attrs_destroy_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_DESTROY_COMP_CNTR_HANDLE,
+};
+
+enum uverbs_attrs_set_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_SET_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_SET_COMP_CNTR_VALUE,
+};
+
+enum uverbs_attrs_set_err_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_SET_ERR_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_SET_ERR_COMP_CNTR_VALUE,
+};
+
+enum uverbs_attrs_inc_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_INC_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_INC_COMP_CNTR_VALUE,
+};
+
+enum uverbs_attrs_inc_err_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_INC_ERR_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_INC_ERR_COMP_CNTR_VALUE,
+};
+
+enum uverbs_attrs_read_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_READ_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_READ_COMP_CNTR_RESP_VALUE,
+};
+
+enum uverbs_attrs_read_err_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_READ_ERR_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_READ_ERR_COMP_CNTR_RESP_VALUE,
+};
+
+enum uverbs_attrs_qp_attach_comp_cntr_cmd_attr_ids {
+ UVERBS_ATTR_QP_ATTACH_COMP_CNTR_QP_HANDLE,
+ UVERBS_ATTR_QP_ATTACH_COMP_CNTR_HANDLE,
+ UVERBS_ATTR_QP_ATTACH_COMP_CNTR_OP_MASK,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 90c5cd8e7753..45e6f1fccf80 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -273,4 +273,13 @@ struct ib_uverbs_gid_entry {
__u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */
};
+enum ib_uverbs_comp_cntr_attach_op {
+ IB_UVERBS_COMP_CNTR_ATTACH_OP_SEND = 1 << 0,
+ IB_UVERBS_COMP_CNTR_ATTACH_OP_RECV = 1 << 1,
+ IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_READ = 1 << 2,
+ IB_UVERBS_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_READ = 1 << 3,
+ IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_WRITE = 1 << 4,
+ IB_UVERBS_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_WRITE = 1 << 5,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 3b7bd99813e9..45d142f4a7f8 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -299,7 +299,7 @@ struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_cq_moderation_caps cq_moderation_caps;
__aligned_u64 max_dm_size;
__u32 xrc_odp_caps;
- __u32 reserved;
+ __u32 max_comp_cntr;
};
struct ib_uverbs_query_port {
--
2.47.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH for-next 4/4] RDMA/efa: Add Completion Counters support
2026-04-07 11:54 [PATCH for-next 0/4] Introduce Completion Counters Michael Margolin
` (2 preceding siblings ...)
2026-04-07 11:54 ` [PATCH for-next 3/4] RDMA/efa: Update device interface Michael Margolin
@ 2026-04-07 11:54 ` Michael Margolin
3 siblings, 0 replies; 14+ messages in thread
From: Michael Margolin @ 2026-04-07 11:54 UTC (permalink / raw)
To: jgg, leon, linux-rdma; +Cc: sleybo, matua, gal.pressman
Implement completion counters for the EFA device. Each completion
counter is backed by two EFA event counters, one for successful
completions and one for error completions.
Set and inc operations are forwarded to the device via the modify
counter admin command. Read operations return EOPNOTSUPP as the
counter values are accessed directly from userspace through the
mapped memory.
Signed-off-by: Michael Margolin <mrgolin@amazon.com>
---
drivers/infiniband/hw/efa/efa.h | 15 ++
drivers/infiniband/hw/efa/efa_com_cmd.c | 106 ++++++++++++++
drivers/infiniband/hw/efa/efa_com_cmd.h | 36 +++++
drivers/infiniband/hw/efa/efa_main.c | 8 ++
drivers/infiniband/hw/efa/efa_verbs.c | 177 ++++++++++++++++++++++++
include/uapi/rdma/efa-abi.h | 1 +
6 files changed, 343 insertions(+)
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 00b19f2ba3da..d0c2c355f031 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -110,6 +110,12 @@ struct efa_cq {
struct ib_umem *umem;
};
+struct efa_comp_cntr {
+ struct ib_comp_cntr ibcc;
+ u32 comp_handle;
+ u32 err_handle;
+};
+
struct efa_qp {
struct ib_qp ibqp;
dma_addr_t rq_dma_addr;
@@ -163,6 +169,15 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int efa_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
+int efa_create_comp_cntr(struct ib_comp_cntr *ibcc,
+ struct uverbs_attr_bundle *attrs);
+int efa_destroy_comp_cntr(struct ib_comp_cntr *ibcc);
+int efa_set_comp_cntr(struct ib_comp_cntr *ibcc, u64 value);
+int efa_set_err_comp_cntr(struct ib_comp_cntr *ibcc, u64 value);
+int efa_inc_comp_cntr(struct ib_comp_cntr *ibcc, u64 amount);
+int efa_inc_err_comp_cntr(struct ib_comp_cntr *ibcc, u64 amount);
+int efa_qp_attach_comp_cntr(struct ib_qp *ibqp, struct ib_comp_cntr *ibcc,
+ struct ib_comp_cntr_attach_attr *attr);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_dmah *dmah,
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 63c7f07806a8..e91c405e57d2 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -516,6 +516,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
}
result->inline_buf_size_ex = resp.u.queue_attr_2.inline_buf_size_ex;
+ result->max_event_counters = resp.u.queue_attr_2.max_event_counters;
+ result->event_counter_max_val = resp.u.queue_attr_2.event_counter_max_val;
} else {
result->inline_buf_size_ex = result->inline_buf_size;
}
@@ -851,3 +853,107 @@ int efa_com_get_stats(struct efa_com_dev *edev,
return 0;
}
+
+/*
+ * Issue the EFA_ADMIN_CREATE_COUNTER admin command built from @params.
+ * On success the handle from the response is stored in @result and 0 is
+ * returned; otherwise the failure is logged (rate limited) and the error
+ * from efa_com_cmd_exec() is returned.
+ */
+int efa_com_create_counter(struct efa_com_dev *edev,
+			   struct efa_com_create_counter_params *params,
+			   struct efa_com_create_counter_result *result)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_create_counter_cmd cmd = {};
+	struct efa_admin_create_counter_resp resp;
+	int ret;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_COUNTER;
+	cmd.paddr = params->dma_addr;
+	cmd.uar = params->uarn;
+
+	ret = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (!ret) {
+		result->cntr_handle = resp.cntr_handle;
+		return 0;
+	}
+
+	ibdev_err_ratelimited(edev->efa_dev,
+			      "Failed to create counter [%d]\n", ret);
+	return ret;
+}
+
+/*
+ * Issue the EFA_ADMIN_DESTROY_COUNTER admin command for the handle in
+ * @params. Returns 0 on success; otherwise the failure is logged (rate
+ * limited) and the error from efa_com_cmd_exec() is returned.
+ */
+int efa_com_destroy_counter(struct efa_com_dev *edev,
+			    struct efa_com_destroy_counter_params *params)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_destroy_counter_cmd cmd = {};
+	struct efa_admin_destroy_counter_resp resp;
+	int ret;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_COUNTER;
+	cmd.cntr_handle = params->cntr_handle;
+
+	ret = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (!ret)
+		return 0;
+
+	ibdev_err_ratelimited(edev->efa_dev,
+			      "Failed to destroy counter [%d]\n", ret);
+	return ret;
+}
+
+/*
+ * Issue the EFA_ADMIN_ATTACH_COUNTER admin command with the QP-events
+ * attach type, using the counter handle, QP handle and event mask from
+ * @params. Returns 0 on success; otherwise the failure is logged (rate
+ * limited) and the error from efa_com_cmd_exec() is returned.
+ */
+int efa_com_attach_counter(struct efa_com_dev *edev,
+			   struct efa_com_attach_counter_params *params)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_attach_counter_cmd cmd = {};
+	struct efa_admin_attach_counter_resp resp;
+	int ret;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_ATTACH_COUNTER;
+	cmd.attach_type = EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS;
+	cmd.cntr_handle = params->cntr_handle;
+	cmd.u.qp_events.qp_handle = params->qp_handle;
+	cmd.u.qp_events.events = params->events;
+
+	ret = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (!ret)
+		return 0;
+
+	ibdev_err_ratelimited(edev->efa_dev,
+			      "Failed to attach counter [%d]\n", ret);
+	return ret;
+}
+
+/*
+ * Issue the EFA_ADMIN_MODIFY_COUNTER admin command with the counter
+ * handle, operation and value from @params. Returns 0 on success;
+ * otherwise the failure is logged (rate limited) and the error from
+ * efa_com_cmd_exec() is returned.
+ */
+int efa_com_modify_counter(struct efa_com_dev *edev,
+			   struct efa_com_modify_counter_params *params)
+{
+	struct efa_com_admin_queue *aq = &edev->aq;
+	struct efa_admin_modify_counter_cmd cmd = {};
+	struct efa_admin_modify_counter_resp resp;
+	int ret;
+
+	cmd.aq_common_descriptor.opcode = EFA_ADMIN_MODIFY_COUNTER;
+	cmd.cntr_handle = params->cntr_handle;
+	cmd.value = params->value;
+	cmd.operation = params->operation;
+
+	ret = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd,
+			       sizeof(cmd),
+			       (struct efa_admin_acq_entry *)&resp,
+			       sizeof(resp));
+	if (!ret)
+		return 0;
+
+	ibdev_err_ratelimited(edev->efa_dev,
+			      "Failed to modify counter [%d]\n", ret);
+	return ret;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index ef15b3c38429..9bce27d585d5 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -145,6 +145,8 @@ struct efa_com_get_device_attr_result {
u16 min_sq_depth;
u16 max_link_speed_gbps;
u8 db_bar;
+ u32 max_event_counters;
+ u64 event_counter_max_val;
};
struct efa_com_get_hw_hints_result {
@@ -300,6 +302,31 @@ union efa_com_get_stats_result {
struct efa_com_network_stats network_stats;
};
+struct efa_com_create_counter_params { /* input of efa_com_create_counter() */
+ dma_addr_t dma_addr; /* efa_verbs.c passes ib_umem_start_dma_addr() of the counter's umem */
+ u16 uarn; /* efa_verbs.c passes the uarn of the creating ucontext */
+};
+
+struct efa_com_create_counter_result { /* output of efa_com_create_counter() */
+ u32 cntr_handle; /* handle later passed to the destroy/attach/modify counter commands */
+};
+
+struct efa_com_destroy_counter_params { /* input of efa_com_destroy_counter() */
+ u32 cntr_handle; /* handle of the counter to destroy */
+};
+
+struct efa_com_attach_counter_params { /* input of efa_com_attach_counter() */
+ u32 cntr_handle; /* counter to attach; copied to cmd.cntr_handle */
+ u32 qp_handle; /* QP whose events are counted; copied to cmd.u.qp_events.qp_handle */
+ u32 events; /* EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_* bits; copied to cmd.u.qp_events.events */
+};
+
+struct efa_com_modify_counter_params { /* input of efa_com_modify_counter() */
+ u32 cntr_handle; /* counter to modify */
+ u8 operation; /* efa_verbs.c passes EFA_ADMIN_COUNTER_MODIFY_SET or EFA_ADMIN_COUNTER_MODIFY_ADD */
+ u64 value; /* operand of the operation; copied to cmd.value */
+};
+
int efa_com_create_qp(struct efa_com_dev *edev,
struct efa_com_create_qp_params *params,
struct efa_com_create_qp_result *res);
@@ -350,5 +377,14 @@ int efa_com_dealloc_uar(struct efa_com_dev *edev,
int efa_com_get_stats(struct efa_com_dev *edev,
struct efa_com_get_stats_params *params,
union efa_com_get_stats_result *result);
+int efa_com_create_counter(struct efa_com_dev *edev,
+ struct efa_com_create_counter_params *params,
+ struct efa_com_create_counter_result *result);
+int efa_com_destroy_counter(struct efa_com_dev *edev,
+ struct efa_com_destroy_counter_params *params);
+int efa_com_attach_counter(struct efa_com_dev *edev,
+ struct efa_com_attach_counter_params *params);
+int efa_com_modify_counter(struct efa_com_dev *edev,
+ struct efa_com_modify_counter_params *params);
#endif /* _EFA_COM_CMD_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 03c237c8c81e..e125120fe4ad 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -372,20 +372,25 @@ static const struct ib_device_ops efa_dev_ops = {
.alloc_pd = efa_alloc_pd,
.alloc_ucontext = efa_alloc_ucontext,
.create_user_cq = efa_create_user_cq,
+ .create_comp_cntr = efa_create_comp_cntr,
.create_qp = efa_create_qp,
.create_user_ah = efa_create_ah,
.dealloc_pd = efa_dealloc_pd,
.dealloc_ucontext = efa_dealloc_ucontext,
.dereg_mr = efa_dereg_mr,
.destroy_ah = efa_destroy_ah,
+ .destroy_comp_cntr = efa_destroy_comp_cntr,
.destroy_cq = efa_destroy_cq,
.destroy_qp = efa_destroy_qp,
.get_hw_stats = efa_get_hw_stats,
.get_link_layer = efa_port_link_layer,
.get_port_immutable = efa_get_port_immutable,
+ .inc_comp_cntr = efa_inc_comp_cntr,
+ .inc_err_comp_cntr = efa_inc_err_comp_cntr,
.mmap = efa_mmap,
.mmap_free = efa_mmap_free,
.modify_qp = efa_modify_qp,
+ .qp_attach_comp_cntr = efa_qp_attach_comp_cntr,
.query_device = efa_query_device,
.query_gid = efa_query_gid,
.query_pkey = efa_query_pkey,
@@ -393,9 +398,12 @@ static const struct ib_device_ops efa_dev_ops = {
.query_qp = efa_query_qp,
.reg_user_mr = efa_reg_mr,
.reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf,
+ .set_comp_cntr = efa_set_comp_cntr,
+ .set_err_comp_cntr = efa_set_err_comp_cntr,
INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_comp_cntr, efa_comp_cntr, ibcc),
INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 283c62d9cb3d..e9dbb08f3e02 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -169,6 +169,11 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah)
return container_of(ibah, struct efa_ah, ibah);
}
+static inline struct efa_comp_cntr *to_ecc(struct ib_comp_cntr *ibcc)
+{
+ return container_of(ibcc, struct efa_comp_cntr, ibcc); /* ibcc is the ib_comp_cntr member embedded in struct efa_comp_cntr */
+}
+
static inline struct efa_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *rdma_entry)
{
@@ -245,6 +250,7 @@ int efa_query_device(struct ib_device *ibdev,
props->max_recv_sge = dev_attr->max_rq_sge;
props->max_sge_rd = dev_attr->max_wr_rdma_sge;
props->max_pkeys = 1;
+ props->max_comp_cntr = dev_attr->max_event_counters / 2;
if (udata && udata->outlen) {
resp.max_sq_sge = dev_attr->max_sq_sge;
@@ -270,6 +276,9 @@ int efa_query_device(struct ib_device *ibdev,
if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV;
+ if (EFA_DEV_CAP(dev, EVENT_COUNTERS))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_COMP_CNTR;
+
if (dev->neqs)
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
@@ -2307,6 +2316,174 @@ enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_UNSPECIFIED;
}
+/*
+ * Create one device event counter backed by @umem on behalf of UAR @uarn.
+ *
+ * On success returns 0 and stores the new counter handle in *@handle.
+ * On failure returns the error from efa_com_create_counter() and leaves
+ * *@handle untouched.
+ */
+static int efa_create_event_counter(struct efa_dev *dev, struct ib_umem *umem,
+				    u16 uarn, u32 *handle)
+{
+	struct efa_com_create_counter_params create_params = {
+		.dma_addr = ib_umem_start_dma_addr(umem),
+		.uarn = uarn,
+	};
+	struct efa_com_create_counter_result create_res;
+	int err;
+
+	err = efa_com_create_counter(&dev->edev, &create_params, &create_res);
+	if (err)
+		return err;
+
+	*handle = create_res.cntr_handle;
+	return 0;
+}
+
+/*
+ * Destroy the device event counter identified by @handle.
+ * Returns whatever efa_com_destroy_counter() returns.
+ */
+static int efa_destroy_event_counter(struct efa_dev *dev, u32 handle)
+{
+	struct efa_com_destroy_counter_params destroy_params = {};
+
+	destroy_params.cntr_handle = handle;
+
+	return efa_com_destroy_counter(&dev->edev, &destroy_params);
+}
+
+/*
+ * Create the two device event counters (completion and error) that back
+ * one ib_comp_cntr and record their handles in the efa_comp_cntr.
+ *
+ * Returns -EOPNOTSUPP when either umem is missing, the creation error if a
+ * device counter could not be created, and 0 on success. If the error
+ * counter cannot be created, the already created completion counter is
+ * destroyed before returning.
+ */
+int efa_create_comp_cntr(struct ib_comp_cntr *ibcc, struct uverbs_attr_bundle *attrs)
+{
+	struct efa_comp_cntr *cc = to_ecc(ibcc);
+	struct efa_dev *dev = to_edev(ibcc->device);
+	struct efa_ucontext *uctx;
+	int err;
+
+	/* Both counters must be backed by a umem. */
+	if (!(ibcc->comp_umem && ibcc->err_umem)) {
+		ibdev_dbg(&dev->ibdev, "Completion Counter without umem isn't supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	uctx = rdma_udata_to_drv_context(&attrs->driver_udata, struct efa_ucontext,
+					 ibucontext);
+
+	err = efa_create_event_counter(dev, ibcc->comp_umem, uctx->uarn, &cc->comp_handle);
+	if (err) {
+		ibdev_dbg(&dev->ibdev, "Failed to create comp event counter [%d]\n", err);
+		return err;
+	}
+
+	err = efa_create_event_counter(dev, ibcc->err_umem, uctx->uarn, &cc->err_handle);
+	if (err) {
+		ibdev_dbg(&dev->ibdev, "Failed to create err event counter [%d]\n", err);
+		goto err_destroy_comp;
+	}
+
+	/* Both counters report the same device-wide maximum value. */
+	ibcc->comp_count_max_value = dev->dev_attr.event_counter_max_val;
+	ibcc->err_count_max_value = dev->dev_attr.event_counter_max_val;
+
+	return 0;
+
+err_destroy_comp:
+	/* Best-effort unwind: the original creation error is what gets returned. */
+	efa_destroy_event_counter(dev, cc->comp_handle);
+	return err;
+}
+
+/*
+ * Destroy both device counters of @ibcc, completion counter first.
+ *
+ * If destroying the completion counter fails, that error is returned and
+ * the error counter is not destroyed. Otherwise the result of destroying
+ * the error counter is returned.
+ */
+int efa_destroy_comp_cntr(struct ib_comp_cntr *ibcc)
+{
+	struct efa_comp_cntr *cc = to_ecc(ibcc);
+	struct efa_dev *dev = to_edev(ibcc->device);
+	int err = efa_destroy_event_counter(dev, cc->comp_handle);
+
+	if (!err)
+		err = efa_destroy_event_counter(dev, cc->err_handle);
+
+	return err;
+}
+
+/*
+ * Apply @operation with operand @value to the device counter identified by
+ * @handle. Callers in this file pass EFA_ADMIN_COUNTER_MODIFY_SET or
+ * EFA_ADMIN_COUNTER_MODIFY_ADD. Returns whatever efa_com_modify_counter()
+ * returns.
+ */
+static int efa_modify_event_counter(struct efa_dev *dev, u32 handle, u8 operation, u64 value)
+{
+	struct efa_com_modify_counter_params modify_params = {};
+
+	modify_params.cntr_handle = handle;
+	modify_params.operation = operation;
+	modify_params.value = value;
+
+	return efa_com_modify_counter(&dev->edev, &modify_params);
+}
+
+/* Issue a MODIFY_SET of @value on the completion counter handle of @ibcc. */
+int efa_set_comp_cntr(struct ib_comp_cntr *ibcc, u64 value)
+{
+	struct efa_dev *dev = to_edev(ibcc->device);
+	u32 handle = to_ecc(ibcc)->comp_handle;
+
+	return efa_modify_event_counter(dev, handle, EFA_ADMIN_COUNTER_MODIFY_SET, value);
+}
+
+int efa_set_err_comp_cntr(struct ib_comp_cntr *ibcc, u64 value)
+{
+ return efa_modify_event_counter(to_edev(ibcc->device), to_ecc(ibcc)->err_handle,
+ EFA_ADMIN_COUNTER_MODIFY_SET, value);
+}
+
+/* Issue a MODIFY_ADD of @amount on the completion counter handle of @ibcc. */
+int efa_inc_comp_cntr(struct ib_comp_cntr *ibcc, u64 amount)
+{
+	struct efa_dev *dev = to_edev(ibcc->device);
+	u32 handle = to_ecc(ibcc)->comp_handle;
+
+	return efa_modify_event_counter(dev, handle, EFA_ADMIN_COUNTER_MODIFY_ADD, amount);
+}
+
+int efa_inc_err_comp_cntr(struct ib_comp_cntr *ibcc, u64 amount)
+{
+ return efa_modify_event_counter(to_edev(ibcc->device), to_ecc(ibcc)->err_handle,
+ EFA_ADMIN_COUNTER_MODIFY_ADD, amount);
+}
+
+/*
+ * Translate an IB_UVERBS_COMP_CNTR_ATTACH_OP_* mask into the
+ * EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_*_COMP fields used when attaching the
+ * completion counter. Each of the six operation flags handled here sets
+ * its own field to 1; flags not listed are ignored.
+ */
+static u32 efa_comp_cntr_op_to_comp_events(u32 op_mask)
+{
+	u32 hw_events = 0;
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_SEND)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_SEND_COMP, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_RECV)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_RECV_COMP, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_READ)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_READ_COMP, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_READ)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_REMOTE_READ_COMP, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_WRITE)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_WRITE_COMP, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_REMOTE_RDMA_WRITE)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_REMOTE_WRITE_COMP, 1);
+
+	return hw_events;
+}
+
+/*
+ * Translate an IB_UVERBS_COMP_CNTR_ATTACH_OP_* mask into the
+ * EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_*_COMP_ERR fields used when attaching
+ * the error counter. Only SEND, RECV, RDMA_READ and RDMA_WRITE are mapped;
+ * the REMOTE_RDMA_READ and REMOTE_RDMA_WRITE flags produce no error event.
+ */
+static u32 efa_comp_cntr_op_to_err_events(u32 op_mask)
+{
+	u32 hw_events = 0;
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_SEND)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_SEND_COMP_ERR, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_RECV)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_RECV_COMP_ERR, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_READ)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_READ_COMP_ERR, 1);
+
+	if (op_mask & IB_UVERBS_COMP_CNTR_ATTACH_OP_RDMA_WRITE)
+		EFA_SET(&hw_events, EFA_ADMIN_COUNTER_ATTACH_QP_EVENTS_WRITE_COMP_ERR, 1);
+
+	return hw_events;
+}
+
+/*
+ * Attach completion counter @ibcc to @ibqp for the operations selected in
+ * @attr->op_mask.
+ *
+ * Two attach commands are issued for the same QP: first the completion
+ * counter handle with the completion events, then the error counter handle
+ * with the error events. Returns the first error encountered, or 0.
+ * If the second command fails, the first attachment is not undone here.
+ */
+int efa_qp_attach_comp_cntr(struct ib_qp *ibqp, struct ib_comp_cntr *ibcc,
+			    struct ib_comp_cntr_attach_attr *attr)
+{
+	struct efa_dev *dev = to_edev(ibqp->device);
+	struct efa_comp_cntr *cc = to_ecc(ibcc);
+	/* Fully initialized so members never carry stack garbage. */
+	struct efa_com_attach_counter_params attach_params = {
+		.cntr_handle = cc->comp_handle,
+		.qp_handle = to_eqp(ibqp)->qp_handle,
+		.events = efa_comp_cntr_op_to_comp_events(attr->op_mask),
+	};
+	int err;
+
+	err = efa_com_attach_counter(&dev->edev, &attach_params);
+	if (err)
+		return err;
+
+	/* Same QP, now with the error counter and its event set. */
+	attach_params.cntr_handle = cc->err_handle;
+	attach_params.events = efa_comp_cntr_op_to_err_events(attr->op_mask);
+
+	return efa_com_attach_counter(&dev->edev, &attach_params);
+}
+
DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
UVERBS_OBJECT_MR,
diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h
index d5c18f8de182..492b2fa93467 100644
--- a/include/uapi/rdma/efa-abi.h
+++ b/include/uapi/rdma/efa-abi.h
@@ -133,6 +133,7 @@ enum {
EFA_QUERY_DEVICE_CAPS_RDMA_WRITE = 1 << 5,
EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV = 1 << 6,
EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM = 1 << 7,
+ EFA_QUERY_DEVICE_CAPS_COMP_CNTR = 1 << 8,
};
struct efa_ibv_ex_query_device_resp {
--
2.47.3
^ permalink raw reply related [flat|nested] 14+ messages in thread