* [PATCH rdma-next 5/8] RDMA/core: Introduce a DMAH object and its alloc/free APIs
2025-07-07 17:03 [PATCH rdma-next 0/8] RDMA support for DMA handle Leon Romanovsky
` (2 preceding siblings ...)
2025-07-07 17:03 ` [PATCH rdma-next 4/8] IB/core: Add UVERBS_METHOD_REG_MR on the MR object Leon Romanovsky
@ 2025-07-07 17:03 ` Leon Romanovsky
2025-07-07 17:03 ` [PATCH rdma-next 6/8] RDMA/mlx5: Add DMAH object support Leon Romanovsky
` (2 subsequent siblings)
6 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2025-07-07 17:03 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Yishai Hadas, Edward Srouji, linux-rdma
From: Yishai Hadas <yishaih@nvidia.com>
Introduce a new DMA handle (DMAH) object along with its corresponding
allocation and deallocation APIs.
This DMAH object encapsulates attributes intended for use in DMA
transactions.
While its initial purpose is to support TPH functionality, it is
designed to be extensible for future features such as DMA PCI multipath,
PCI UIO configurations, PCI traffic class selection, and more.
Further details:
----------------
We ensure that a caller requesting a DMA handle for a specific CPU ID is
permitted to be scheduled on it. This prevents a potential security issue
where a non-privileged user may trigger DMA operations toward a CPU that
it's not allowed to run on.
We manage reference counting for the DMAH object and its consumers
(e.g., memory regions) as will be detailed in subsequent patches in the
series.
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Edward Srouji <edwards@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/core/Makefile | 1 +
drivers/infiniband/core/device.c | 3 +
drivers/infiniband/core/rdma_core.h | 1 +
drivers/infiniband/core/restrack.c | 2 +
.../infiniband/core/uverbs_std_types_dmah.c | 151 ++++++++++++++++++
drivers/infiniband/core/uverbs_uapi.c | 1 +
include/rdma/ib_verbs.h | 28 ++++
include/rdma/restrack.h | 4 +
include/uapi/rdma/ib_user_ioctl_cmds.h | 17 ++
9 files changed, 208 insertions(+)
create mode 100644 drivers/infiniband/core/uverbs_std_types_dmah.c
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d49ded7e95f0..f483e0c12444 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -33,6 +33,7 @@ ib_umad-y := user_mad.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_std_types_cq.o \
+ uverbs_std_types_dmah.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
uverbs_uapi.o uverbs_std_types_device.o \
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 1ca6a9b7ba1a..4a98afbf8430 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2708,6 +2708,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, add_sub_dev);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
+ SET_DEVICE_OP(dev_ops, alloc_dmah);
SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
@@ -2735,6 +2736,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, create_user_ah);
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
+ SET_DEVICE_OP(dev_ops, dealloc_dmah);
SET_DEVICE_OP(dev_ops, dealloc_driver);
SET_DEVICE_OP(dev_ops, dealloc_mw);
SET_DEVICE_OP(dev_ops, dealloc_pd);
@@ -2832,6 +2834,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_dmah);
SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
SET_OBJ_SIZE(dev_ops, ib_qp);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 33706dad6c0f..a59b087611cb 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -156,6 +156,7 @@ extern const struct uapi_definition uverbs_def_obj_counters[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_dmah[];
extern const struct uapi_definition uverbs_def_obj_flow_action[];
extern const struct uapi_definition uverbs_def_obj_intf[];
extern const struct uapi_definition uverbs_def_obj_mr[];
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3313410014cd..a7de6f403fca 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -100,6 +100,8 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
return container_of(res, struct rdma_counter, res)->device;
case RDMA_RESTRACK_SRQ:
return container_of(res, struct ib_srq, res)->device;
+ case RDMA_RESTRACK_DMAH:
+ return container_of(res, struct ib_dmah, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
diff --git a/drivers/infiniband/core/uverbs_std_types_dmah.c b/drivers/infiniband/core/uverbs_std_types_dmah.c
new file mode 100644
index 000000000000..760c43438b70
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dmah.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
+
+/*
+ * Destroy callback for a DMAH uobject.
+ *
+ * Refuses with -EBUSY while the handle's use count is non-zero. Otherwise
+ * asks the driver to release the handle via its dealloc_dmah op and, only if
+ * that succeeds, removes the restrack entry and frees the object. A driver
+ * error is returned unchanged and leaves the object intact.
+ */
+static int uverbs_free_dmah(struct ib_uobject *uobject,
+			    enum rdma_remove_reason why,
+			    struct uverbs_attr_bundle *attrs)
+{
+	struct ib_dmah *handle = uobject->object;
+	int err;
+
+	/* Somebody still holds a reference on this handle. */
+	if (atomic_read(&handle->usecnt) != 0)
+		return -EBUSY;
+
+	err = handle->device->ops.dealloc_dmah(handle, attrs);
+	if (err)
+		return err;
+
+	rdma_restrack_del(&handle->res);
+	kfree(handle);
+
+	return 0;
+}
+
+/*
+ * Handler for UVERBS_METHOD_DMAH_ALLOC.
+ *
+ * Allocates a driver-sized ib_dmah, fills it from the optional CPU_ID,
+ * TPH_MEM_TYPE and PH attributes (recording each supplied one in
+ * valid_fields) and hands it to the driver's alloc_dmah op.
+ *
+ * Returns 0 on success. Fails with -EOPNOTSUPP when the device lacks either
+ * alloc_dmah or dealloc_dmah, -ENOMEM when the object cannot be allocated,
+ * -EINVAL for a CPU id that is not below nr_cpu_ids or a PH value using more
+ * than its two low bits, -EPERM when the caller is not allowed to run on the
+ * requested CPU, or with the error reported by attribute parsing or by the
+ * driver.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj =
+		uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE)
+			->obj_attr.uobject;
+	struct ib_device *ib_dev = attrs->context->device;
+	struct ib_dmah *dmah;
+	int mem_type;
+	int ret;
+
+	if (!ib_dev->ops.alloc_dmah || !ib_dev->ops.dealloc_dmah)
+		return -EOPNOTSUPP;
+
+	dmah = rdma_zalloc_drv_obj(ib_dev, ib_dmah);
+	if (!dmah)
+		return -ENOMEM;
+
+	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_CPU_ID)) {
+		ret = uverbs_copy_from(&dmah->cpu_id, attrs,
+				       UVERBS_ATTR_ALLOC_DMAH_CPU_ID);
+		if (ret)
+			goto err;
+
+		/*
+		 * cpu_id comes straight from user space and
+		 * cpumask_test_cpu() does not range-check its argument in
+		 * non-debug builds, so reject out-of-range ids before the
+		 * mask is indexed.
+		 */
+		if (dmah->cpu_id >= nr_cpu_ids) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		/* Only CPUs the calling task may be scheduled on are allowed */
+		if (!cpumask_test_cpu(dmah->cpu_id, current->cpus_ptr)) {
+			ret = -EPERM;
+			goto err;
+		}
+
+		dmah->valid_fields |= BIT(IB_DMAH_CPU_ID_EXISTS);
+	}
+
+	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE)) {
+		/*
+		 * Test the result while it is still an int: once stored in
+		 * the enum field a negative errno may no longer compare as
+		 * less than zero.
+		 */
+		mem_type = uverbs_attr_get_enum_id(attrs,
+					UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE);
+		if (mem_type < 0) {
+			ret = mem_type;
+			goto err;
+		}
+		dmah->mem_type = mem_type;
+		dmah->valid_fields |= BIT(IB_DMAH_MEM_TYPE_EXISTS);
+	}
+
+	if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_PH)) {
+		ret = uverbs_copy_from(&dmah->ph, attrs,
+				       UVERBS_ATTR_ALLOC_DMAH_PH);
+		if (ret)
+			goto err;
+
+		/* Per PCIe spec 6.2-1.0, only the lowest two bits are applicable */
+		if (dmah->ph & 0xFC) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		dmah->valid_fields |= BIT(IB_DMAH_PH_EXISTS);
+	}
+
+	dmah->device = ib_dev;
+	dmah->uobject = uobj;
+	atomic_set(&dmah->usecnt, 0);
+
+	rdma_restrack_new(&dmah->res, RDMA_RESTRACK_DMAH);
+	rdma_restrack_set_name(&dmah->res, NULL);
+
+	ret = ib_dev->ops.alloc_dmah(dmah, attrs);
+	if (ret) {
+		/* Drop the restrack reference taken by rdma_restrack_new() */
+		rdma_restrack_put(&dmah->res);
+		goto err;
+	}
+
+	uobj->object = dmah;
+	rdma_restrack_add(&dmah->res);
+	uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE);
+	return 0;
+err:
+	kfree(dmah);
+	return ret;
+}
+
+/*
+ * Enum attribute table for UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE, indexed by
+ * TPH memory type. Both entries are input attributes declared with no data;
+ * the alloc handler reads only the selected enum id.
+ */
+static const struct uverbs_attr_spec uverbs_dmah_mem_type[] = {
+ [TPH_MEM_TYPE_VM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [TPH_MEM_TYPE_PM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+/*
+ * Attribute layout of UVERBS_METHOD_DMAH_ALLOC: a mandatory handle for the
+ * new DMAH object, plus three optional inputs - CPU id (u32), TPH memory
+ * type (enum, see uverbs_dmah_mem_type) and PH (u8).
+ */
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DMAH_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DMAH_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_CPU_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE,
+ uverbs_dmah_mem_type,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_PH,
+ UVERBS_ATTR_TYPE(u8),
+ UA_OPTIONAL));
+
+/*
+ * Attribute layout of UVERBS_METHOD_DMAH_FREE: the mandatory handle of the
+ * DMAH object to destroy.
+ */
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DMAH_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DMA_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+/*
+ * The DMAH uverbs object: an IDR-allocated type whose destroy callback is
+ * uverbs_free_dmah(), exposing the ALLOC and FREE methods.
+ */
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DMAH,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dmah),
+ &UVERBS_METHOD(UVERBS_METHOD_DMAH_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DMAH_FREE));
+
+/*
+ * uAPI definition chain for the DMAH object. The object tree is marked as
+ * needing the driver's dealloc_dmah op.
+ */
+const struct uapi_definition uverbs_def_obj_dmah[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMAH,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dmah)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index a02916a3a79c..e00ea63175bd 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -631,6 +631,7 @@ static const struct uapi_definition uverbs_core_api[] = {
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
UAPI_DEF_CHAIN(uverbs_def_obj_device),
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
UAPI_DEF_CHAIN(uverbs_def_obj_intf),
UAPI_DEF_CHAIN(uverbs_def_obj_mr),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 010594dc755b..d7ee762b87e9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -42,6 +42,7 @@
#include <rdma/signature.h>
#include <uapi/rdma/rdma_user_ioctl.h>
#include <uapi/rdma/ib_user_ioctl_verbs.h>
+#include <linux/pci-tph.h>
#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
@@ -1846,6 +1847,29 @@ struct ib_dm {
atomic_t usecnt;
};
+/*
+ * Bit positions (used with BIT()) in ib_dmah::valid_fields; a set bit marks
+ * that the corresponding ib_dmah field was supplied.
+ */
+enum {
+ IB_DMAH_CPU_ID_EXISTS,
+ IB_DMAH_MEM_TYPE_EXISTS,
+ IB_DMAH_PH_EXISTS,
+};
+
+/*
+ * DMA handle (DMAH) object. cpu_id, mem_type and ph are optional; check the
+ * matching IB_DMAH_XXX_EXISTS bit in valid_fields before using one of them.
+ */
+struct ib_dmah {
+ struct ib_device *device;
+ u32 cpu_id;
+ enum tph_mem_type mem_type;
+ u8 ph; /* only the two lowest bits may be set */
+ struct ib_uobject *uobject;
+
+ u8 valid_fields; /* use IB_DMAH_XXX_EXISTS */
+ atomic_t usecnt; /* the handle cannot be freed while this is non-zero */
+
+ /*
+ * Implementation details of the RDMA core, don't use in drivers:
+ */
+ struct rdma_restrack_entry res;
+};
+
struct ib_mr {
struct ib_device *device;
struct ib_pd *pd;
@@ -2569,6 +2593,9 @@ struct ib_device_ops {
struct ib_dm_alloc_attr *attr,
struct uverbs_attr_bundle *attrs);
int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+ int (*alloc_dmah)(struct ib_dmah *ibdmah,
+ struct uverbs_attr_bundle *attrs);
+ int (*dealloc_dmah)(struct ib_dmah *dmah, struct uverbs_attr_bundle *attrs);
struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
@@ -2726,6 +2753,7 @@ struct ib_device_ops {
DECLARE_RDMA_OBJ_SIZE(ib_ah);
DECLARE_RDMA_OBJ_SIZE(ib_counters);
DECLARE_RDMA_OBJ_SIZE(ib_cq);
+ DECLARE_RDMA_OBJ_SIZE(ib_dmah);
DECLARE_RDMA_OBJ_SIZE(ib_mw);
DECLARE_RDMA_OBJ_SIZE(ib_pd);
DECLARE_RDMA_OBJ_SIZE(ib_qp);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 0d69ded73bf2..8a9bcf77dace 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -56,6 +56,10 @@ enum rdma_restrack_type {
* @RDMA_RESTRACK_SRQ: Shared receive queue (SRQ)
*/
RDMA_RESTRACK_SRQ,
+ /**
+ * @RDMA_RESTRACK_DMAH: DMA handle
+ */
+ RDMA_RESTRACK_DMAH,
/**
* @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
*/
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index e7b827e281d1..a0b1130423f0 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -55,6 +55,7 @@ enum uverbs_default_objects {
UVERBS_OBJECT_DM,
UVERBS_OBJECT_COUNTERS,
UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_OBJECT_DMAH,
};
enum {
@@ -236,6 +237,22 @@ enum uverbs_methods_dm {
UVERBS_METHOD_DM_FREE,
};
+/* Attribute ids of UVERBS_METHOD_DMAH_ALLOC */
+enum uverbs_attrs_alloc_dmah_cmd_attr_ids {
+ UVERBS_ATTR_ALLOC_DMAH_HANDLE,
+ UVERBS_ATTR_ALLOC_DMAH_CPU_ID,
+ UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE,
+ UVERBS_ATTR_ALLOC_DMAH_PH,
+};
+
+/* Attribute ids of UVERBS_METHOD_DMAH_FREE */
+enum uverbs_attrs_free_dmah_cmd_attr_ids {
+ UVERBS_ATTR_FREE_DMA_HANDLE,
+};
+
+/* Methods of UVERBS_OBJECT_DMAH */
+enum uverbs_methods_dmah {
+ UVERBS_METHOD_DMAH_ALLOC,
+ UVERBS_METHOD_DMAH_FREE,
+};
+
enum uverbs_attrs_reg_dm_mr_cmd_attr_ids {
UVERBS_ATTR_REG_DM_MR_HANDLE,
UVERBS_ATTR_REG_DM_MR_OFFSET,
--
2.50.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH rdma-next 7/8] IB: Extend UVERBS_METHOD_REG_MR to get DMAH
2025-07-07 17:03 [PATCH rdma-next 0/8] RDMA support for DMA handle Leon Romanovsky
` (4 preceding siblings ...)
2025-07-07 17:03 ` [PATCH rdma-next 6/8] RDMA/mlx5: Add DMAH object support Leon Romanovsky
@ 2025-07-07 17:03 ` Leon Romanovsky
2025-07-08 2:27 ` Junxian Huang
2025-07-07 17:03 ` [PATCH rdma-next 8/8] RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr Leon Romanovsky
6 siblings, 1 reply; 17+ messages in thread
From: Leon Romanovsky @ 2025-07-07 17:03 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Yishai Hadas, Bernard Metzler, Bryan Tan, Chengchang Tang,
Cheng Xu, Christian Benvenuti, Dennis Dalessandro, Edward Srouji,
Junxian Huang, Kai Shen, Kalesh AP, Konstantin Taranov,
linux-rdma, Long Li, Michael Margolin, Michal Kalderon,
Mustafa Ismail, Nelson Escobar, Potnuri Bharat Teja,
Selvin Xavier, Tatyana Nikolova, Vishnu Dasa, Zhu Yanjun
From: Yishai Hadas <yishaih@nvidia.com>
Extend UVERBS_METHOD_REG_MR to get DMAH and pass it to all drivers.
It will be used in mlx5 driver as part of the next patch from the
series.
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Edward Srouji <edwards@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/core/uverbs_cmd.c | 2 +-
drivers/infiniband/core/uverbs_std_types_mr.c | 21 ++++++++++++++++---
drivers/infiniband/core/verbs.c | 5 ++++-
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 8 +++++++
drivers/infiniband/hw/bnxt_re/ib_verbs.h | 2 ++
drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 +
drivers/infiniband/hw/cxgb4/mem.c | 6 +++++-
drivers/infiniband/hw/efa/efa.h | 2 ++
drivers/infiniband/hw/efa/efa_verbs.c | 8 +++++++
drivers/infiniband/hw/erdma/erdma_verbs.c | 6 +++++-
drivers/infiniband/hw/erdma/erdma_verbs.h | 3 ++-
drivers/infiniband/hw/hns/hns_roce_device.h | 1 +
drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++++
drivers/infiniband/hw/irdma/verbs.c | 9 ++++++++
drivers/infiniband/hw/mana/mana_ib.h | 2 ++
drivers/infiniband/hw/mana/mr.c | 8 +++++++
drivers/infiniband/hw/mlx4/mlx4_ib.h | 1 +
drivers/infiniband/hw/mlx4/mr.c | 4 ++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++
drivers/infiniband/hw/mlx5/mr.c | 8 ++++---
drivers/infiniband/hw/mthca/mthca_provider.c | 6 +++++-
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 6 +++++-
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 3 ++-
drivers/infiniband/hw/qedr/verbs.c | 6 +++++-
drivers/infiniband/hw/qedr/verbs.h | 3 ++-
drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 ++++
drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 1 +
drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c | 5 +++++
.../infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 1 +
drivers/infiniband/sw/rdmavt/mr.c | 5 +++++
drivers/infiniband/sw/rdmavt/mr.h | 1 +
drivers/infiniband/sw/rxe/rxe_verbs.c | 4 ++++
drivers/infiniband/sw/siw/siw_verbs.c | 7 ++++++-
drivers/infiniband/sw/siw/siw_verbs.h | 3 ++-
include/rdma/ib_verbs.h | 3 +++
include/uapi/rdma/ib_user_ioctl_cmds.h | 1 +
36 files changed, 144 insertions(+), 18 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 88aa8d4599df..ce16404cdfb8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -741,7 +741,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
}
mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags,
+ cmd.access_flags, NULL,
&attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 1bd4b17b5515..711ae6379509 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -238,7 +238,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
return ret;
mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd,
- access_flags,
+ access_flags, NULL,
attrs);
if (IS_ERR(mr))
return PTR_ERR(mr);
@@ -276,6 +276,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
u32 valid_access_flags = IB_ACCESS_SUPPORTED;
u64 length, iova, fd_offset = 0, addr = 0;
struct ib_device *ib_dev = pd->device;
+ struct ib_dmah *dmah = NULL;
bool has_fd_offset = false;
bool has_addr = false;
bool has_fd = false;
@@ -340,6 +341,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
return -EINVAL;
}
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_DMA_HANDLE)) {
+ dmah = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_MR_DMA_HANDLE);
+ if (IS_ERR(dmah))
+ return PTR_ERR(dmah);
+ }
+
ret = uverbs_get_flags32(&access_flags, attrs,
UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
valid_access_flags);
@@ -352,10 +359,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
if (has_fd)
mr = pd->device->ops.reg_user_mr_dmabuf(pd, fd_offset, length, iova,
- fd, access_flags, attrs);
+ fd, access_flags, dmah, attrs);
else
mr = pd->device->ops.reg_user_mr(pd, addr, length,
- iova, access_flags, NULL);
+ iova, access_flags, dmah, NULL);
if (IS_ERR(mr))
return PTR_ERR(mr);
@@ -365,6 +372,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
mr->type = IB_MR_TYPE_USER;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ if (dmah) {
+ mr->dmah = dmah;
+ atomic_inc(&dmah->usecnt);
+ }
rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
rdma_restrack_set_name(&mr->res, NULL);
rdma_restrack_add(&mr->res);
@@ -488,6 +499,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_OBJECT_PD,
UVERBS_ACCESS_READ,
UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_DMA_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_IOVA,
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY),
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 75fde0fe9989..3a5f81402d2f 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2223,7 +2223,7 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
- access_flags, NULL);
+ access_flags, NULL, NULL);
if (IS_ERR(mr))
return mr;
@@ -2262,6 +2262,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
struct ib_dm *dm = mr->dm;
+ struct ib_dmah *dmah = mr->dmah;
struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
@@ -2272,6 +2273,8 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
atomic_dec(&pd->usecnt);
if (dm)
atomic_dec(&dm->usecnt);
+ if (dmah)
+ atomic_dec(&dmah->usecnt);
kfree(sig_attrs);
}
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 3a627acb82ce..37c2bc3bdba5 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -4235,6 +4235,7 @@ static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
@@ -4242,6 +4243,9 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct ib_umem *umem;
struct ib_mr *ib_mr;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
if (IS_ERR(umem))
return ERR_CAST(umem);
@@ -4255,6 +4259,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
u64 length, u64 virt_addr, int fd,
int mr_access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
@@ -4263,6 +4268,9 @@ struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
struct ib_umem *umem;
struct ib_mr *ib_mr;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
umem_dmabuf = ib_umem_dmabuf_get_pinned(&rdev->ibdev, start, length,
fd, mr_access_flags);
if (IS_ERR(umem_dmabuf))
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 22c9eb8e9cfc..fe00ab691a51 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -258,10 +258,12 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
int bnxt_re_dealloc_mw(struct ib_mw *mw);
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
u64 length, u64 virt_addr,
int fd, int mr_access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs);
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 5b3007acaa1f..e17c1252536b 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -1006,6 +1006,7 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
void c4iw_dealloc(struct uld_ctx *ctx);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index a2c71a1d93d5..dcdfe250bdbe 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -489,7 +489,8 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
}
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
__be64 *pages;
int shift, n, i;
@@ -501,6 +502,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
pr_debug("ib_pd %p\n", pd);
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (length == ~0ULL)
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 838182d0409c..15cfca04fab8 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -164,10 +164,12 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct uverbs_attr_bundle *attrs);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs);
int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index a8645a40730f..9cc5b224995f 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1727,6 +1727,7 @@ static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs)
{
struct efa_dev *dev = to_edev(ibpd->device);
@@ -1734,6 +1735,9 @@ struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
struct efa_mr *mr;
int err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = efa_alloc_mr(ibpd, access_flags, &attrs->driver_udata);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
@@ -1766,12 +1770,16 @@ struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_mr *mr;
int err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = efa_alloc_mr(ibpd, access_flags, udata);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index ec0ad4086066..94c211df09d8 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1200,13 +1200,17 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
}
struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
- u64 virt, int access, struct ib_udata *udata)
+ u64 virt, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
struct erdma_mr *mr = NULL;
struct erdma_dev *dev = to_edev(ibpd->device);
u32 stag;
int ret;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!len || len > dev->attrs.max_mr_size)
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index f9408ccc8bad..ef411b81fbd7 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -452,7 +452,8 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext);
int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
- u64 virt, int access, struct ib_udata *udata);
+ u64 virt, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata);
struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int rights);
int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *data);
int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 25f77b1fa773..78ee04a48a74 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -1219,6 +1219,7 @@ int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
u64 length, u64 virt_addr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index ebef93559225..03af842dd9d3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -231,12 +231,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_mr *mr;
int ret;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 1e8c92826de2..da5a41b275d8 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -3013,10 +3013,12 @@ static int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req,
* @len: length of mr
* @virt: virtual address
* @access: access of mr
+ * @dmah: dma handle
* @udata: user data
*/
static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
u64 virt, int access,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages)
@@ -3026,6 +3028,9 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct irdma_mr *iwmr = NULL;
int err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
return ERR_PTR(-EINVAL);
@@ -3085,6 +3090,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
u64 len, u64 virt,
int fd, int access,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs)
{
struct irdma_device *iwdev = to_iwdev(pd->device);
@@ -3092,6 +3098,9 @@ static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
struct irdma_mr *iwmr;
int err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index eddd0a83b97e..03a1cb039343 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -624,6 +624,7 @@ struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 iova, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
@@ -713,5 +714,6 @@ int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
u64 iova, int fd, int mr_access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs);
#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 6d974d0a8400..55701046ffba 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -106,6 +106,7 @@ static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 iova, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
@@ -116,6 +117,9 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 dma_region_handle;
int err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
ibdev_dbg(ibdev,
@@ -188,6 +192,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
u64 iova, int fd, int access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs)
{
struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
@@ -199,6 +204,9 @@ struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 leng
u64 dma_region_handle;
int err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
access_flags &= ~IB_ACCESS_OPTIONAL;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f53b1846594c..5df5b955114e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -759,6 +759,7 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index e77645a673fb..94464f1694d9 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -139,6 +139,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
@@ -147,6 +148,9 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a012e24d3afe..71916d730be0 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1380,10 +1380,12 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs);
int mlx5_ib_advise_mr(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 41e897a7e652..8deff7cdf048 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1564,13 +1564,14 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 iova, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem *umem;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || dmah)
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
@@ -1724,6 +1725,7 @@ reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
u64 length, u64 virt_addr,
int fd, int access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -1731,7 +1733,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
- !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || dmah)
return ERR_PTR(-EOPNOTSUPP);
if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
@@ -1937,7 +1939,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
*/
recreate:
return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
- new_access_flags, udata);
+ new_access_flags, NULL, udata);
}
static int
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 6a1e2e79ddc3..dd572d76866c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -825,7 +825,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
}
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_block_iter biter;
@@ -838,6 +839,9 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err = 0;
int write_mtt_size;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (udata->inlen < sizeof ucmd) {
if (!context->reg_mr_warned) {
mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 979de8f8df14..46d911fd38de 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -847,13 +847,17 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
}
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
- u64 usr_addr, int acc, struct ib_udata *udata)
+ u64 usr_addr, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
int status = -ENOMEM;
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_mr *mr;
struct ocrdma_pd *pd;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
pd = get_ocrdma_pd(ibpd);
if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 0644346d8d98..6c5c3755b8a9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -98,7 +98,8 @@ int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *);
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 568a5b18803f..ab9bf0922979 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2953,13 +2953,17 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
}
struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
- u64 usr_addr, int acc, struct ib_udata *udata)
+ u64 usr_addr, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_mr *mr;
struct qedr_pd *pd;
int rc = -ENOMEM;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
pd = get_qedr_pd(ibpd);
DP_DEBUG(dev, QEDR_MSG_MR,
"qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 5731458abb06..62420a15101b 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -79,7 +79,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *);
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *);
int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 217af34e82b3..ae5df96589d9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -592,6 +592,7 @@ int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct usnic_ib_mr *mr;
@@ -600,6 +601,9 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
virt_addr, length);
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 53f53f2d53be..e3031ac32488 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -60,6 +60,7 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
int usnic_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index e80848bfb3bd..ec7a00c8285b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -104,12 +104,14 @@ struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc)
* @length: length of region
* @virt_addr: I/O virtual address
* @access_flags: access flags for memory region
+ * @dmah: dma handle
* @udata: user data
*
* @return: ib_mr pointer on success, otherwise returns an errno.
*/
struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
@@ -121,6 +123,9 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
int ret, npages;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (length == 0 || length > dev->dsr->caps.max_mr_size) {
dev_warn(&dev->pdev->dev, "invalid mem region length\n");
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index fd47b0b1df5c..603e5a9311eb 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -366,6 +366,7 @@ int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
int pvrdma_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 5ed5cfc2b280..86e482593a85 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -329,12 +329,14 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
* @length: length of region to register
* @virt_addr: associated virtual address
* @mr_access_flags: access flags for this memory region
+ * @dmah: dma handle
* @udata: unused by the driver
*
* Return: the memory region on success, otherwise returns an errno.
*/
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct rvt_mr *mr;
@@ -343,6 +345,9 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int n, m;
struct ib_mr *ret;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (length == 0)
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h
index 44afe2731741..72dab48307b7 100644
--- a/drivers/infiniband/sw/rdmavt/mr.h
+++ b/drivers/infiniband/sw/rdmavt/mr.h
@@ -26,6 +26,7 @@ void rvt_mr_exit(struct rvt_dev_info *rdi);
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 2331e698a65b..f48d6e132954 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1271,6 +1271,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
u64 length, u64 iova, int access,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct rxe_dev *rxe = to_rdev(ibpd->device);
@@ -1278,6 +1279,9 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
struct rxe_mr *mr;
int err, cleanup_err;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (access & ~RXE_ACCESS_SUPPORTED_MR) {
rxe_err_pd(pd, "access = %#x not supported (%#x)\n", access,
RXE_ACCESS_SUPPORTED_MR);
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 2b2a7b8e93b0..b784e18db463 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1321,10 +1321,12 @@ int siw_dereg_mr(struct ib_mr *base_mr, struct ib_udata *udata)
* @len: len of MR
* @rnic_va: not used by siw
* @rights: MR access rights
+ * @dmah: dma handle
* @udata: user buffer to communicate STag and Key.
*/
struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
- u64 rnic_va, int rights, struct ib_udata *udata)
+ u64 rnic_va, int rights, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
struct siw_mr *mr = NULL;
struct siw_umem *umem = NULL;
@@ -1332,6 +1334,9 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_device *sdev = to_siw_dev(pd->device);
int rv;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
siw_dbg_pd(pd, "start: 0x%p, va: 0x%p, len: %llu\n",
(void *)(uintptr_t)start, (void *)(uintptr_t)rnic_va,
(unsigned long long)len);
diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h
index 1f1a305540af..e9f4463aecdc 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.h
+++ b/drivers/infiniband/sw/siw/siw_verbs.h
@@ -65,7 +65,8 @@ int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata);
int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc);
int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags);
struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len,
- u64 rnic_va, int rights, struct ib_udata *udata);
+ u64 rnic_va, int rights, struct ib_dmah *dmah,
+ struct ib_udata *udata);
struct ib_mr *siw_alloc_mr(struct ib_pd *base_pd, enum ib_mr_type mr_type,
u32 max_sge);
struct ib_mr *siw_get_dma_mr(struct ib_pd *base_pd, int rights);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index d7ee762b87e9..c263327a0205 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1887,6 +1887,7 @@ struct ib_mr {
struct ib_dm *dm;
struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
+ struct ib_dmah *dmah;
/*
* Implementation details of the RDMA core, don't use in drivers:
*/
@@ -2525,10 +2526,12 @@ struct ib_device_ops {
struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *(*reg_user_mr_dmabuf)(struct ib_pd *pd, u64 offset,
u64 length, u64 virt_addr, int fd,
int mr_access_flags,
+ struct ib_dmah *dmah,
struct uverbs_attr_bundle *attrs);
struct ib_mr *(*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start,
u64 length, u64 virt_addr,
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index a0b1130423f0..17f963014eca 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -307,6 +307,7 @@ enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids {
enum uverbs_attrs_reg_mr_cmd_attr_ids {
UVERBS_ATTR_REG_MR_HANDLE,
UVERBS_ATTR_REG_MR_PD_HANDLE,
+ UVERBS_ATTR_REG_MR_DMA_HANDLE,
UVERBS_ATTR_REG_MR_IOVA,
UVERBS_ATTR_REG_MR_ADDR,
UVERBS_ATTR_REG_MR_LENGTH,
--
2.50.0
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH rdma-next 8/8] RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr
2025-07-07 17:03 [PATCH rdma-next 0/8] RDMA support for DMA handle Leon Romanovsky
` (5 preceding siblings ...)
2025-07-07 17:03 ` [PATCH rdma-next 7/8] IB: Extend UVERBS_METHOD_REG_MR to get DMAH Leon Romanovsky
@ 2025-07-07 17:03 ` Leon Romanovsky
6 siblings, 0 replies; 17+ messages in thread
From: Leon Romanovsky @ 2025-07-07 17:03 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Yishai Hadas, Edward Srouji, linux-rdma
From: Yishai Hadas <yishaih@nvidia.com>
As part of this enhancement, allow the creation of an MKEY associated
with a DMA handle.
Additional notes:
MKEYs with TPH (TLP Processing Hints) attributes are currently not
UMR-capable unless explicitly enabled by firmware or hardware.
Therefore, to maintain such MKEYs in the MR cache, the TPH fields (ph and
st_index) have been added to the rb_key structure, so that MKEYs with a
given TPH configuration are served from their own dedicated cache entry.
The ability to bypass the kernel verbs flow and create an MKEY with TPH
attributes using DEVX has been restricted. TPH must follow the standard
InfiniBand flow, where a DMAH is created with the appropriate security
checks and management mechanisms in place.
DMA handles are currently not supported in conjunction with On-Demand
Paging (ODP).
Re-registration of memory regions originally created with TPH attributes
is currently not supported.
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Edward Srouji <edwards@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/hw/mlx5/devx.c | 4 ++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 ++
drivers/infiniband/hw/mlx5/mr.c | 101 ++++++++++++++++++++++-----
drivers/infiniband/hw/mlx5/odp.c | 1 +
4 files changed, 92 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index c03fe5414b9a..c6ced06111dd 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1393,6 +1393,10 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
}
MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ /* TPH is not allowed to bypass the kernel's regular verbs flow */
+ MLX5_SET(mkc, mkc, pcie_tph_en, 0);
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
+ MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX);
return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 71916d730be0..3452695944e8 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -650,8 +650,13 @@ enum mlx5_mkey_type {
MLX5_MKEY_IMPLICIT_CHILD,
};
+/* Used for non-existent ph value */
+#define MLX5_IB_NO_PH 0xff
+
struct mlx5r_cache_rb_key {
u8 ats:1;
+ u8 ph;
+ u16 st_index;
unsigned int access_mode;
unsigned int access_flags;
unsigned int ndescs;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8deff7cdf048..5805d8231710 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -44,6 +44,7 @@
#include "mlx5_ib.h"
#include "umr.h"
#include "data_direct.h"
+#include "dmah.h"
enum {
MAX_PENDING_REG_MR = 8,
@@ -57,7 +58,7 @@ create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
unsigned long page_size, bool populate,
- int access_mode);
+ int access_mode, u16 st_index, u8 ph);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
@@ -256,6 +257,14 @@ static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
get_mkc_octo_size(ent->rb_key.access_mode,
ent->rb_key.ndescs));
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+
+ if (ent->rb_key.ph != MLX5_IB_NO_PH) {
+ MLX5_SET(mkc, mkc, pcie_tph_en, 1);
+ MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph);
+ if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
+ ent->rb_key.st_index);
+ }
}
/* Asynchronously schedule new MRs to be populated in the cache. */
@@ -641,6 +650,14 @@ static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
if (res)
return res;
+ res = key1.st_index - key2.st_index;
+ if (res)
+ return res;
+
+ res = key1.ph - key2.ph;
+ if (res)
+ return res;
+
/*
* keep ndescs the last in the compare table since the find function
* searches for an exact match on all properties and only closest
@@ -712,6 +729,8 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
smallest->rb_key.access_mode == rb_key.access_mode &&
smallest->rb_key.access_flags == rb_key.access_flags &&
smallest->rb_key.ats == rb_key.ats &&
+ smallest->rb_key.st_index == rb_key.st_index &&
+ smallest->rb_key.ph == rb_key.ph &&
smallest->rb_key.ndescs <= ndescs_limit) ?
smallest :
NULL;
@@ -786,7 +805,8 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
struct mlx5r_cache_rb_key rb_key = {
.ndescs = ndescs,
.access_mode = access_mode,
- .access_flags = get_unchangeable_access_flags(dev, access_flags)
+ .access_flags = get_unchangeable_access_flags(dev, access_flags),
+ .ph = MLX5_IB_NO_PH,
};
struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
@@ -979,6 +999,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
struct rb_root *root = &dev->cache.rb_root;
struct mlx5r_cache_rb_key rb_key = {
.access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+ .ph = MLX5_IB_NO_PH,
};
struct mlx5_cache_ent *ent;
struct rb_node *node;
@@ -1155,7 +1176,8 @@ static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
- int access_flags, int access_mode)
+ int access_flags, int access_mode,
+ u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5r_cache_rb_key rb_key = {};
@@ -1174,6 +1196,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
+ rb_key.st_index = st_index;
+ rb_key.ph = ph;
ent = mkey_cache_ent_from_rb_key(dev, rb_key);
/*
* If the MR can't come from the cache then synchronously create an uncached
@@ -1181,7 +1205,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
*/
if (!ent) {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode,
+ st_index, ph);
mutex_unlock(&dev->slow_path_mutex);
if (IS_ERR(mr))
return mr;
@@ -1266,7 +1291,7 @@ reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_f
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
unsigned long page_size, bool populate,
- int access_mode)
+ int access_mode, u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1340,6 +1365,13 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
get_octo_len(iova, umem->length, mr->page_shift));
}
+ if (ph != MLX5_IB_NO_PH) {
+ MLX5_SET(mkc, mkc, pcie_tph_en, 1);
+ MLX5_SET(mkc, mkc, pcie_tph_ph, ph);
+ if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index);
+ }
+
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
@@ -1459,24 +1491,37 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
}
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
- u64 iova, int access_flags)
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool xlt_with_umr;
+ u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
+ u8 ph = MLX5_IB_NO_PH;
int err;
+ if (dmah) {
+ struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
+
+ ph = dmah->ph;
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ st_index = mdmah->st_index;
+ }
+
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
- MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_MKC_ACCESS_MODE_MTT,
+ st_index, ph);
} else {
unsigned long page_size =
mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size,
- true, MLX5_MKC_ACCESS_MODE_MTT);
+ true, MLX5_MKC_ACCESS_MODE_MTT,
+ st_index, ph);
mutex_unlock(&dev->slow_path_mutex);
}
if (IS_ERR(mr)) {
@@ -1540,7 +1585,9 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_CAST(odp);
mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
- MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_MKC_ACCESS_MODE_MTT,
+ MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX,
+ MLX5_IB_NO_PH);
if (IS_ERR(mr)) {
ib_umem_release(&odp->umem);
return ERR_CAST(mr);
@@ -1571,7 +1618,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_umem *umem;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || dmah)
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ ((access_flags & IB_ACCESS_ON_DEMAND) && dmah))
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
@@ -1587,7 +1635,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
if (IS_ERR(umem))
return ERR_CAST(umem);
- return create_real_mr(pd, umem, iova, access_flags);
+ return create_real_mr(pd, umem, iova, access_flags, dmah);
}
static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
@@ -1612,12 +1660,15 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
static struct ib_mr *
reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
u64 offset, u64 length, u64 virt_addr,
- int fd, int access_flags, int access_mode)
+ int fd, int access_flags, int access_mode,
+ struct ib_dmah *dmah)
{
bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
struct ib_umem_dmabuf *umem_dmabuf;
+ u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
+ u8 ph = MLX5_IB_NO_PH;
int err;
err = mlx5r_umr_resource_init(dev);
@@ -1640,8 +1691,17 @@ reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
return ERR_CAST(umem_dmabuf);
}
+ if (dmah) {
+ struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
+
+ ph = dmah->ph;
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ st_index = mdmah->st_index;
+ }
+
mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
- access_flags, access_mode);
+ access_flags, access_mode,
+ st_index, ph);
if (IS_ERR(mr)) {
ib_umem_release(&umem_dmabuf->umem);
return ERR_CAST(mr);
@@ -1698,7 +1758,8 @@ reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
offset, length, virt_addr, fd,
- access_flags, MLX5_MKC_ACCESS_MODE_KSM);
+ access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+ NULL);
if (IS_ERR(crossed_mr)) {
ret = PTR_ERR(crossed_mr);
goto end;
@@ -1733,7 +1794,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
- !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || dmah)
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
return ERR_PTR(-EOPNOTSUPP);
if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
@@ -1758,7 +1819,8 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
return reg_user_mr_dmabuf(pd, pd->device->dma_device,
offset, length, virt_addr,
- fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT);
+ fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT,
+ dmah);
}
/*
@@ -1855,7 +1917,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct)
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct ||
+ mr->mmkey.rb_key.ph != MLX5_IB_NO_PH)
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(
@@ -1899,7 +1962,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
return create_real_mr(new_pd, umem, mr->ibmr.iova,
- new_access_flags);
+ new_access_flags, NULL);
}
/*
@@ -1930,7 +1993,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
return NULL;
}
- return create_real_mr(new_pd, new_umem, iova, new_access_flags);
+ return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL);
}
/*
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index f6abd64f07f7..2cfe66a9839c 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1866,6 +1866,7 @@ int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
struct mlx5r_cache_rb_key rb_key = {
.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
.ndescs = mlx5_imr_ksm_entries,
+ .ph = MLX5_IB_NO_PH,
};
struct mlx5_cache_ent *ent;
--
2.50.0
^ permalink raw reply related [flat|nested] 17+ messages in thread