* [RFC 1/7] IB/mlx5: Helper for posting work-requests on the UMR QP
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
2016-08-01 6:57 ` [RFC 2/7] IB/mlx5: Support registration and invalidate operations " Haggai Eran
` (5 subsequent siblings)
6 siblings, 0 replies; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
Add mlx5_ib_post_umr_sync(), a helper function that posts work-requests
on the UMR QP, waits for their completion, and returns the result.
Modify several functions to use the new helper.
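For illustration, a converted call site now looks roughly like this (a minimal sketch mirroring the reg_umr() hunk below; the caller only builds the work request, while posting, waiting, and the UMR semaphore are handled by the helper):
        struct mlx5_umr_wr umrwr = {};
        int err;

        /* Build the registration WQE exactly as before. */
        prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
                         page_shift, virt_addr, len, access_flags);

        /* Post on the UMR QP and wait for the completion. */
        err = mlx5_ib_post_umr_sync(dev, &umrwr.wr);
        if (err)
                goto unmap_dma;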
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/infiniband/hw/mlx5/mr.c | 98 ++++++++++++++---------------------------
1 file changed, 32 insertions(+), 66 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8cf2ce50511f..fb2bb25c6cf0 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -852,16 +852,41 @@ static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
init_completion(&context->done);
}
+static int mlx5_ib_post_umr_sync(struct mlx5_ib_dev *dev, struct ib_send_wr *wr)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
+ struct ib_send_wr *bad;
+ int err;
+
+ mlx5_ib_init_umr_context(&umr_context);
+ wr->wr_cqe = &umr_context.cqe;
+
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, wr, &bad);
+ if (err) {
+ mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+ goto out;
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "umr failed\n");
+ err = -EFAULT;
+ goto out;
+ }
+ }
+out:
+ up(&umrc->sem);
+ return err;
+}
+
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
u64 virt_addr, u64 len, int npages,
int page_shift, int order, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
int size;
@@ -890,24 +915,12 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (err)
goto free_mr;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
page_shift, virt_addr, len, access_flags);
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+ err = mlx5_ib_post_umr_sync(dev, &umrwr.wr);
+ if (err)
goto unmap_dma;
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- }
- }
mr->mmkey.iova = virt_addr;
mr->mmkey.size = len;
@@ -916,7 +929,6 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
mr->live = 1;
unmap_dma:
- up(&umrc->sem);
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
kfree(mr_pas);
@@ -1193,36 +1205,10 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
- int err;
-
- mlx5_ib_init_umr_context(&umr_context);
- umrwr.wr.wr_cqe = &umr_context.cqe;
prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- up(&umrc->sem);
- mlx5_ib_dbg(dev, "err %d\n", err);
- goto error;
- } else {
- wait_for_completion(&umr_context.done);
- up(&umrc->sem);
- }
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "unreg umr failed\n");
- err = -EFAULT;
- goto error;
- }
- return 0;
-
-error:
- return err;
+ return mlx5_ib_post_umr_sync(dev, &umrwr.wr);
}
static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
@@ -1231,19 +1217,13 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct device *ddev = dev->ib_dev.dma_device;
- struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr *bad;
struct mlx5_umr_wr umrwr = {};
struct ib_sge sg;
- struct umr_common *umrc = &dev->umrc;
dma_addr_t dma = 0;
__be64 *mr_pas = NULL;
int size;
int err;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
if (flags & IB_MR_REREG_TRANS) {
@@ -1271,21 +1251,7 @@ static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
}
/* post send request to UMR QP */
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
-
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
-
- up(&umrc->sem);
+ err = mlx5_ib_post_umr_sync(dev, &umrwr.wr);
if (flags & IB_MR_REREG_TRANS) {
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
kfree(mr_pas);
--
1.7.11.2
* [RFC 2/7] IB/mlx5: Support registration and invalidate operations on the UMR QP
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
2016-08-01 6:57 ` [RFC 1/7] IB/mlx5: Helper for posting work-requests on the UMR QP Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
2016-08-01 6:57 ` [RFC 3/7] IB/core: Helpers for mapping DMA-BUF in MRs Haggai Eran
` (4 subsequent siblings)
6 siblings, 0 replies; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
Currently the UMR QP only supports UMR work requests, but there is
nothing preventing it from also supporting fast registration and local
invalidate operations. This allows internal driver operations to use the
fast registration API without having to create a dedicated RC QP.
Use ib_create_qp() instead of mlx5_ib_create_qp() so that the PD field
of the new UMR QP is initialized.
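For illustration, a driver-internal fast registration on the UMR QP would look roughly like this (a minimal sketch; the MR and access flags are placeholders, and patch 5 uses this pattern for DMA-BUF MRs):
        struct ib_reg_wr regwr = {};
        int err;

        regwr.wr.opcode = IB_WR_REG_MR;
        regwr.mr = mr;                  /* an MR from ib_alloc_mr() */
        regwr.key = mr->lkey;
        regwr.access = IB_ACCESS_LOCAL_WRITE;

        /* Post on the UMR QP and wait, using the helper from patch 1. */
        err = mlx5_ib_post_umr_sync(dev, &regwr.wr);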
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/infiniband/hw/mlx5/main.c | 6 +---
drivers/infiniband/hw/mlx5/qp.c | 71 +++++++++++++++++++++++++++------------
2 files changed, 50 insertions(+), 27 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b48ad85315dc..f41254f3689a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2023,16 +2023,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
init_attr->cap.max_send_sge = 1;
init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
+ qp = ib_create_qp(pd, init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto error_3;
}
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ce0a7ab35a22..861ec1f6a20b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3237,8 +3237,8 @@ static int set_psv_wr(struct ib_sig_domain *domain,
return 0;
}
-static int set_reg_wr(struct mlx5_ib_qp *qp,
- struct ib_reg_wr *wr,
+static int set_reg_wr(struct mlx5_ib_qp *qp, struct ib_reg_wr *wr,
+ unsigned int idx, struct mlx5_wqe_ctrl_seg *ctrl,
void **seg, int *size)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
@@ -3266,10 +3266,15 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
*seg += sizeof(struct mlx5_wqe_data_seg);
*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+ qp->sq.wr_data[idx] = IB_WR_REG_MR;
+ ctrl->imm = cpu_to_be32(wr->key);
+
return 0;
}
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+static void set_linv_wr(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+ unsigned int idx, struct mlx5_wqe_ctrl_seg *ctrl,
+ void **seg, int *size)
{
set_linv_umr_seg(*seg);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
@@ -3281,6 +3286,9 @@ static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
+
+ qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
+ ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
}
static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
@@ -3476,17 +3484,14 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_LOCAL_INV:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
- ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size);
+ set_linv_wr(qp, wr, idx, ctrl, &seg, &size);
num_sge = 0;
break;
case IB_WR_REG_MR:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- qp->sq.wr_data[idx] = IB_WR_REG_MR;
- ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
+ err = set_reg_wr(qp, reg_wr(wr), idx, ctrl,
+ &seg, &size);
if (err) {
*bad_wr = wr;
goto out;
@@ -3613,23 +3618,45 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
break;
case MLX5_IB_QPT_REG_UMR:
- if (wr->opcode != MLX5_IB_WR_UMR) {
+ switch (wr->opcode) {
+ case MLX5_IB_WR_UMR:
+ qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+ ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
+ set_reg_umr_segment(seg, wr);
+ seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ size += sizeof(struct mlx5_wqe_umr_ctrl_seg)
+ / 16;
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+ set_reg_mkey_segment(seg, wr);
+ seg += sizeof(struct mlx5_mkey_seg);
+ size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((seg == qend)))
+ seg = mlx5_get_send_wqe(qp, 0);
+ break;
+
+ case IB_WR_LOCAL_INV:
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ set_linv_wr(qp, wr, idx, ctrl, &seg, &size);
+ num_sge = 0;
+ break;
+
+ case IB_WR_REG_MR:
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ err = set_reg_wr(qp, reg_wr(wr), idx, ctrl,
+ &seg, &size);
+ if (err) {
+ *bad_wr = wr;
+ goto out;
+ }
+ num_sge = 0;
+ break;
+
+ default:
err = -EINVAL;
mlx5_ib_warn(dev, "bad opcode\n");
goto out;
}
- qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- set_reg_umr_segment(seg, wr);
- seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
- set_reg_mkey_segment(seg, wr);
- seg += sizeof(struct mlx5_mkey_seg);
- size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
break;
default:
--
1.7.11.2
* [RFC 3/7] IB/core: Helpers for mapping DMA-BUF in MRs
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
2016-08-01 6:57 ` [RFC 1/7] IB/mlx5: Helper for posting work-requests on the UMR QP Haggai Eran
2016-08-01 6:57 ` [RFC 2/7] IB/mlx5: Support registration and invalidate operations " Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
2016-08-01 6:57 ` [RFC 4/7] IB/uverbs: Add command to register a DMA-BUF fd Haggai Eran
` (3 subsequent siblings)
6 siblings, 0 replies; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
The DMA-BUF API allows devices to export buffers for other devices to
access. This provides a means to implement peer-to-peer communication
for RDMA. A device with internal memory would expose that memory through
a PCIe BAR and export the region as a DMA-BUF object. The user or a ULP
would then register a memory region that is backed by the DMA-BUF object.
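For illustration, a kernel user of the new helpers would do roughly the following (a minimal sketch; error handling omitted and the access flags are placeholders):
        struct ib_mr *mr;
        int err;

        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
                         DIV_ROUND_UP(dmabuf->size, PAGE_SIZE));

        /* Attach to the DMA-BUF and map it into the MR's page list. */
        err = ib_mr_attach_dmabuf(mr, dmabuf, IB_ACCESS_LOCAL_WRITE);

        /* ... use the MR ...; the attachment is later released with: */
        ib_mr_detach_dmabuf(mr->attach, mr->sg);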
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/infiniband/core/verbs.c | 60 +++++++++++++++++++++++++++++++++++++++++
include/rdma/ib_verbs.h | 9 +++++++
2 files changed, 69 insertions(+)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6298f54b4137..633831e77a62 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -44,6 +44,7 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <net/addrconf.h>
+#include <linux/dma-buf.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -1922,3 +1923,62 @@ void ib_drain_qp(struct ib_qp *qp)
ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
+
+int ib_mr_attach_dmabuf(struct ib_mr *mr, struct dma_buf *dmabuf, int access)
+{
+ struct device *dev = mr->device->dma_device;
+ struct dma_buf_attachment *attach;
+ struct sg_table *sg;
+ int err;
+ int n;
+
+ get_dma_buf(dmabuf);
+
+ attach = dma_buf_attach(dmabuf, dev);
+ if (IS_ERR(attach)) {
+ dev_err(dev, "unable to attach to DMA-BUF object (from %s): %ld",
+ dmabuf->exp_name, PTR_ERR(attach));
+ err = PTR_ERR(attach);
+ goto put_dma_buf;
+ }
+
+ sg = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(sg)) {
+ dev_err(dev, "unable to map a DMA-BUF object (from %s): %ld",
+ dmabuf->exp_name, PTR_ERR(sg));
+ err = PTR_ERR(sg);
+ goto detach;
+ }
+
+ n = ib_map_mr_sg_zbva(mr, sg->sgl, sg->nents, NULL, PAGE_SIZE);
+ if (unlikely(n != sg->nents)) {
+ dev_err(dev, "failed to map sg (%d/%d) of DMA-BUF object (from %s)\n",
+ n, sg->nents, dmabuf->exp_name);
+ err = n < 0 ? n : -EINVAL;
+ goto unmap;
+ }
+
+ mr->attach = attach;
+ mr->sg = sg;
+ return 0;
+
+unmap:
+ dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
+detach:
+ dma_buf_detach(dmabuf, attach);
+put_dma_buf:
+ dma_buf_put(dmabuf);
+ return err;
+}
+EXPORT_SYMBOL(ib_mr_attach_dmabuf);
+
+void ib_mr_detach_dmabuf(struct dma_buf_attachment *attach,
+ struct sg_table *sg)
+{
+ struct dma_buf *dmabuf = attach->dmabuf;
+
+ dma_buf_unmap_attachment(attach, sg, DMA_BIDIRECTIONAL);
+ dma_buf_detach(dmabuf, attach);
+ dma_buf_put(dmabuf);
+}
+EXPORT_SYMBOL(ib_mr_detach_dmabuf);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7e440d41487a..da23ec0a06d7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1465,6 +1465,9 @@ struct ib_mr {
struct ib_uobject *uobject; /* user */
struct list_head qp_entry; /* FR */
};
+ /* For MRs that expose a DMA-BUF */
+ struct dma_buf_attachment *attach;
+ struct sg_table *sg;
};
struct ib_mw {
@@ -3189,4 +3192,10 @@ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
+
+struct dma_buf;
+int ib_mr_attach_dmabuf(struct ib_mr *mr, struct dma_buf *dmabuf, int access);
+void ib_mr_detach_dmabuf(struct dma_buf_attachment *attach,
+ struct sg_table *sg);
+
#endif /* IB_VERBS_H */
--
1.7.11.2
* [RFC 4/7] IB/uverbs: Add command to register a DMA-BUF fd
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
` (2 preceding siblings ...)
2016-08-01 6:57 ` [RFC 3/7] IB/core: Helpers for mapping DMA-BUF in MRs Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
2016-08-01 6:57 ` [RFC 5/7] IB/mlx5: Implement reg_user_dma_buf_mr Haggai Eran
` (2 subsequent siblings)
6 siblings, 0 replies; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
The new command accepts the file descriptor of a DMA-BUF object as
input, as well as the PD and access flags, and produces a memory region
that accesses the device memory exposed by the DMA-BUF object.
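For illustration, user space would fill the new command roughly as follows (a sketch; the libibverbs plumbing is not part of this series, and dmabuf_fd/pd_handle are placeholders):
        struct ib_uverbs_ex_reg_dma_buf_mr cmd = {};
        struct ib_uverbs_ex_reg_dma_buf_mr_resp resp = {};

        cmd.fd = dmabuf_fd;             /* DMA-BUF file descriptor */
        cmd.access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
        cmd.pd_handle = pd_handle;      /* handle of an existing PD */
        cmd.comp_mask = 0;              /* must be zero */

        /*
         * cmd and resp travel through the extended uverbs write() path as
         * IB_USER_VERBS_EX_CMD_REG_DMA_BUF_MR; on success resp.lkey,
         * resp.rkey, and resp.mr_handle describe the new MR.
         */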
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/infiniband/core/uverbs.h | 1 +
drivers/infiniband/core/uverbs_cmd.c | 111 ++++++++++++++++++++++++++++++++++
drivers/infiniband/core/uverbs_main.c | 1 +
include/rdma/ib_verbs.h | 6 ++
include/uapi/rdma/ib_user_verbs.h | 18 ++++++
5 files changed, 137 insertions(+)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 612ccfd39bf9..491292740b67 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -275,5 +275,6 @@ IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
IB_UVERBS_DECLARE_EX_CMD(query_device);
IB_UVERBS_DECLARE_EX_CMD(create_cq);
IB_UVERBS_DECLARE_EX_CMD(create_qp);
+IB_UVERBS_DECLARE_EX_CMD(reg_dma_buf_mr);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 825021d1008b..ca1f40769019 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -37,6 +37,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/dma-buf.h>
#include <asm/uaccess.h>
@@ -1125,6 +1126,116 @@ put_uobjs:
return ret;
}
+int ib_uverbs_ex_reg_dma_buf_mr(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore, struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_reg_dma_buf_mr cmd;
+ struct ib_uverbs_ex_reg_dma_buf_mr_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct dma_buf *dmabuf;
+ int ret;
+
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if (ucore->outlen < (offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length)))
+ return -ENOSPC;
+
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ return ret;
+
+ dmabuf = dma_buf_get(cmd.fd);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
+ if (!uobj) {
+ ret = -ENOMEM;
+ goto err_dma_buf;
+ }
+
+ init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
+ down_write(&uobj->mutex);
+
+ pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+ if (!pd) {
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ if (!pd->device->reg_user_dma_buf_mr) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ mr = pd->device->reg_user_dma_buf_mr(pd, dmabuf, cmd.access_flags, uhw);
+ if (IS_ERR(mr)) {
+ ret = PTR_ERR(mr);
+ goto err_put;
+ }
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = uobj;
+
+ uobj->object = mr;
+ ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
+ if (ret)
+ goto err_unreg;
+
+ memset(&resp, 0, sizeof(resp));
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+ resp.mr_handle = uobj->id;
+ resp.response_length = sizeof(resp);
+
+ if (ib_copy_to_udata(ucore, &resp, resp.response_length)) {
+ ret = -EFAULT;
+ goto err_copy;
+ }
+
+ dma_buf_put(dmabuf);
+ put_pd_read(pd);
+
+ mutex_lock(&file->mutex);
+ list_add_tail(&uobj->list, &file->ucontext->mr_list);
+ mutex_unlock(&file->mutex);
+
+ uobj->live = 1;
+
+ up_write(&uobj->mutex);
+
+ return 0;
+
+err_copy:
+ idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
+
+err_unreg:
+ ib_dereg_mr(mr);
+
+err_put:
+ put_pd_read(pd);
+
+err_free:
+ put_uobj_write(uobj);
+
+err_dma_buf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 31f422a70623..c5368f2b1cf0 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -130,6 +130,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
[IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
[IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
+ [IB_USER_VERBS_EX_CMD_REG_DMA_BUF_MR] = ib_uverbs_ex_reg_dma_buf_mr,
};
static void ib_uverbs_add_one(struct ib_device *device);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index da23ec0a06d7..9c4241aeec79 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1677,6 +1677,7 @@ struct ib_dma_mapping_ops {
};
struct iw_cm_verbs;
+struct dma_buf;
struct ib_port_immutable {
int pkey_tbl_len;
@@ -1866,6 +1867,11 @@ struct ib_device {
int mr_access_flags,
struct ib_pd *pd,
struct ib_udata *udata);
+ struct ib_mr * (*reg_user_dma_buf_mr)(
+ struct ib_pd *pd,
+ struct dma_buf *dmabuf,
+ int mr_access_flags,
+ struct ib_udata *udata);
int (*dereg_mr)(struct ib_mr *mr);
struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
enum ib_mr_type mr_type,
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index b6543d73d20a..aad37e4572b5 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -95,6 +95,7 @@ enum {
IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ IB_USER_VERBS_EX_CMD_REG_DMA_BUF_MR,
};
/*
@@ -320,6 +321,23 @@ struct ib_uverbs_rereg_mr_resp {
__u32 rkey;
};
+struct ib_uverbs_ex_reg_dma_buf_mr {
+ __u64 response;
+ __u32 fd;
+ __u32 access_flags;
+ __u32 pd_handle;
+ __u32 comp_mask;
+};
+
+struct ib_uverbs_ex_reg_dma_buf_mr_resp {
+ __u32 mr_handle;
+ __u32 lkey;
+ __u32 rkey;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 reserved;
+};
+
struct ib_uverbs_dereg_mr {
__u32 mr_handle;
};
--
1.7.11.2
* [RFC 5/7] IB/mlx5: Implement reg_user_dma_buf_mr
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
` (3 preceding siblings ...)
2016-08-01 6:57 ` [RFC 4/7] IB/uverbs: Add command to register a DMA-BUF fd Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
2016-08-01 6:57 ` [RFC 6/7] NVMe: Use genalloc to allocate CMB regions Haggai Eran
2016-08-01 6:57 ` [RFC 7/7] NVMe: CMB on DMA-BUF Haggai Eran
6 siblings, 0 replies; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
Register DMA-BUF buffers as memory regions using the
ib_mr_attach_dmabuf() helper function. The code posts the fast
registration request on the UMR QP and waits for its completion before
returning.
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/infiniband/hw/mlx5/main.c | 4 ++-
drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +++
drivers/infiniband/hw/mlx5/mr.c | 51 ++++++++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f41254f3689a..852be161b68e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2368,7 +2368,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.uverbs_ex_cmd_mask =
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_EX_CMD_REG_DMA_BUF_MR);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
@@ -2409,6 +2410,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
+ dev->ib_dev.reg_user_dma_buf_mr = mlx5_ib_reg_user_dma_buf_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c4a9825828bc..a722dcb367fc 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -711,6 +711,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_reg_user_dma_buf_mr(struct ib_pd *pd,
+ struct dma_buf *dmabuf,
+ int mr_access_flags,
+ struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index fb2bb25c6cf0..f773787013bd 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -39,6 +39,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
+#include <linux/dma-buf.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -1447,6 +1448,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int npages = mr->npages;
struct ib_umem *umem = mr->umem;
+ struct dma_buf_attachment *attach = ibmr->attach;
+ struct sg_table *sg = ibmr->sg;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (umem && umem->odp_data) {
@@ -1477,6 +1480,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
atomic_sub(npages, &dev->mdev->priv.reg_pages);
}
+ if (attach)
+ ib_mr_detach_dmabuf(attach, sg);
+
return 0;
}
@@ -1785,3 +1791,48 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
return n;
}
+
+struct ib_mr *mlx5_ib_reg_user_dma_buf_mr(struct ib_pd *pd,
+ struct dma_buf *dmabuf,
+ int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_mr *mr;
+ struct mlx5_ib_umr_context umr_context;
+ struct ib_reg_wr regwr = {};
+ int ret;
+
+ if (mr_access_flags & IB_ACCESS_ON_DEMAND) {
+ mlx5_ib_err(dev, "reg DMA-BUF MR with on-demand paging not supported");
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ DIV_ROUND_UP(dmabuf->size, PAGE_SIZE));
+ if (IS_ERR(mr))
+ return mr;
+
+ ret = ib_mr_attach_dmabuf(mr, dmabuf, mr_access_flags);
+ if (ret)
+ goto dereg;
+
+ mlx5_ib_init_umr_context(&umr_context);
+ regwr.wr.wr_cqe = &umr_context.cqe;
+ regwr.wr.opcode = IB_WR_REG_MR;
+ regwr.mr = mr;
+ regwr.key = mr->lkey;
+ regwr.access = mr_access_flags;
+
+ ret = mlx5_ib_post_umr_sync(dev, &regwr.wr);
+ if (ret)
+ goto detach;
+
+ return mr;
+
+detach:
+ ib_mr_detach_dmabuf(mr->attach, mr->sg);
+dereg:
+ WARN_ON_ONCE(ib_dereg_mr(mr));
+ return ERR_PTR(ret);
+}
--
1.7.11.2
* [RFC 6/7] NVMe: Use genalloc to allocate CMB regions
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
` (4 preceding siblings ...)
2016-08-01 6:57 ` [RFC 5/7] IB/mlx5: Implement reg_user_dma_buf_mr Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
[not found] ` <20160801155348.GB23224@infradead.org>
2016-08-01 6:57 ` [RFC 7/7] NVMe: CMB on DMA-BUF Haggai Eran
6 siblings, 1 reply; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
Register the CMB with a gen_pool dedicated to managing CMB regions, and
allocate the SQs from the pool so that their regions are accounted for.
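For illustration, the genalloc pattern this introduces (a sketch of the calls added below; sq_size stands for the rounded-up SQ size):
        /* One pool per controller; allocations are page granular. */
        dev->cmb_pool = gen_pool_create(PAGE_SHIFT, -1);

        /* Seed the pool with the ioremapped CMB and its bus address. */
        gen_pool_add_virt(dev->cmb_pool, (unsigned long)cmb, dma_addr,
                          size, -1);

        /* Carve out an SQ: returns a CPU address, fills the DMA address. */
        nvmeq->sq_cmds_io = gen_pool_dma_alloc(dev->cmb_pool, sq_size,
                                               &nvmeq->sq_dma_addr);

        /* Return the region on queue teardown. */
        gen_pool_free(dev->cmb_pool, (unsigned long)nvmeq->sq_cmds_io,
                      sq_size);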
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/nvme/host/nvme-pci.h | 24 ++++++++++++++++++++
drivers/nvme/host/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++----
2 files changed, 74 insertions(+), 4 deletions(-)
create mode 100644 drivers/nvme/host/nvme-pci.h
diff --git a/drivers/nvme/host/nvme-pci.h b/drivers/nvme/host/nvme-pci.h
new file mode 100644
index 000000000000..5b29508dc182
--- /dev/null
+++ b/drivers/nvme/host/nvme-pci.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright © 2016 Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_PCI_H
+#define _NVME_PCI_H
+
+#include "nvme.h"
+
+struct nvme_dev;
+
+void *nvme_alloc_cmb(struct nvme_dev *dev, size_t size, dma_addr_t *dma_addr);
+void nvme_free_cmb(struct nvme_dev *dev, void *addr, size_t size);
+
+#endif
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index befac5b19490..d3da5d9552dd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -42,8 +42,10 @@
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
+#include <linux/genalloc.h>
#include "nvme.h"
+#include "nvme-pci.h"
#define NVME_Q_DEPTH 1024
#define NVME_AQ_DEPTH 256
@@ -99,6 +101,7 @@ struct nvme_dev {
dma_addr_t cmb_dma_addr;
u64 cmb_size;
u32 cmbsz;
+ struct gen_pool *cmb_pool;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
};
@@ -937,11 +940,17 @@ static void nvme_cancel_io(struct request *req, void *data, bool reserved)
static void nvme_free_queue(struct nvme_queue *nvmeq)
{
+ struct nvme_dev *dev = nvmeq->dev;
+
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
if (nvmeq->sq_cmds)
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+ if (nvmeq->sq_cmds_io)
+ nvme_free_cmb(dev, nvmeq->sq_cmds_io,
+ roundup(SQ_SIZE(nvmeq->q_depth),
+ dev->ctrl.page_size));
kfree(nvmeq);
}
@@ -1032,10 +1041,12 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
+ nvmeq->sq_cmds_io =
+ nvme_alloc_cmb(dev, roundup(SQ_SIZE(depth),
+ dev->ctrl.page_size),
+ &nvmeq->sq_dma_addr);
+ if (!nvmeq->sq_cmds_io)
+ return -ENOMEM;
} else {
nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
&nvmeq->sq_dma_addr, GFP_KERNEL);
@@ -1339,6 +1350,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
struct pci_dev *pdev = to_pci_dev(dev->dev);
void __iomem *cmb;
dma_addr_t dma_addr;
+ int ret;
if (!use_cmb_sqes)
return NULL;
@@ -1372,17 +1384,51 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
dev->cmb_dma_addr = dma_addr;
dev->cmb_size = size;
+
+ dev->cmb_pool = gen_pool_create(PAGE_SHIFT, -1);
+ if (!dev->cmb_pool)
+ goto unmap;
+
+ ret = gen_pool_add_virt(dev->cmb_pool, (unsigned long)(uintptr_t)cmb,
+ dma_addr, size, -1);
+ if (ret)
+ goto destroy_pool;
+
return cmb;
+
+destroy_pool:
+ gen_pool_destroy(dev->cmb_pool);
+ dev->cmb_pool = NULL;
+unmap:
+ iounmap(cmb);
+ return NULL;
}
static inline void nvme_release_cmb(struct nvme_dev *dev)
{
if (dev->cmb) {
+ gen_pool_destroy(dev->cmb_pool);
iounmap(dev->cmb);
dev->cmb = NULL;
}
}
+void *nvme_alloc_cmb(struct nvme_dev *dev, size_t size, dma_addr_t *dma_addr)
+{
+ if (!dev->cmb_pool)
+ return NULL;
+
+ return gen_pool_dma_alloc(dev->cmb_pool, size, dma_addr);
+}
+
+void nvme_free_cmb(struct nvme_dev *dev, void *addr, size_t size)
+{
+ if (WARN_ON(!dev->cmb_pool))
+ return;
+
+ gen_pool_free(dev->cmb_pool, (unsigned long)(uintptr_t)addr, size);
+}
+
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
--
1.7.11.2
* [RFC 7/7] NVMe: CMB on DMA-BUF
2016-08-01 6:57 [RFC 0/7] RDMA subsystem DMA-BUF support Haggai Eran
` (5 preceding siblings ...)
2016-08-01 6:57 ` [RFC 6/7] NVMe: Use genalloc to allocate CMB regions Haggai Eran
@ 2016-08-01 6:57 ` Haggai Eran
2016-08-01 15:52 ` Christoph Hellwig
6 siblings, 1 reply; 16+ messages in thread
From: Haggai Eran @ 2016-08-01 6:57 UTC
To: linux-rdma, linux-nvme
Cc: linux-pci, Stephen Bates, Liran Liss, Leon Romanovsky,
Artemy Kovalyov, Jerome Glisse, Yishai Hadas, Haggai Eran
Allocate a CMB region to user-space as a DMA-BUF object.
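For illustration, the intended user-space flow is roughly (a sketch; the device path and size are placeholders):
        struct nvme_alloc_user_cmb cmd = {
                .size = 2 * 1024 * 1024,        /* bytes of CMB to allocate */
        };
        int fd = open("/dev/nvme0", O_RDWR);

        if (ioctl(fd, NVME_IOCTL_ALLOC_USER_CMB, &cmd) == 0) {
                /*
                 * cmd.fd is now a DMA-BUF fd that can be handed to the
                 * REG_DMA_BUF_MR verb from patch 4, or mmap()ed.
                 */
        }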
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
drivers/nvme/host/Makefile | 2 +-
drivers/nvme/host/core.c | 29 ++++
drivers/nvme/host/dmabuf.c | 308 ++++++++++++++++++++++++++++++++++++++++
drivers/nvme/host/nvme-pci.h | 2 +
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 6 +
include/uapi/linux/nvme_ioctl.h | 11 ++
7 files changed, 358 insertions(+), 1 deletion(-)
create mode 100644 drivers/nvme/host/dmabuf.c
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 9a3ca892b4a7..f8d4f5d33398 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -5,4 +5,4 @@ nvme-core-y := core.o
nvme-core-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
-nvme-y += pci.o
+nvme-y += pci.o dmabuf.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d5fb55c0a9d9..5860b468ab39 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1209,6 +1209,33 @@ out_unlock:
return ret;
}
+static int nvme_alloc_user_cmb(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ struct nvme_alloc_user_cmb __user *ucmd)
+{
+ struct nvme_alloc_user_cmb cmd;
+ int status;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+ return -EFAULT;
+ if (cmd.flags || cmd.rsvd1 || cmd.opcode)
+ return -EINVAL;
+
+ if (!ctrl->ops->alloc_user_cmb)
+ return -ENOTTY;
+
+ status = ctrl->ops->alloc_user_cmb(ctrl, cmd.size);
+ if (status < 0)
+ return status;
+
+ cmd.fd = status;
+ if (copy_to_user(ucmd, &cmd, sizeof(cmd)))
+ return -EFAULT;
+
+ return 0;
+}
+
static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1228,6 +1255,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
case NVME_IOCTL_RESCAN:
nvme_queue_scan(ctrl);
return 0;
+ case NVME_IOCTL_ALLOC_USER_CMB:
+ return nvme_alloc_user_cmb(ctrl, NULL, argp);
default:
return -ENOTTY;
}
diff --git a/drivers/nvme/host/dmabuf.c b/drivers/nvme/host/dmabuf.c
new file mode 100644
index 000000000000..ab9484b40775
--- /dev/null
+++ b/drivers/nvme/host/dmabuf.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright © 2016 Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+#include <linux/dma-buf.h>
+
+#include "nvme-pci.h"
+
+struct nvme_cmb_object {
+ struct nvme_dev *dev;
+ struct dma_buf *dma_buf;
+ void *addr;
+ dma_addr_t dma_addr;
+ int attachments;
+ struct kref refcount;
+};
+
+static size_t obj_size(struct nvme_cmb_object *obj)
+{
+ return obj->dma_buf->size;
+}
+
+struct nvme_cmb_attachment {
+ struct sg_table sgt;
+ enum dma_data_direction dir;
+};
+
+static void nvme_cmb_object_get(struct nvme_cmb_object *obj)
+{
+ kref_get(&obj->refcount);
+}
+
+static void nvme_cmb_object_release(struct kref *kref)
+{
+ struct nvme_cmb_object *obj =
+ container_of(kref, struct nvme_cmb_object, refcount);
+
+ WARN_ON(obj->attachments);
+ WARN_ON(obj->addr || obj->dma_addr);
+
+ if (obj->dma_buf)
+ dma_buf_put(obj->dma_buf);
+ kfree(obj);
+}
+
+static void nvme_cmb_object_put(struct nvme_cmb_object *obj)
+{
+ kref_put(&obj->refcount, nvme_cmb_object_release);
+}
+
+static int nvme_cmb_map_attach(struct dma_buf *dma_buf,
+ struct device *target_dev,
+ struct dma_buf_attachment *attach)
+{
+ struct nvme_cmb_attachment *cmb_attach;
+ struct nvme_cmb_object *obj = dma_buf->priv;
+ struct nvme_dev *dev = obj->dev;
+ int ret;
+
+ cmb_attach = kzalloc(sizeof(*cmb_attach), GFP_KERNEL);
+ if (!cmb_attach)
+ return -ENOMEM;
+
+ /*
+ * TODO check there is no IOMMU enabled and there is peer to peer
+ * access between target_dev and our device
+ */
+
+ cmb_attach->dir = DMA_NONE;
+ attach->priv = cmb_attach;
+
+ if (!obj->attachments) {
+ obj->addr = nvme_alloc_cmb(dev, obj_size(obj), &obj->dma_addr);
+ if (!obj->addr) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ }
+ ++obj->attachments;
+
+ return 0;
+
+free:
+ kfree(cmb_attach);
+ return ret;
+}
+
+static void nvme_cmb_map_detach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
+{
+ struct nvme_cmb_attachment *cmb_attach = attach->priv;
+ struct nvme_cmb_object *obj = dma_buf->priv;
+ struct nvme_dev *dev = obj->dev;
+
+ if (!cmb_attach)
+ return;
+
+ if (!--obj->attachments) {
+ nvme_free_cmb(dev, obj->addr, obj_size(obj));
+ obj->addr = NULL;
+ obj->dma_addr = 0;
+ }
+
+ if (cmb_attach->dir != DMA_NONE) {
+ /* TODO something like dma_unmap_resource */
+ sg_free_table(&cmb_attach->sgt);
+ }
+
+ kfree(cmb_attach);
+ attach->priv = NULL;
+}
+
+static struct sg_table *nvme_cmb_map_dma_buf(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
+{
+ struct nvme_cmb_attachment *cmb_attach = attach->priv;
+ struct nvme_cmb_object *obj = attach->dmabuf->priv;
+ int ret;
+
+ if (WARN_ON(dir == DMA_NONE || !cmb_attach))
+ return ERR_PTR(-EINVAL);
+
+ /* return the cached mapping when possible */
+ if (cmb_attach->dir == dir)
+ return &cmb_attach->sgt;
+
+ /*
+ * two mappings with different directions for the same attachment are
+ * not allowed
+ */
+ if (WARN_ON(cmb_attach->dir != DMA_NONE))
+ return ERR_PTR(-EBUSY);
+
+ ret = sg_alloc_table(&cmb_attach->sgt, 1, GFP_KERNEL);
+ if (ret)
+ return ERR_PTR(ret);
+
+ /*
+ * TODO
+ * 1. Use something like dma_map_resource to get DMA mapping for the
+ * BAR.
+ * 2. no struct page for this address, just a pfn. Make sure callers
+ * don't need it.
+ */
+ sg_dma_address(cmb_attach->sgt.sgl) = obj->dma_addr;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg_dma_len(cmb_attach->sgt.sgl) = obj_size(obj);
+#endif
+
+ cmb_attach->dir = dir;
+
+ return &cmb_attach->sgt;
+}
+
+static void nvme_cmb_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+ /* nothing to be done here */
+}
+
+static void nvme_cmb_dmabuf_release(struct dma_buf *dma_buf)
+{
+ struct nvme_cmb_object *obj = dma_buf->priv;
+
+ if (!obj)
+ return;
+
+ nvme_cmb_object_put(obj);
+}
+
+static void *nvme_cmb_dmabuf_kmap_atomic(struct dma_buf *dma_buf,
+ unsigned long page_num)
+{
+ struct nvme_cmb_object *obj = dma_buf->priv;
+
+ if (!obj || !obj->addr)
+ return NULL;
+
+ return obj->addr + (page_num << PAGE_SHIFT);
+}
+
+static void nvme_cmb_vm_open(struct vm_area_struct *vma)
+{
+ struct nvme_cmb_object *obj = vma->vm_private_data;
+
+ nvme_cmb_object_get(obj);
+}
+
+static void nvme_cmb_vm_close(struct vm_area_struct *vma)
+{
+ struct nvme_cmb_object *obj = vma->vm_private_data;
+
+ nvme_cmb_object_put(obj);
+}
+
+static int nvme_cmb_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct nvme_cmb_object *obj = vma->vm_private_data;
+ pgoff_t offset;
+ unsigned long pfn;
+ int err;
+
+ if (!obj->addr)
+ return VM_FAULT_SIGBUS;
+
+ offset = ((unsigned long)vmf->virtual_address - vma->vm_start);
+ pfn = ((unsigned long)obj->addr + offset) >> PAGE_SHIFT;
+
+ err = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+ switch (err) {
+ case -EAGAIN:
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ return VM_FAULT_NOPAGE;
+
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ }
+
+ return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct nvme_cmb_vm_ops = {
+ .fault = nvme_cmb_fault,
+ .open = nvme_cmb_vm_open,
+ .close = nvme_cmb_vm_close,
+};
+
+static int nvme_cmb_dmabuf_mmap(struct dma_buf *dma_buf,
+ struct vm_area_struct *vma)
+{
+ struct nvme_cmb_object *obj = dma_buf->priv;
+
+ /* Check for valid size. */
+ if (obj_size(obj) < vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+ vma->vm_ops = &nvme_cmb_vm_ops;
+ vma->vm_private_data = obj;
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+ nvme_cmb_object_get(obj);
+
+ return 0;
+}
+
+static const struct dma_buf_ops nvme_cmb_dmabuf_ops = {
+ .attach = nvme_cmb_map_attach,
+ .detach = nvme_cmb_map_detach,
+ .map_dma_buf = nvme_cmb_map_dma_buf,
+ .unmap_dma_buf = nvme_cmb_unmap_dma_buf,
+ .release = nvme_cmb_dmabuf_release,
+ .kmap = nvme_cmb_dmabuf_kmap_atomic,
+ .kmap_atomic = nvme_cmb_dmabuf_kmap_atomic,
+ .mmap = nvme_cmb_dmabuf_mmap,
+};
+
+int nvme_pci_alloc_user_cmb(struct nvme_dev *dev, u64 size)
+{
+ struct nvme_cmb_object *obj;
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+ int ret;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ kref_init(&obj->refcount);
+ obj->dev = dev;
+
+ exp_info.ops = &nvme_cmb_dmabuf_ops;
+ exp_info.size = size;
+ exp_info.flags = O_CLOEXEC | O_RDWR;
+ exp_info.priv = obj;
+
+ obj->dma_buf = dma_buf_export(&exp_info);
+ if (IS_ERR(obj->dma_buf)) {
+ ret = PTR_ERR(obj->dma_buf);
+ goto put_obj;
+ }
+
+ ret = dma_buf_fd(obj->dma_buf, exp_info.flags);
+ if (ret < 0)
+ goto put_obj;
+
+ return ret;
+
+put_obj:
+ nvme_cmb_object_put(obj);
+ return ret;
+}
+
diff --git a/drivers/nvme/host/nvme-pci.h b/drivers/nvme/host/nvme-pci.h
index 5b29508dc182..2292d2c24fda 100644
--- a/drivers/nvme/host/nvme-pci.h
+++ b/drivers/nvme/host/nvme-pci.h
@@ -18,6 +18,8 @@
struct nvme_dev;
+int nvme_pci_alloc_user_cmb(struct nvme_dev *dev, u64 size);
+
void *nvme_alloc_cmb(struct nvme_dev *dev, size_t size, dma_addr_t *dma_addr);
void nvme_free_cmb(struct nvme_dev *dev, void *addr, size_t size);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1daa0482de0e..3a65144f23be 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -152,6 +152,7 @@ struct nvme_ctrl_ops {
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*post_scan)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
+ int (*alloc_user_cmb)(struct nvme_ctrl *ctrl, u64 size);
};
static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d3da5d9552dd..2a15755e845e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1947,6 +1947,11 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
return nvme_reset(to_nvme_dev(ctrl));
}
+static int nvme_pci_alloc_user_cmb_wrapper(struct nvme_ctrl *ctrl, u64 size)
+{
+ return nvme_pci_alloc_user_cmb(to_nvme_dev(ctrl), size);
+}
+
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.module = THIS_MODULE,
.reg_read32 = nvme_pci_reg_read32,
@@ -1956,6 +1961,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.free_ctrl = nvme_pci_free_ctrl,
.post_scan = nvme_pci_post_scan,
.submit_async_event = nvme_pci_submit_async_event,
+ .alloc_user_cmb = nvme_pci_alloc_user_cmb_wrapper,
};
static int nvme_dev_map(struct nvme_dev *dev)
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index 50ff21f748b6..d66f5b56b163 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -55,6 +55,16 @@ struct nvme_passthru_cmd {
#define nvme_admin_cmd nvme_passthru_cmd
+struct nvme_alloc_user_cmb {
+ /* in */
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u64 size;
+ /* out */
+ __u32 fd;
+};
+
#define NVME_IOCTL_ID _IO('N', 0x40)
#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
@@ -62,5 +72,6 @@ struct nvme_passthru_cmd {
#define NVME_IOCTL_RESET _IO('N', 0x44)
#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
#define NVME_IOCTL_RESCAN _IO('N', 0x46)
+#define NVME_IOCTL_ALLOC_USER_CMB _IOWR('N', 0x47, struct nvme_alloc_user_cmb)
#endif /* _UAPI_LINUX_NVME_IOCTL_H */
--
1.7.11.2
* Re: [RFC 7/7] NVMe: CMB on DMA-BUF
2016-08-01 6:57 ` [RFC 7/7] NVMe: CMB on DMA-BUF Haggai Eran
@ 2016-08-01 15:52 ` Christoph Hellwig
2016-08-02 7:10 ` Haggai Eran
2016-08-03 10:02 ` Sagi Grimberg
0 siblings, 2 replies; 16+ messages in thread
From: Christoph Hellwig @ 2016-08-01 15:52 UTC
To: Haggai Eran
Cc: linux-rdma, linux-nvme, Liran Liss, linux-pci, Yishai Hadas,
Stephen Bates, Leon Romanovsky, Artemy Kovalyov, Jerome Glisse
On Mon, Aug 01, 2016 at 09:57:33AM +0300, Haggai Eran wrote:
> Allocate a CMB region to user-space as a DMA-BUF object.
NACK. Absolutely no way we're going to export a low-level device
specific feature to userspace like that. If you want to DMA to
the CMB do it from the kernel, and for that we'll need a proper
block device level API.
* Re: [RFC 7/7] NVMe: CMB on DMA-BUF
2016-08-01 15:52 ` Christoph Hellwig
@ 2016-08-02 7:10 ` Haggai Eran
2016-08-04 11:56 ` hch
2016-08-03 10:02 ` Sagi Grimberg
1 sibling, 1 reply; 16+ messages in thread
From: Haggai Eran @ 2016-08-02 7:10 UTC
To: hch@infradead.org
Cc: Leon Romanovsky, Kovalyov Artemy, linux-rdma@vger.kernel.org,
Yishai Hadas, stephen.bates@microsemi.com, j.glisse@gmail.com,
linux-nvme@lists.infradead.org, Liran Liss,
linux-pci@vger.kernel.org
On Mon, 2016-08-01 at 08:52 -0700, Christoph Hellwig wrote:
> On Mon, Aug 01, 2016 at 09:57:33AM +0300, Haggai Eran wrote:
> >
> > Allocate a CMB region to user-space as a DMA-BUF object.
> NACK. Absolutely no way we're going to export a low-level device
> specific feature to userspace like that. If you want to DMA to
> the CMB do it from the kernel, and for that we'll need a proper
> block device level API.
The reason I thought this idea might work is because of the responses
Stephen got for his attempt to use ZONE_DEVICE for things like the CMB.
The problem with that is that the way RDMA registration works with
get_user_pages means that the CPU also gets access to the CMB and
sometimes you don't want that. DMA-BUF can solve that nicely in that it
doesn't require mapping it to the CPU - a driver can attach to it
directly.
Anyway, I understand exposing the CMB directly might be a little too
low level. What do you think would work better? Something like
direct_access that returns bus addresses instead of pfns?
Thanks,
Haggai
* Re: [RFC 7/7] NVMe: CMB on DMA-BUF
2016-08-02 7:10 ` Haggai Eran
@ 2016-08-04 11:56 ` hch
2016-08-07 5:24 ` Haggai Eran
0 siblings, 1 reply; 16+ messages in thread
From: hch @ 2016-08-04 11:56 UTC
To: Haggai Eran
Cc: hch@infradead.org, Liran Liss, linux-rdma@vger.kernel.org,
linux-pci@vger.kernel.org, Yishai Hadas,
linux-nvme@lists.infradead.org, stephen.bates@microsemi.com,
Leon Romanovsky, Kovalyov Artemy, j.glisse@gmail.com
On Tue, Aug 02, 2016 at 07:10:14AM +0000, Haggai Eran wrote:
> The reason I thought this idea might work is because of the responses
> Stephen got for his attempt to use ZONE_DEVICE for things like the CMB.
> The problem with that is that the way RDMA registration works with
> get_user_pages means that the CPU also gets access to the CMB and
> sometimes you don't want that. DMA-BUF can solve that nicely in that it
> doesn't require mapping it to the CPU - a driver can attach to it
> directly.
I'm not complaining about DMA-BUF - that might be a useful low-level
abstraction if we define a higher level API around it.
> Anyway, I understand exposing the CMB directly might be a little too
> low level. What do you think would work better? Something like
> direct_access that returns bus addresses instead of pfns?
This would be my primary choice. We'll really need to build an
in-kernel framework for a) PCI P2P access involving drivers for the PCI
switches etc, and then on top of that build something specifically for
block drivers. Which will be a bit complicated because both have the
case of directly memory-mapped devices as well as indirect devices like
the CMB. I'm pretty sure we won't get it right at the first attempt,
so exposing it to userspace is an absolute no-go as that will lock us
into the first ABI forever.
* Re: [RFC 7/7] NVMe: CMB on DMA-BUF
2016-08-04 11:56 ` hch
@ 2016-08-07 5:24 ` Haggai Eran
0 siblings, 0 replies; 16+ messages in thread
From: Haggai Eran @ 2016-08-07 5:24 UTC
To: hch@infradead.org
Cc: Liran Liss, linux-rdma@vger.kernel.org, linux-pci@vger.kernel.org,
Yishai Hadas, linux-nvme@lists.infradead.org,
stephen.bates@microsemi.com, Leon Romanovsky, Kovalyov Artemy,
j.glisse@gmail.com, William Davis
On 8/4/2016 2:56 PM, hch@infradead.org wrote:
>> Anyway, I understand exposing the CMB directly might be a little too
>> low level. What do you think would work better? Something like
>> direct_access that returns bus addresses instead of pfns?
>
> This would be my primary choicse. We'll really need to built an
> in-kernel framework for a) PCI P2P access involving drivers for the PCI
> switches etc,
I was thinking perhaps the dma_map_resource patches by Will Davis [1]
could be used (adding him to the thread).
> and then on top of that build something speicifly for
> block drivers. Which will be a bit complicated because both have the
> case of directly memory mapped device as well as indirect devices like
> the CMB. I'm pretty sure we won't get it right at the first attempt,
> so exposing it to userspace is an absolute no-go as that will lock us
> into the first ABI forever.
I understand. I'll try to think of something for kernel usage only for
the next attempt.
Thanks,
Haggai
[1]
https://lists.linuxfoundation.org/pipermail/iommu/2015-August/014106.html
* Re: [RFC 7/7] NVMe: CMB on DMA-BUF
2016-08-01 15:52 ` Christoph Hellwig
2016-08-02 7:10 ` Haggai Eran
@ 2016-08-03 10:02 ` Sagi Grimberg
1 sibling, 0 replies; 16+ messages in thread
From: Sagi Grimberg @ 2016-08-03 10:02 UTC
To: Christoph Hellwig, Haggai Eran
Cc: linux-rdma, linux-nvme, Liran Liss, linux-pci, Yishai Hadas,
Stephen Bates, Leon Romanovsky, Artemy Kovalyov, Jerome Glisse
Hey Christoph, Haggai,
>> Allocate a CMB region to user-space as a DMA-BUF object.
>
> NACK. Absolutely no way we're going to export a low-level device
> specific feature to userspace like that. If you want to DMA to
> the CMB do it from the kernel, and for that we'll need a proper
> block device level API.
I think that in this patch, Haggai was aiming to demonstrate the use
of dma_buf for peer-to-peer transactions with an RDMA device. I agree
that the CMB example can/should be exposed at the block device level,
but I also think we'd like to generalize the way we expose a BAR space
of any peer device to another peer device.
How would you suggest we do that? I think Haggai's proposal is pretty
solid (considering the fact that the vast majority of RDMA applications
live in user-space).