* [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory
@ 2024-07-05 13:17 Anumula Murali Mohan Reddy
2024-07-06 0:16 ` Zhu Yanjun
2024-07-07 9:11 ` Leon Romanovsky
0 siblings, 2 replies; 6+ messages in thread
From: Anumula Murali Mohan Reddy @ 2024-07-05 13:17 UTC (permalink / raw)
To: jgg, leonro; +Cc: linux-rdma, Anumula Murali Mohan Reddy, Potnuri Bharat Teja
dma_alloc_coherent() allocates contiguous memory irrespective of
IOMMU mode, but after commit f5ff79fddf0e ("dma-mapping: remove
CONFIG_DMA_REMAP"), if the IOMMU is enabled in translate mode,
dma_alloc_coherent() may allocate non-contiguous memory.
An attempt to map this memory to user space results in a panic.
This patch fixes the issue by using dma_mmap_coherent() to map the
memory to user space.
Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP")
Signed-off-by: Anumula Murali Mohan Reddy <anumula@chelsio.com>
Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
---
drivers/infiniband/hw/cxgb4/cq.c | 4 +++
drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 ++
drivers/infiniband/hw/cxgb4/provider.c | 48 +++++++++++++++++++++-----
drivers/infiniband/hw/cxgb4/qp.c | 14 ++++++++
4 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 5111421f9473..81cfc876fa89 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1127,12 +1127,16 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
mm->key = uresp.key;
mm->addr = virt_to_phys(chp->cq.queue);
+ mm->vaddr = chp->cq.queue;
+ mm->dma_addr = chp->cq.dma_addr;
mm->len = chp->cq.memsize;
insert_mmap(ucontext, mm);
mm2->key = uresp.gts_key;
mm2->addr = chp->cq.bar2_pa;
mm2->len = PAGE_SIZE;
+ mm2->vaddr = NULL;
+ mm2->dma_addr = 0;
insert_mmap(ucontext, mm2);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index f838bb6718af..5eedc6cf0f8c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -536,6 +536,8 @@ struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
u32 key;
+ void *vaddr;
+ dma_addr_t dma_addr;
unsigned len;
};
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 246b739ddb2b..6227775970c9 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -131,6 +131,10 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_mm_entry *mm;
struct c4iw_ucontext *ucontext;
u64 addr;
+ size_t size;
+ void *vaddr;
+ unsigned long vm_pgoff;
+ dma_addr_t dma_addr;
pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff,
key, len);
@@ -145,6 +149,9 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (!mm)
return -EINVAL;
addr = mm->addr;
+ vaddr = mm->vaddr;
+ dma_addr = mm->dma_addr;
+ size = mm->len;
kfree(mm);
if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
@@ -155,9 +162,17 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
* MA_SYNC register...
*/
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- ret = io_remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
+ if (vaddr && is_vmalloc_addr(vaddr)) {
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+ vaddr, dma_addr, size);
+ vma->vm_pgoff = vm_pgoff;
+ } else {
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
} else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
(addr < (pci_resource_start(rdev->lldi.pdev, 2) +
pci_resource_len(rdev->lldi.pdev, 2)))) {
@@ -175,17 +190,32 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
vma->vm_page_prot =
pgprot_noncached(vma->vm_page_prot);
}
- ret = io_remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
+ if (vaddr && is_vmalloc_addr(vaddr)) {
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+ vaddr, dma_addr, size);
+ vma->vm_pgoff = vm_pgoff;
+ } else {
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
} else {
/*
* Map WQ or CQ contig dma memory...
*/
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
+ if (vaddr && is_vmalloc_addr(vaddr)) {
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+ vaddr, dma_addr, size);
+ } else {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
}
return ret;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d16d8eaa1415..3f6fb4b34d5a 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2282,16 +2282,22 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
goto err_free_ma_sync_key;
sq_key_mm->key = uresp.sq_key;
sq_key_mm->addr = qhp->wq.sq.phys_addr;
+ sq_key_mm->vaddr = qhp->wq.sq.queue;
+ sq_key_mm->dma_addr = qhp->wq.sq.dma_addr;
sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
insert_mmap(ucontext, sq_key_mm);
if (!attrs->srq) {
rq_key_mm->key = uresp.rq_key;
rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+ rq_key_mm->vaddr = qhp->wq.rq.queue;
+ rq_key_mm->dma_addr = qhp->wq.rq.dma_addr;
rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
insert_mmap(ucontext, rq_key_mm);
}
sq_db_key_mm->key = uresp.sq_db_gts_key;
sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
+ sq_db_key_mm->vaddr = NULL;
+ sq_db_key_mm->dma_addr = 0;
sq_db_key_mm->len = PAGE_SIZE;
insert_mmap(ucontext, sq_db_key_mm);
if (!attrs->srq) {
@@ -2299,6 +2305,8 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
rq_db_key_mm->addr =
(u64)(unsigned long)qhp->wq.rq.bar2_pa;
rq_db_key_mm->len = PAGE_SIZE;
+ rq_db_key_mm->vaddr = NULL;
+ rq_db_key_mm->dma_addr = 0;
insert_mmap(ucontext, rq_db_key_mm);
}
if (ma_sync_key_mm) {
@@ -2307,6 +2315,8 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
PCIE_MA_SYNC_A) & PAGE_MASK;
ma_sync_key_mm->len = PAGE_SIZE;
+ ma_sync_key_mm->vaddr = NULL;
+ ma_sync_key_mm->dma_addr = 0;
insert_mmap(ucontext, ma_sync_key_mm);
}
@@ -2763,10 +2773,14 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
srq_key_mm->key = uresp.srq_key;
srq_key_mm->addr = virt_to_phys(srq->wq.queue);
srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+ srq_key_mm->vaddr = srq->wq.queue;
+ srq_key_mm->dma_addr = srq->wq.dma_addr;
insert_mmap(ucontext, srq_key_mm);
srq_db_key_mm->key = uresp.srq_db_gts_key;
srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
srq_db_key_mm->len = PAGE_SIZE;
+ srq_db_key_mm->vaddr = NULL;
+ srq_db_key_mm->dma_addr = 0;
insert_mmap(ucontext, srq_db_key_mm);
}
--
2.39.3
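The core pattern the patch introduces, reduced to a self-contained sketch
(the function name is hypothetical; the real logic lives in c4iw_mmap() in
provider.c above). When the IOMMU remaps a dma_alloc_coherent() buffer, the
returned kernel virtual address is a vmalloc address: virt_to_phys() on it
is invalid and remap_pfn_range() maps the wrong pages, hence the panic.
dma_mmap_coherent() knows how the buffer was really allocated and maps it
correctly in either case.

#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int sketch_mmap_queue(struct device *dev, struct vm_area_struct *vma,
			     void *vaddr, dma_addr_t dma_addr, size_t size,
			     u64 addr, size_t len)
{
	unsigned long vm_pgoff;
	int ret;

	if (vaddr && is_vmalloc_addr(vaddr)) {
		/*
		 * cxgb4 encodes a lookup key in vm_pgoff, while
		 * dma_mmap_coherent() expects an offset into the buffer,
		 * so the key is saved and restored around the call.
		 */
		vm_pgoff = vma->vm_pgoff;
		vma->vm_pgoff = 0;
		ret = dma_mmap_coherent(dev, vma, vaddr, dma_addr, size);
		vma->vm_pgoff = vm_pgoff;
	} else {
		/* Contiguous case: the pre-existing physical-address path. */
		ret = remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
				      len, vma->vm_page_prot);
	}
	return ret;
}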
* Re: [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory
2024-07-05 13:17 [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory Anumula Murali Mohan Reddy
@ 2024-07-06 0:16 ` Zhu Yanjun
2024-07-07 9:11 ` Leon Romanovsky
1 sibling, 0 replies; 6+ messages in thread
From: Zhu Yanjun @ 2024-07-06 0:16 UTC (permalink / raw)
To: Anumula Murali Mohan Reddy, jgg, leonro
Cc: linux-rdma, Potnuri Bharat Teja, Linux Memory Management List
On 2024/7/5 21:17, Anumula Murali Mohan Reddy wrote:
> dma_alloc_coherent() allocates contiguous memory irrespective of
> IOMMU mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> CONFIG_DMA_REMAP"), if the IOMMU is enabled in translate mode,
CC linux-mm@kvack.org
Zhu Yanjun
> dma_alloc_coherent() may allocate non-contiguous memory.
> An attempt to map this memory to user space results in a panic.
> This patch fixes the issue by using dma_mmap_coherent() to map the
> memory to user space.
* Re: [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory
2024-07-05 13:17 [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory Anumula Murali Mohan Reddy
2024-07-06 0:16 ` Zhu Yanjun
@ 2024-07-07 9:11 ` Leon Romanovsky
2024-07-07 11:31 ` Christoph Hellwig
1 sibling, 1 reply; 6+ messages in thread
From: Leon Romanovsky @ 2024-07-07 9:11 UTC (permalink / raw)
To: Anumula Murali Mohan Reddy
Cc: jgg, linux-rdma, Potnuri Bharat Teja, Christoph Hellwig,
Robin Murphy
On Fri, Jul 05, 2024 at 06:47:53PM +0530, Anumula Murali Mohan Reddy wrote:
> dma_alloc_coherent() allocates contiguous memory irrespective of
> IOMMU mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> CONFIG_DMA_REMAP"), if the IOMMU is enabled in translate mode,
> dma_alloc_coherent() may allocate non-contiguous memory.
> An attempt to map this memory to user space results in a panic.
> This patch fixes the issue by using dma_mmap_coherent() to map the
> memory to user space.
It is the perfect time to move to rdma_user_mmap_io() instead of
open-coding it in the driver.
>
> Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP")
+ authors of the commit mentioned in Fixes.
Thanks
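For reference, a minimal sketch of the rdma_user_mmap_* pattern being
suggested, with hypothetical driver names; note the caveat in the reply
below that this API covers BAR/IO pages, not dma_alloc_coherent() buffers.
Entries are freed via the device's mmap_free callback, omitted here.

#include <rdma/ib_verbs.h>
#include <linux/slab.h>

struct my_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;	/* must be embedded */
	u64 bar_pa;				/* physical BAR page address */
};

/* At object-creation time: register the page and hand a key to user space. */
static int my_expose_bar_page(struct ib_ucontext *uctx, u64 bar_pa, u64 *key)
{
	struct my_mmap_entry *e = kzalloc(sizeof(*e), GFP_KERNEL);
	int ret;

	if (!e)
		return -ENOMEM;
	e->bar_pa = bar_pa;
	ret = rdma_user_mmap_entry_insert(uctx, &e->rdma_entry, PAGE_SIZE);
	if (ret) {
		kfree(e);
		return ret;
	}
	*key = rdma_user_mmap_get_offset(&e->rdma_entry);
	return 0;
}

/* In the mmap handler: the core looks up the entry from vma->vm_pgoff. */
static int my_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;
	struct my_mmap_entry *e;
	int ret;

	entry = rdma_user_mmap_entry_get(uctx, vma);
	if (!entry)
		return -EINVAL;
	e = container_of(entry, struct my_mmap_entry, rdma_entry);
	ret = rdma_user_mmap_io(uctx, vma, e->bar_pa >> PAGE_SHIFT, PAGE_SIZE,
				pgprot_noncached(vma->vm_page_prot), entry);
	rdma_user_mmap_entry_put(entry);
	return ret;
}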
* Re: [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory
2024-07-07 9:11 ` Leon Romanovsky
@ 2024-07-07 11:31 ` Christoph Hellwig
2024-07-07 11:39 ` Leon Romanovsky
0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2024-07-07 11:31 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Anumula Murali Mohan Reddy, jgg, linux-rdma, Potnuri Bharat Teja,
Christoph Hellwig, Robin Murphy
On Sun, Jul 07, 2024 at 12:11:05PM +0300, Leon Romanovsky wrote:
> On Fri, Jul 05, 2024 at 06:47:53PM +0530, Anumula Murali Mohan Reddy wrote:
> > dma_alloc_coherent() allocates contiguous memory irrespective of
> > IOMMU mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> > CONFIG_DMA_REMAP"), if the IOMMU is enabled in translate mode,
> > dma_alloc_coherent() may allocate non-contiguous memory.
> > An attempt to map this memory to user space results in a panic.
> > This patch fixes the issue by using dma_mmap_coherent() to map the
> > memory to user space.
>
> It is the perfect time to move to rdma_user_mmap_io() instead of
> open-coding it in the driver.
rdma_user_mmap_io does not work on dma coherent allocations.
> > Fixes: f5ff79fddf0e ("dma-mapping: remove CONFIG_DMA_REMAP")
>
> + authors of the commit mentioned in Fixes.
If that commit triggered a bug for you, it was buggy before; you
just didn't hit it. The Fixes tag needs to point to the commit that
first assumed the return value from dma_alloc_* could be converted
into a page/pfn/physical address.
> > +++ b/drivers/infiniband/hw/cxgb4/cq.c
> > @@ -1127,12 +1127,16 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
> >
> > mm->key = uresp.key;
> > mm->addr = virt_to_phys(chp->cq.queue);
... aka this one. And it still is buggy and needs to go away.
> > + if (vaddr && is_vmalloc_addr(vaddr)) {
And this check is broken. The virtual address returned from
dma_alloc_coherent() can also be something other than a vmalloc address.
>
>
> > + vm_pgoff = vma->vm_pgoff;
> > + vma->vm_pgoff = 0;
> > + ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
> > + vaddr, dma_addr, size);
> > + vma->vm_pgoff = vm_pgoff;
... and you thus must use this path unconditionally.
Same for the other hunks.
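Concretely, the shape being asked for is something like this sketch, which
assumes each mmap entry already records whether it came from
dma_alloc_coherent() (a hypothetical flag here; see the tagging suggestion
later in the thread):

	if (mm_is_dma_alloc) {
		/* Anything from dma_alloc_coherent() goes through
		 * dma_mmap_coherent(), whether or not it happens to be
		 * vmalloc-backed. No guessing from the virtual address. */
		vm_pgoff = vma->vm_pgoff;
		vma->vm_pgoff = 0;
		ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
					vaddr, dma_addr, size);
		vma->vm_pgoff = vm_pgoff;
	} else {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 addr >> PAGE_SHIFT,
					 len, vma->vm_page_prot);
	}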
* Re: [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory
2024-07-07 11:31 ` Christoph Hellwig
@ 2024-07-07 11:39 ` Leon Romanovsky
2024-07-08 10:05 ` Christoph Hellwig
0 siblings, 1 reply; 6+ messages in thread
From: Leon Romanovsky @ 2024-07-07 11:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Anumula Murali Mohan Reddy, jgg, linux-rdma, Potnuri Bharat Teja,
Robin Murphy
On Sun, Jul 07, 2024 at 01:31:03PM +0200, Christoph Hellwig wrote:
> On Sun, Jul 07, 2024 at 12:11:05PM +0300, Leon Romanovsky wrote:
> > On Fri, Jul 05, 2024 at 06:47:53PM +0530, Anumula Murali Mohan Reddy wrote:
> > > dma_alloc_coherent() allocates contiguous memory irrespective of
> > > IOMMU mode, but after commit f5ff79fddf0e ("dma-mapping: remove
> > > CONFIG_DMA_REMAP"), if the IOMMU is enabled in translate mode,
> > > dma_alloc_coherent() may allocate non-contiguous memory.
> > > An attempt to map this memory to user space results in a panic.
> > > This patch fixes the issue by using dma_mmap_coherent() to map the
> > > memory to user space.
> >
> > It is the perfect time to move to rdma_user_mmap_io() instead of
> > open-coding it in the driver.
>
> rdma_user_mmap_io does not work on dma coherent allocations.
They used dma_mmap_coherent() to implement a workaround; the original
cxgb4 didn't use it and probably doesn't need to.
Thanks
* Re: [PATCH for-next] RDMA/cxgb4: use dma_mmap_coherent() for mapping non-contiguous memory
2024-07-07 11:39 ` Leon Romanovsky
@ 2024-07-08 10:05 ` Christoph Hellwig
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2024-07-08 10:05 UTC (permalink / raw)
To: Leon Romanovsky
Cc: Christoph Hellwig, Anumula Murali Mohan Reddy, jgg, linux-rdma,
Potnuri Bharat Teja, Robin Murphy
On Sun, Jul 07, 2024 at 02:39:57PM +0300, Leon Romanovsky wrote:
> > > It is the perfect time to move to rdma_user_mmap_io() instead of
> > > open-coding it in the driver.
> >
> > rdma_user_mmap_io does not work on dma coherent allocations.
>
> They used dma_mmap_coherent() to implement a workaround; the original
> cxgb4 didn't use it and probably doesn't need to.
dma_mmap_coherent() must be paired with dma_alloc_coherent().

It seems like cxgb4 uses c4iw_mm_entry as a sort of generic container
for objects that can be mmapped, matched on a first-come, first-served
basis in c4iw_mmap() (WTF???). Not questioning the sanity of the
higher-level logic here, which is ABI by now, the right fix is to tag
each entry with what is being mmapped (DMA_ALLOC vs. uncached BAR vs.
WC BAR) and remove the guessing logic there.

While we're at it, pgprot_writecombine() is generally available, so
t4_pgprot_wc() should go away as well.
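The tagging suggested here might look roughly like the following sketch
(the enum and type field are hypothetical; the existing c4iw_mm_entry
fields are kept for context):

enum c4iw_mmap_type {
	C4IW_MMAP_DMA_ALLOC,	/* dma_alloc_coherent() queue memory */
	C4IW_MMAP_BAR_UC,	/* uncached BAR page (e.g. GTS/doorbell) */
	C4IW_MMAP_BAR_WC,	/* write-combined BAR page */
};

struct c4iw_mm_entry {
	struct list_head entry;
	u64 addr;
	u32 key;
	enum c4iw_mmap_type type;	/* set once, at insert_mmap() time */
	void *vaddr;			/* valid for C4IW_MMAP_DMA_ALLOC */
	dma_addr_t dma_addr;		/* valid for C4IW_MMAP_DMA_ALLOC */
	unsigned int len;
};

/* ... and in the mmap handler, no address-range guessing: */
switch (mm->type) {
case C4IW_MMAP_DMA_ALLOC:
	vma->vm_pgoff = 0;
	ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
				mm->vaddr, mm->dma_addr, mm->len);
	break;
case C4IW_MMAP_BAR_WC:
	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	ret = io_remap_pfn_range(vma, vma->vm_start, mm->addr >> PAGE_SHIFT,
				 mm->len, vma->vm_page_prot);
	break;
case C4IW_MMAP_BAR_UC:
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	ret = io_remap_pfn_range(vma, vma->vm_start, mm->addr >> PAGE_SHIFT,
				 mm->len, vma->vm_page_prot);
	break;
}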