linux-rdma.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver
@ 2025-07-25  5:53 Boshi Yu
  2025-07-25  5:53 ` [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer Boshi Yu
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Boshi Yu @ 2025-07-25  5:53 UTC (permalink / raw)
  To: jgg, leon; +Cc: linux-rdma, chengyou, kaishen

Hi,

This series of patches provides several fixes for the erdma driver:
- #1 uses dma_map_page to map the scatter MTT buffer page by page to avoid
     merging contiguous physical pages.
- #2 fixes ignored return value of init_kernel_qp.
- #3 fixes unset QPN of GSI QP.

Thanks,
Boshi Yu

Boshi Yu (3):
  RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  RDMA/erdma: Fix ignored return value of init_kernel_qp
  RDMA/erdma: Fix unset QPN of GSI QP

 drivers/infiniband/hw/erdma/erdma_verbs.c | 116 ++++++++++++++--------
 drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
 2 files changed, 76 insertions(+), 44 deletions(-)

-- 
2.46.0


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  2025-07-25  5:53 [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Boshi Yu
@ 2025-07-25  5:53 ` Boshi Yu
  2025-07-27 11:27   ` Leon Romanovsky
  2025-07-25  5:53 ` [PATCH for-next 2/3] RDMA/erdma: Fix ignored return value of init_kernel_qp Boshi Yu
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Boshi Yu @ 2025-07-25  5:53 UTC (permalink / raw)
  To: jgg, leon; +Cc: linux-rdma, chengyou, kaishen

Each high-level indirect MTT entry is assumed to point to exactly one page
of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
pages when mapping. To avoid extra overhead from splitting merged regions,
use dma_map_page to map the scatter MTT buffer page by page.

Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
---
 drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++--------
 drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
 2 files changed, 71 insertions(+), 43 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 94c211df09d8..b4dadd306837 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -149,7 +149,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
 			req.phy_addr[0] = mr->mem.mtt->buf_dma;
 			mtt_level = ERDMA_MR_MTT_1LEVEL;
 		} else {
-			req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
+			req.phy_addr[0] = mr->mem.mtt->dma_addrs[0];
 			mtt_level = mr->mem.mtt->level;
 		}
 	} else if (mr->type != ERDMA_MR_TYPE_DMA) {
@@ -626,18 +626,27 @@ static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
 	return ERR_PTR(-ENOMEM);
 }
 
-static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
-				     struct erdma_mtt *mtt)
+static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma,
+				  u32 npages)
 {
-	dma_unmap_sg(&dev->pdev->dev, mtt->sglist,
-		     DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE);
-	vfree(mtt->sglist);
+	u32 i;
+
+	for (i = 0; i < npages; i++)
+		dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE,
+			       DMA_TO_DEVICE);
+}
+
+static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev,
+					    struct erdma_mtt *mtt)
+{
+	erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages);
+	vfree(mtt->dma_addrs);
 }
 
 static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
 				      struct erdma_mtt *mtt)
 {
-	erdma_destroy_mtt_buf_sg(dev, mtt);
+	erdma_destroy_mtt_buf_dma_addrs(dev, mtt);
 	vfree(mtt->buf);
 	kfree(mtt);
 }
@@ -645,50 +654,69 @@ static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
 static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
 				  struct erdma_mtt *low_mtt)
 {
-	struct scatterlist *sg;
-	u32 idx = 0, i;
+	dma_addr_t *pg_addr = mtt->buf;
+	u32 i;
 
-	for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
-		mtt->buf[idx++] = sg_dma_address(sg);
+	for (i = 0; i < low_mtt->npages; i++)
+		pg_addr[i] = low_mtt->dma_addrs[i];
 }
 
-static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
+static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs,
+				void *buf, u64 len)
 {
-	struct scatterlist *sglist;
-	void *buf = mtt->buf;
-	u32 npages, i, nsg;
+	dma_addr_t *pg_dma;
 	struct page *pg;
+	u32 npages, i;
+	void *addr;
 
-	/* Failed if buf is not page aligned */
-	if ((uintptr_t)buf & ~PAGE_MASK)
-		return -EINVAL;
-
-	npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
-	sglist = vzalloc(npages * sizeof(*sglist));
-	if (!sglist)
-		return -ENOMEM;
+	npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >>
+		 PAGE_SHIFT;
+	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
+	if (!pg_dma)
+		return 0;
 
-	sg_init_table(sglist, npages);
+	addr = buf;
 	for (i = 0; i < npages; i++) {
-		pg = vmalloc_to_page(buf);
+		pg = vmalloc_to_page(addr);
 		if (!pg)
 			goto err;
-		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
-		buf += PAGE_SIZE;
+
+		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+		if (dma_mapping_error(&dev->pdev->dev, pg_dma[i]))
+			goto err;
+
+		addr += PAGE_SIZE;
 	}
 
-	nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
-	if (!nsg)
-		goto err;
+	*dma_addrs = pg_dma;
 
-	mtt->sglist = sglist;
-	mtt->nsg = nsg;
+	return npages;
+err:
+	erdma_unmap_page_list(dev, pg_dma, i);
+	vfree(pg_dma);
 
 	return 0;
-err:
-	vfree(sglist);
+}
 
-	return -ENOMEM;
+static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev,
+					  struct erdma_mtt *mtt)
+{
+	dma_addr_t *addrs;
+	u32 npages;
+
+	/* Failed if buf is not page aligned */
+	if ((uintptr_t)mtt->buf & ~PAGE_MASK)
+		return -EINVAL;
+
+	npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size);
+	if (!npages)
+		return -ENOMEM;
+
+	mtt->dma_addrs = addrs;
+	mtt->npages = npages;
+
+	return 0;
 }
 
 static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
@@ -707,12 +735,12 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
 	if (!mtt->buf)
 		goto err_free_mtt;
 
-	ret = erdma_create_mtt_buf_sg(dev, mtt);
+	ret = erdma_create_mtt_buf_dma_addrs(dev, mtt);
 	if (ret)
 		goto err_free_mtt_buf;
 
-	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
-		  mtt->size, mtt->nsg);
+	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n",
+		  mtt->size, mtt->npages);
 
 	return mtt;
 
@@ -746,8 +774,8 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
 	level = 1;
 
 	/* convergence the mtt table. */
-	while (mtt->nsg != 1 && level <= 3) {
-		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
+	while (mtt->npages != 1 && level <= 3) {
+		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages));
 		if (IS_ERR(tmp_mtt)) {
 			ret = PTR_ERR(tmp_mtt);
 			goto err_free_mtt;
@@ -765,7 +793,7 @@ static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
 
 	mtt->level = level;
 	ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
-		  mtt->level, mtt->sglist[0].dma_address);
+		  mtt->level, mtt->dma_addrs[0]);
 
 	return mtt;
 err_free_mtt:
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index ef411b81fbd7..7d8d3fe501d5 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -99,8 +99,8 @@ struct erdma_mtt {
 	union {
 		dma_addr_t buf_dma;
 		struct {
-			struct scatterlist *sglist;
-			u32 nsg;
+			dma_addr_t *dma_addrs;
+			u32 npages;
 			u32 level;
 		};
 	};
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH for-next 2/3] RDMA/erdma: Fix ignored return value of init_kernel_qp
  2025-07-25  5:53 [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Boshi Yu
  2025-07-25  5:53 ` [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer Boshi Yu
@ 2025-07-25  5:53 ` Boshi Yu
  2025-07-25  5:53 ` [PATCH for-next 3/3] RDMA/erdma: Fix unset QPN of GSI QP Boshi Yu
  2025-08-13 10:27 ` [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Leon Romanovsky
  3 siblings, 0 replies; 11+ messages in thread
From: Boshi Yu @ 2025-07-25  5:53 UTC (permalink / raw)
  To: jgg, leon; +Cc: linux-rdma, chengyou, kaishen

The init_kernel_qp interface may fail. Check its return value and free
related resources properly when it does.

Fixes: 155055771704 ("RDMA/erdma: Add verbs implementation")
Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
---
 drivers/infiniband/hw/erdma/erdma_verbs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index b4dadd306837..32b11ce228dc 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1059,7 +1059,9 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
 		if (ret)
 			goto err_out_cmd;
 	} else {
-		init_kernel_qp(dev, qp, attrs);
+		ret = init_kernel_qp(dev, qp, attrs);
+		if (ret)
+			goto err_out_xa;
 	}
 
 	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH for-next 3/3] RDMA/erdma: Fix unset QPN of GSI QP
  2025-07-25  5:53 [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Boshi Yu
  2025-07-25  5:53 ` [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer Boshi Yu
  2025-07-25  5:53 ` [PATCH for-next 2/3] RDMA/erdma: Fix ignored return value of init_kernel_qp Boshi Yu
@ 2025-07-25  5:53 ` Boshi Yu
  2025-07-25 15:26   ` Zhu Yanjun
  2025-08-13 10:27 ` [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Leon Romanovsky
  3 siblings, 1 reply; 11+ messages in thread
From: Boshi Yu @ 2025-07-25  5:53 UTC (permalink / raw)
  To: jgg, leon; +Cc: linux-rdma, chengyou, kaishen

The QPN of the GSI QP was not set, which may cause issues.
Set the QPN to 1 when creating the GSI QP.

Fixes: 999a0a2e9b87 ("RDMA/erdma: Support UD QPs and UD WRs")
Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
---
 drivers/infiniband/hw/erdma/erdma_verbs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 32b11ce228dc..996860f49b2f 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1022,6 +1022,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
 		old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
 		if (xa_is_err(old_entry))
 			ret = xa_err(old_entry);
+		else
+			qp->ibqp.qp_num = 1;
 	} else {
 		ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
 				      XA_LIMIT(1, dev->attrs.max_qp - 1),
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 3/3] RDMA/erdma: Fix unset QPN of GSI QP
  2025-07-25  5:53 ` [PATCH for-next 3/3] RDMA/erdma: Fix unset QPN of GSI QP Boshi Yu
@ 2025-07-25 15:26   ` Zhu Yanjun
  0 siblings, 0 replies; 11+ messages in thread
From: Zhu Yanjun @ 2025-07-25 15:26 UTC (permalink / raw)
  To: Boshi Yu, jgg, leon; +Cc: linux-rdma, chengyou, kaishen

在 2025/7/24 22:53, Boshi Yu 写道:
> The QPN of the GSI QP was not set, which may cause issues.
> Set the QPN to 1 when creating the GSI QP.

In 17.2.7 MANAGEMENT MESSAGES

"
QP1, used for the General Services Interface (GSI).
• This QP uses the Unreliable Datagram transport service.
• All traffic to and from this QP uses any VL other than VL15.
• GSI packets arriving before the current packet’s command
completes may be dropped (i.e. the minimum queue depth of
QP1 is one).
"

Reviewed-by: Zhu Yanjun <yanjun.zhu@linux.dev>

Zhu Yanjun

> 
> Fixes: 999a0a2e9b87 ("RDMA/erdma: Support UD QPs and UD WRs")
> Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
> Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
> ---
>   drivers/infiniband/hw/erdma/erdma_verbs.c | 2 ++
>   1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
> index 32b11ce228dc..996860f49b2f 100644
> --- a/drivers/infiniband/hw/erdma/erdma_verbs.c
> +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
> @@ -1022,6 +1022,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
>   		old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
>   		if (xa_is_err(old_entry))
>   			ret = xa_err(old_entry);
> +		else
> +			qp->ibqp.qp_num = 1;
>   	} else {
>   		ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
>   				      XA_LIMIT(1, dev->attrs.max_qp - 1),



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  2025-07-25  5:53 ` [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer Boshi Yu
@ 2025-07-27 11:27   ` Leon Romanovsky
  2025-07-28  3:08     ` Boshi Yu
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2025-07-27 11:27 UTC (permalink / raw)
  To: Boshi Yu; +Cc: jgg, linux-rdma, chengyou, kaishen

On Fri, Jul 25, 2025 at 01:53:54PM +0800, Boshi Yu wrote:
> Each high-level indirect MTT entry is assumed to point to exactly one page
> of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
> pages when mapping. To avoid extra overhead from splitting merged regions,
> use dma_map_page to map the scatter MTT buffer page by page.
> 
> Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
> Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
> ---
>  drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++--------
>  drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
>  2 files changed, 71 insertions(+), 43 deletions(-)

<...>

> +	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
> +	if (!pg_dma)
> +		return 0;
>  
> -	sg_init_table(sglist, npages);
> +	addr = buf;
>  	for (i = 0; i < npages; i++) {
> -		pg = vmalloc_to_page(buf);
> +		pg = vmalloc_to_page(addr);

<...>
> +
> +		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
> +					 DMA_TO_DEVICE);

Does it work?

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  2025-07-27 11:27   ` Leon Romanovsky
@ 2025-07-28  3:08     ` Boshi Yu
  2025-07-28  7:08       ` Leon Romanovsky
  0 siblings, 1 reply; 11+ messages in thread
From: Boshi Yu @ 2025-07-28  3:08 UTC (permalink / raw)
  To: Leon Romanovsky; +Cc: jgg, linux-rdma, chengyou, kaishen



On 2025/7/27 19:27, Leon Romanovsky wrote:
> On Fri, Jul 25, 2025 at 01:53:54PM +0800, Boshi Yu wrote:
>> Each high-level indirect MTT entry is assumed to point to exactly one page
>> of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
>> pages when mapping. To avoid extra overhead from splitting merged regions,
>> use dma_map_page to map the scatter MTT buffer page by page.
>>
>> Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
>> Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
>> ---
>>   drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++--------
>>   drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
>>   2 files changed, 71 insertions(+), 43 deletions(-)
> 
> <...>
> 
>> +	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
>> +	if (!pg_dma)
>> +		return 0;
>>   
>> -	sg_init_table(sglist, npages);
>> +	addr = buf;
>>   	for (i = 0; i < npages; i++) {
>> -		pg = vmalloc_to_page(buf);
>> +		pg = vmalloc_to_page(addr);
> 
> <...>
>> +
>> +		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
>> +					 DMA_TO_DEVICE);
> 
> Does it work?

Hi Leon,

I would like to confirm which part you think is not working properly. I 
guess that you might be concerned that if the buffer is not 
page-aligned, it could cause problems with dma_map_page.

In fact, when allocating the MTT buffer, we ensure that it is always 
page-aligned and that its length is a multiple of PAGE_SIZE. We have 
also tested the new code in our production environment, and it works well.

Look forward to your further reply if I have misunderstood your concerns.

Thanks,
Boshi Yu

> 
> Thanks


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  2025-07-28  3:08     ` Boshi Yu
@ 2025-07-28  7:08       ` Leon Romanovsky
  2025-07-28  7:47         ` Cheng Xu
  0 siblings, 1 reply; 11+ messages in thread
From: Leon Romanovsky @ 2025-07-28  7:08 UTC (permalink / raw)
  To: Boshi Yu; +Cc: jgg, linux-rdma, chengyou, kaishen

On Mon, Jul 28, 2025 at 11:08:46AM +0800, Boshi Yu wrote:
> 
> 
> On 2025/7/27 19:27, Leon Romanovsky wrote:
> > On Fri, Jul 25, 2025 at 01:53:54PM +0800, Boshi Yu wrote:
> > > Each high-level indirect MTT entry is assumed to point to exactly one page
> > > of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
> > > pages when mapping. To avoid extra overhead from splitting merged regions,
> > > use dma_map_page to map the scatter MTT buffer page by page.
> > > 
> > > Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
> > > Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
> > > ---
> > >   drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++--------
> > >   drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
> > >   2 files changed, 71 insertions(+), 43 deletions(-)
> > 
> > <...>
> > 
> > > +	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
> > > +	if (!pg_dma)
> > > +		return 0;
> > > -	sg_init_table(sglist, npages);
> > > +	addr = buf;
> > >   	for (i = 0; i < npages; i++) {
> > > -		pg = vmalloc_to_page(buf);
> > > +		pg = vmalloc_to_page(addr);
> > 
> > <...>
> > > +
> > > +		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
> > > +					 DMA_TO_DEVICE);
> > 
> > Does it work?
> 
> Hi Leon,
> 
> I would like to confirm which part you think is not working properly. I
> guess that you might be concerned that if the buffer is not page-aligned, it
> could cause problems with dma_map_page.
> 
> In fact, when allocating the MTT buffer, we ensure that it is always
> page-aligned and that its length is a multiple of PAGE_SIZE. We have also
> tested the new code in our production environment, and it works well.
> 
> Look forward to your further reply if I have misunderstood your concerns.

DMA API expects Kmalloc addresses and not Vmalloc ones.

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  2025-07-28  7:08       ` Leon Romanovsky
@ 2025-07-28  7:47         ` Cheng Xu
  2025-07-28 13:12           ` Leon Romanovsky
  0 siblings, 1 reply; 11+ messages in thread
From: Cheng Xu @ 2025-07-28  7:47 UTC (permalink / raw)
  To: Leon Romanovsky, Boshi Yu; +Cc: jgg, linux-rdma, kaishen



On 7/28/25 3:08 PM, Leon Romanovsky wrote:
> On Mon, Jul 28, 2025 at 11:08:46AM +0800, Boshi Yu wrote:
>>
>>
>> On 2025/7/27 19:27, Leon Romanovsky wrote:
>>> On Fri, Jul 25, 2025 at 01:53:54PM +0800, Boshi Yu wrote:
>>>> Each high-level indirect MTT entry is assumed to point to exactly one page
>>>> of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
>>>> pages when mapping. To avoid extra overhead from splitting merged regions,
>>>> use dma_map_page to map the scatter MTT buffer page by page.
>>>>
>>>> Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
>>>> Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
>>>> ---
>>>>   drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++--------
>>>>   drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
>>>>   2 files changed, 71 insertions(+), 43 deletions(-)
>>>
>>> <...>
>>>
>>>> +	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
>>>> +	if (!pg_dma)
>>>> +		return 0;
>>>> -	sg_init_table(sglist, npages);
>>>> +	addr = buf;
>>>>   	for (i = 0; i < npages; i++) {
>>>> -		pg = vmalloc_to_page(buf);
>>>> +		pg = vmalloc_to_page(addr);
>>>
>>> <...>
>>>> +
>>>> +		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
>>>> +					 DMA_TO_DEVICE);
>>>
>>> Does it work?
>>
>> Hi Leon,
>>
>> I would like to confirm which part you think is not working properly. I
>> guess that you might be concerned that if the buffer is not page-aligned, it
>> could cause problems with dma_map_page.
>>
>> In fact, when allocating the MTT buffer, we ensure that it is always
>> page-aligned and that its length is a multiple of PAGE_SIZE. We have also
>> tested the new code in our production environment, and it works well.
>>
>> Look forward to your further reply if I have misunderstood your concerns.
> 
> DMA API expects Kmalloc addresses and not Vmalloc ones.
> 

Hi Leon,

Thanks for your reply. Could you provide some references for this point?
We cannot find the constraint in the Kernel Documentation.

To our best knowledge, vzalloc allocates enough pages from the page level
allocator, and vmalloc_to_page converts the buffer to 'struct page *', then
dma_map_page can accept 'struct page *' as an input parameter to generate
the DMA address.

We can find many similar uses in the kernel. 

For example, pds_vfio_dirty_seq_ack in drivers/vfio/pci/pds/dirty.c:
<...>
	for (unsigned long long i = 0; i < npages; i++) {
		struct page *page = vmalloc_to_page(bmp);

		if (!page) {
			err = -EFAULT;
			goto out_free_pages;
		}

		pages[i] = page;
		bmp += PAGE_SIZE;
	}

	err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset,
					bmp_bytes, GFP_KERNEL);
	if (err)
		goto out_free_pages;

	err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
	if (err)
		goto out_free_sg_table;
<...>

Another example, irdma_map_vm_page_list in drivers/infiniband/hw/irdma/utils.c:
<...>
	for (i = 0; i < pg_cnt; i++) {
		vm_page = vmalloc_to_page(addr);
		if (!vm_page)
			goto err;

		pg_dma[i] = dma_map_page(hw->device, vm_page, 0, PAGE_SIZE,
					 DMA_BIDIRECTIONAL);
		if (dma_mapping_error(hw->device, pg_dma[i]))
			goto err;

		addr += PAGE_SIZE;
	}

<...>

If we have misunderstood something, please point it out and we would appreciate it.

Thanks,
Cheng Xu

> Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer
  2025-07-28  7:47         ` Cheng Xu
@ 2025-07-28 13:12           ` Leon Romanovsky
  0 siblings, 0 replies; 11+ messages in thread
From: Leon Romanovsky @ 2025-07-28 13:12 UTC (permalink / raw)
  To: Cheng Xu; +Cc: Boshi Yu, jgg, linux-rdma, kaishen

On Mon, Jul 28, 2025 at 03:47:57PM +0800, Cheng Xu wrote:
> 
> 
> On 7/28/25 3:08 PM, Leon Romanovsky wrote:
> > On Mon, Jul 28, 2025 at 11:08:46AM +0800, Boshi Yu wrote:
> >>
> >>
> >> On 2025/7/27 19:27, Leon Romanovsky wrote:
> >>> On Fri, Jul 25, 2025 at 01:53:54PM +0800, Boshi Yu wrote:
> >>>> Each high-level indirect MTT entry is assumed to point to exactly one page
> >>>> of the low-level MTT buffer, but dma_map_sg may merge contiguous physical
> >>>> pages when mapping. To avoid extra overhead from splitting merged regions,
> >>>> use dma_map_page to map the scatter MTT buffer page by page.
> >>>>
> >>>> Reviewed-by: Cheng Xu <chengyou@linux.alibaba.com>
> >>>> Signed-off-by: Boshi Yu <boshiyu@linux.alibaba.com>
> >>>> ---
> >>>>   drivers/infiniband/hw/erdma/erdma_verbs.c | 110 ++++++++++++++--------
> >>>>   drivers/infiniband/hw/erdma/erdma_verbs.h |   4 +-
> >>>>   2 files changed, 71 insertions(+), 43 deletions(-)
> >>>
> >>> <...>
> >>>
> >>>> +	pg_dma = vzalloc(npages * sizeof(dma_addr_t));
> >>>> +	if (!pg_dma)
> >>>> +		return 0;
> >>>> -	sg_init_table(sglist, npages);
> >>>> +	addr = buf;
> >>>>   	for (i = 0; i < npages; i++) {
> >>>> -		pg = vmalloc_to_page(buf);
> >>>> +		pg = vmalloc_to_page(addr);
> >>>
> >>> <...>
> >>>> +
> >>>> +		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
> >>>> +					 DMA_TO_DEVICE);
> >>>
> >>> Does it work?
> >>
> >> Hi Leon,
> >>
> >> I would like to confirm which part you think is not working properly. I
> >> guess that you might be concerned that if the buffer is not page-aligned, it
> >> could cause problems with dma_map_page.
> >>
> >> In fact, when allocating the MTT buffer, we ensure that it is always
> >> page-aligned and that its length is a multiple of PAGE_SIZE. We have also
> >> tested the new code in our production environment, and it works well.
> >>
> >> Look forward to your further reply if I have misunderstood your concerns.
> > 
> > DMA API expects Kmalloc addresses and not Vmalloc ones.
> > 
> 
> Hi Leon,
> 
> Thanks for your reply. Could you provide some references for this point?
> We cannot find the constraint in the Kernel Documentation.

Documentation/core-api/dma-api.rst.

The reason for such a constraint is the need to get contiguous memory, which
vzalloc doesn't guarantee. In your case, it works because you have
vmalloc_to_page() call.

So everything is ok, sorry for the noise.

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver
  2025-07-25  5:53 [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Boshi Yu
                   ` (2 preceding siblings ...)
  2025-07-25  5:53 ` [PATCH for-next 3/3] RDMA/erdma: Fix unset QPN of GSI QP Boshi Yu
@ 2025-08-13 10:27 ` Leon Romanovsky
  3 siblings, 0 replies; 11+ messages in thread
From: Leon Romanovsky @ 2025-08-13 10:27 UTC (permalink / raw)
  To: Boshi Yu; +Cc: jgg, linux-rdma, chengyou, kaishen

On Fri, Jul 25, 2025 at 01:53:53PM +0800, Boshi Yu wrote:
> Hi,
> 
> This series of patches provides several fixes for the erdma driver:
> - #1 uses dma_map_page to map the scatter MTT buffer page by page to avoid
>      merging contiguous physical pages.
> - #2 fixes ignored return value of init_kernel_qp.
> - #3 fixes unset QPN of GSI QP.
> 
> Thanks,
> Boshi Yu
> 
> Boshi Yu (3):
>   RDMA/erdma: Use dma_map_page to map scatter MTT buffer

Applied to for-next.

>   RDMA/erdma: Fix ignored return value of init_kernel_qp
>   RDMA/erdma: Fix unset QPN of GSI QP

Applied to for-rc.

Thanks

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2025-08-13 10:27 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-25  5:53 [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Boshi Yu
2025-07-25  5:53 ` [PATCH for-next 1/3] RDMA/erdma: Use dma_map_page to map scatter MTT buffer Boshi Yu
2025-07-27 11:27   ` Leon Romanovsky
2025-07-28  3:08     ` Boshi Yu
2025-07-28  7:08       ` Leon Romanovsky
2025-07-28  7:47         ` Cheng Xu
2025-07-28 13:12           ` Leon Romanovsky
2025-07-25  5:53 ` [PATCH for-next 2/3] RDMA/erdma: Fix ignored return value of init_kernel_qp Boshi Yu
2025-07-25  5:53 ` [PATCH for-next 3/3] RDMA/erdma: Fix unset QPN of GSI QP Boshi Yu
2025-07-25 15:26   ` Zhu Yanjun
2025-08-13 10:27 ` [PATCH for-next 0/3] RDMA/erdma: Misc fixes for the erdma driver Leon Romanovsky

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).