public inbox for linux-rdma@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration
@ 2021-05-21  9:29 Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 1/5] RDMA/hns: Optimize the base address table config for MTR Weihang Li
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Weihang Li @ 2021-05-21  9:29 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Weihang Li

These series includes several cleanups and fixes for HEM and MTR.

Weihang Li (1):
  RDMA/hns: Use refcount_t APIs for HEM

Xi Wang (4):
  RDMA/hns: Optimize the base address table config for MTR
  RDMA/hns: Refactor root BT allocation for MTR
  RDMA/hns: Fix wrong timer context buffer page size
  RDMA/hns: Clean the hardware related code for HEM

 drivers/infiniband/hw/hns/hns_roce_alloc.c  |  51 ++---
 drivers/infiniband/hw/hns/hns_roce_cq.c     |   4 +-
 drivers/infiniband/hw/hns/hns_roce_device.h |  17 +-
 drivers/infiniband/hw/hns/hns_roce_hem.c    | 342 +++++++++++++---------------
 drivers/infiniband/hw/hns/hns_roce_hem.h    |  13 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  77 +++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |   5 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  32 ++-
 drivers/infiniband/hw/hns/hns_roce_mr.c     |  14 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |   2 +-
 drivers/infiniband/hw/hns/hns_roce_srq.c    |  10 +-
 11 files changed, 315 insertions(+), 252 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH for-next 1/5] RDMA/hns: Optimize the base address table config for MTR
  2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
@ 2021-05-21  9:29 ` Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 2/5] RDMA/hns: Refactor root BT allocation " Weihang Li
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Weihang Li @ 2021-05-21  9:29 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Xi Wang, Weihang Li

From: Xi Wang <wangxi11@huawei.com>

The base address table is allocated by dma allocator, and the size is
always aligned to PAGE_SIZE. If a fixed size is used to allocate the table,
the number of base address entries stored in the table will be smaller than
that can actually stored.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_alloc.c  | 51 ++++++++++++-----------------
 drivers/infiniband/hw/hns/hns_roce_cq.c     |  4 +--
 drivers/infiniband/hw/hns/hns_roce_device.h | 15 ++++++---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 26 ++++++++++++---
 drivers/infiniband/hw/hns/hns_roce_mr.c     | 14 ++++----
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  2 +-
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 10 +++---
 7 files changed, 68 insertions(+), 54 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 5d389ed..51374b68 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -208,10 +208,10 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
 
 	/* Calc the trunk size and num by required size and page_shift */
 	if (flags & HNS_ROCE_BUF_DIRECT) {
-		buf->trunk_shift = ilog2(ALIGN(size, PAGE_SIZE));
+		buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE));
 		ntrunk = 1;
 	} else {
-		buf->trunk_shift = ilog2(ALIGN(page_size, PAGE_SIZE));
+		buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE));
 		ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
 	}
 
@@ -252,50 +252,41 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
 }
 
 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct hns_roce_buf *buf)
+			   int buf_cnt, struct hns_roce_buf *buf,
+			   unsigned int page_shift)
 {
-	int i, end;
-	int total;
-
-	end = start + buf_cnt;
-	if (end > buf->npages) {
-		dev_err(hr_dev->dev,
-			"failed to check kmem bufs, end %d + %d total %u!\n",
-			start, buf_cnt, buf->npages);
+	unsigned int offset, max_size;
+	int total = 0;
+	int i;
+
+	if (page_shift > buf->trunk_shift) {
+		dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n",
+			page_shift, buf->trunk_shift);
 		return -EINVAL;
 	}
 
-	total = 0;
-	for (i = start; i < end; i++)
-		bufs[total++] = hns_roce_buf_page(buf, i);
+	offset = 0;
+	max_size = buf->ntrunks << buf->trunk_shift;
+	for (i = 0; i < buf_cnt && offset < max_size; i++) {
+		bufs[total++] = hns_roce_buf_dma_addr(buf, offset);
+		offset += (1 << page_shift);
+	}
 
 	return total;
 }
 
 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct ib_umem *umem,
+			   int buf_cnt, struct ib_umem *umem,
 			   unsigned int page_shift)
 {
 	struct ib_block_iter biter;
 	int total = 0;
-	int idx = 0;
-	u64 addr;
-
-	if (page_shift < HNS_HW_PAGE_SHIFT) {
-		dev_err(hr_dev->dev, "failed to check umem page shift %u!\n",
-			page_shift);
-		return -EINVAL;
-	}
 
 	/* convert system page cnt to hw page cnt */
 	rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
-		addr = rdma_block_iter_dma_address(&biter);
-		if (idx >= start) {
-			bufs[total++] = addr;
-			if (total >= buf_cnt)
-				goto done;
-		}
-		idx++;
+		bufs[total++] = rdma_block_iter_dma_address(&biter);
+		if (total >= buf_cnt)
+			goto done;
 	}
 
 done:
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 488d86b..a5a74b6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -202,13 +202,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
 	struct hns_roce_buf_attr buf_attr = {};
 	int ret;
 
-	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
 	buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
 	buf_attr.region_count = 1;
 
 	ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
-				  hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
 	if (ret)
 		ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 111cab5..02fffb7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -1060,14 +1060,18 @@ static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
 			(offset & ((1 << buf->trunk_shift) - 1));
 }
 
-static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf,
+					       unsigned int offset)
 {
-	unsigned int offset = idx << buf->page_shift;
-
 	return buf->trunk_list[offset >> buf->trunk_shift].map +
 			(offset & ((1 << buf->trunk_shift) - 1));
 }
 
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+{
+	return hns_roce_buf_dma_addr(buf, idx << buf->page_shift);
+}
+
 #define hr_hw_page_align(x)		ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)
 
 static inline u64 to_hr_hw_page_addr(u64 addr)
@@ -1204,9 +1208,10 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
 					u32 page_shift, u32 flags);
 
 int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct hns_roce_buf *buf);
+			   int buf_cnt, struct hns_roce_buf *buf,
+			   unsigned int page_shift);
 int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
-			   int buf_cnt, int start, struct ib_umem *umem,
+			   int buf_cnt, struct ib_umem *umem,
 			   unsigned int page_shift);
 
 int hns_roce_create_srq(struct ib_srq *srq,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index d5e72e5..0c12d87 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2018,6 +2018,8 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 	caps->llm_buf_pg_sz = 0;
 
 	/* MR */
+	caps->mpt_ba_pg_sz = 0;
+	caps->mpt_buf_pg_sz = 0;
 	caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
 	caps->pbl_buf_pg_sz = 0;
 	calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
@@ -2025,8 +2027,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 		   HEM_TYPE_MTPT);
 
 	/* QP */
-	caps->qpc_timer_ba_pg_sz  = 0;
+	caps->qpc_ba_pg_sz = 0;
+	caps->qpc_buf_pg_sz = 0;
+	caps->qpc_timer_ba_pg_sz = 0;
 	caps->qpc_timer_buf_pg_sz = 0;
+	caps->sccc_ba_pg_sz = 0;
+	caps->sccc_buf_pg_sz = 0;
 	caps->mtt_ba_pg_sz = 0;
 	caps->mtt_buf_pg_sz = 0;
 	calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
@@ -2039,6 +2045,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 			   &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC);
 
 	/* CQ */
+	caps->cqc_ba_pg_sz = 0;
+	caps->cqc_buf_pg_sz = 0;
+	caps->cqc_timer_ba_pg_sz = 0;
+	caps->cqc_timer_buf_pg_sz = 0;
+	caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
+	caps->cqe_buf_pg_sz = 0;
 	calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
 		   caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
 		   HEM_TYPE_CQC);
@@ -2053,6 +2065,12 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 
 	/* SRQ */
 	if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) {
+		caps->srqc_ba_pg_sz = 0;
+		caps->srqc_buf_pg_sz = 0;
+		caps->srqwqe_ba_pg_sz = 0;
+		caps->srqwqe_buf_pg_sz = 0;
+		caps->idx_ba_pg_sz = 0;
+		caps->idx_buf_pg_sz = 0;
 		calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz,
 			   caps->srqc_hop_num, caps->srqc_bt_num,
 			   &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz,
@@ -6161,14 +6179,14 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
 	else
 		eq->hop_num = hr_dev->caps.eqe_hop_num;
 
-	buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = eq->entries * eq->eqe_size;
 	buf_attr.region[0].hopnum = eq->hop_num;
 	buf_attr.region_count = 1;
 
 	err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
-				  hr_dev->caps.eqe_ba_pg_sz +
-				  HNS_HW_PAGE_SHIFT, NULL, 0);
+				  hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL,
+				  0);
 	if (err)
 		dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 79b3c30..8e6b1ae 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -122,7 +122,7 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
 	buf_attr.mtt_only = is_fast;
 
 	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
-				  hr_dev->caps.pbl_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
 				  udata, start);
 	if (err)
 		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
@@ -737,11 +737,11 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 		return -ENOMEM;
 
 	if (mtr->umem)
-		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0,
+		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
 					       mtr->umem, page_shift);
 	else
-		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0,
-					       mtr->kmem);
+		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
+					       mtr->kmem, page_shift);
 
 	if (npage != page_count) {
 		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
@@ -753,8 +753,8 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 	if (mtr->hem_cfg.is_direct && npage > 1) {
 		ret = mtr_check_direct_pages(pages, npage, page_shift);
 		if (ret) {
-			ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n",
-				  mtr->umem ? "user" : "kernel", ret);
+			ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
+				  mtr->umem ? "umtr" : "kmtr", ret, npage);
 			ret = -ENOBUFS;
 			goto err_alloc_list;
 		}
@@ -799,7 +799,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
 		if (r->offset + r->count > page_cnt) {
 			ret = -EINVAL;
 			ibdev_err(ibdev,
-				  "failed to check mtr%u end %u + %u, max %u.\n",
+				  "failed to check mtr%u count %u + %u > %u.\n",
 				  i, r->offset, r->count, page_cnt);
 			return ret;
 		}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index c6e120e..9203cf1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -761,7 +761,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		goto err_inline;
 	}
 	ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
-				  HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
 				  udata, addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 546d182..c842210 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -167,14 +167,14 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 
 	srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
 
-	buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
 					srq->idx_que.entry_shift);
 	buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
 	buf_attr.region_count = 1;
 
 	ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
-				  hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
+				  hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT,
 				  udata, addr);
 	if (ret) {
 		ibdev_err(ibdev,
@@ -222,15 +222,15 @@ static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
 						      HNS_ROCE_SGE_SIZE *
 						      srq->max_gs)));
 
-	buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT;
 	buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
 							 srq->wqe_shift);
 	buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
 	buf_attr.region_count = 1;
 
 	ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
-				  hr_dev->caps.srqwqe_ba_pg_sz +
-				  HNS_HW_PAGE_SHIFT, udata, addr);
+				  hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT,
+				  udata, addr);
 	if (ret)
 		ibdev_err(ibdev,
 			  "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-next 2/5] RDMA/hns: Refactor root BT allocation for MTR
  2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 1/5] RDMA/hns: Optimize the base address table config for MTR Weihang Li
@ 2021-05-21  9:29 ` Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 3/5] RDMA/hns: Fix wrong timer context buffer page size Weihang Li
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Weihang Li @ 2021-05-21  9:29 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Xi Wang, Weihang Li

From: Xi Wang <wangxi11@huawei.com>

Split the hem_list_alloc_root_bt() into serval small functions to make the
code flow more clear.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hem.c | 230 ++++++++++++++++++++-----------
 1 file changed, 146 insertions(+), 84 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index cfd2e1b..63776da 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1053,7 +1053,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
 	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
 }
 
-struct roce_hem_item {
+struct hns_roce_hem_item {
 	struct list_head list; /* link all hems in the same bt level */
 	struct list_head sibling; /* link all hems in last hop for mtt */
 	void *addr;
@@ -1063,12 +1063,18 @@ struct roce_hem_item {
 	int end; /* end buf offset in this hem */
 };
 
-static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev,
-						   int start, int end,
-						   int count, bool exist_bt,
-						   int bt_level)
+/* All HEM items are linked in a tree structure */
+struct hns_roce_hem_head {
+	struct list_head branch[HNS_ROCE_MAX_BT_REGION];
+	struct list_head root;
+	struct list_head leaf;
+};
+
+static struct hns_roce_hem_item *
+hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
+		    bool exist_bt, int bt_level)
 {
-	struct roce_hem_item *hem;
+	struct hns_roce_hem_item *hem;
 
 	hem = kzalloc(sizeof(*hem), GFP_KERNEL);
 	if (!hem)
@@ -1093,7 +1099,7 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev,
 }
 
 static void hem_list_free_item(struct hns_roce_dev *hr_dev,
-			       struct roce_hem_item *hem, bool exist_bt)
+			       struct hns_roce_hem_item *hem, bool exist_bt)
 {
 	if (exist_bt)
 		dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN,
@@ -1104,7 +1110,7 @@ static void hem_list_free_item(struct hns_roce_dev *hr_dev,
 static void hem_list_free_all(struct hns_roce_dev *hr_dev,
 			      struct list_head *head, bool exist_bt)
 {
-	struct roce_hem_item *hem, *temp_hem;
+	struct hns_roce_hem_item *hem, *temp_hem;
 
 	list_for_each_entry_safe(hem, temp_hem, head, list) {
 		list_del(&hem->list);
@@ -1120,24 +1126,24 @@ static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr,
 
 /* assign L0 table address to hem from root bt */
 static void hem_list_assign_bt(struct hns_roce_dev *hr_dev,
-			       struct roce_hem_item *hem, void *cpu_addr,
+			       struct hns_roce_hem_item *hem, void *cpu_addr,
 			       u64 phy_addr)
 {
 	hem->addr = cpu_addr;
 	hem->dma_addr = (dma_addr_t)phy_addr;
 }
 
-static inline bool hem_list_page_is_in_range(struct roce_hem_item *hem,
+static inline bool hem_list_page_is_in_range(struct hns_roce_hem_item *hem,
 					     int offset)
 {
 	return (hem->start <= offset && offset <= hem->end);
 }
 
-static struct roce_hem_item *hem_list_search_item(struct list_head *ba_list,
-						    int page_offset)
+static struct hns_roce_hem_item *hem_list_search_item(struct list_head *ba_list,
+						      int page_offset)
 {
-	struct roce_hem_item *hem, *temp_hem;
-	struct roce_hem_item *found = NULL;
+	struct hns_roce_hem_item *hem, *temp_hem;
+	struct hns_roce_hem_item *found = NULL;
 
 	list_for_each_entry_safe(hem, temp_hem, ba_list, list) {
 		if (hem_list_page_is_in_range(hem, page_offset)) {
@@ -1227,9 +1233,9 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
 				 int offset, struct list_head *mid_bt,
 				 struct list_head *btm_bt)
 {
-	struct roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
+	struct hns_roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
 	struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL];
-	struct roce_hem_item *cur, *pre;
+	struct hns_roce_hem_item *cur, *pre;
 	const int hopnum = r->hopnum;
 	int start_aligned;
 	int distance;
@@ -1307,56 +1313,96 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
 	return ret;
 }
 
-static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
-				  struct hns_roce_hem_list *hem_list, int unit,
-				  const struct hns_roce_buf_region *regions,
-				  int region_cnt)
+static struct hns_roce_hem_item *
+alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num,
+	       const struct hns_roce_buf_region *regions, int region_cnt)
 {
-	struct list_head temp_list[HNS_ROCE_MAX_BT_REGION];
-	struct roce_hem_item *hem, *temp_hem, *root_hem;
 	const struct hns_roce_buf_region *r;
-	struct list_head temp_root;
-	struct list_head temp_btm;
-	void *cpu_base;
-	u64 phy_base;
-	int ret = 0;
+	struct hns_roce_hem_item *hem;
 	int ba_num;
 	int offset;
-	int total;
-	int step;
-	int i;
-
-	r = &regions[0];
-	root_hem = hem_list_search_item(&hem_list->root_bt, r->offset);
-	if (root_hem)
-		return 0;
 
 	ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
 	if (ba_num < 1)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	if (ba_num > unit)
-		return -ENOBUFS;
+		return ERR_PTR(-ENOBUFS);
 
-	ba_num = min_t(int, ba_num, unit);
-	INIT_LIST_HEAD(&temp_root);
-	offset = r->offset;
+	offset = regions[0].offset;
 	/* indicate to last region */
 	r = &regions[region_cnt - 1];
-	root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
-				       ba_num, true, 0);
-	if (!root_hem)
+	hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
+				  ba_num, true, 0);
+	if (!hem)
+		return ERR_PTR(-ENOMEM);
+
+	*max_ba_num = ba_num;
+
+	return hem;
+}
+
+static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+			      u64 phy_base, const struct hns_roce_buf_region *r,
+			      struct list_head *branch_head,
+			      struct list_head *leaf_head)
+{
+	struct hns_roce_hem_item *hem;
+
+	hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1,
+				  r->count, false, 0);
+	if (!hem)
 		return -ENOMEM;
-	list_add(&root_hem->list, &temp_root);
 
-	hem_list->root_ba = root_hem->dma_addr;
+	hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base);
+	list_add(&hem->list, branch_head);
+	list_add(&hem->sibling, leaf_head);
 
-	INIT_LIST_HEAD(&temp_btm);
-	for (i = 0; i < region_cnt; i++)
-		INIT_LIST_HEAD(&temp_list[i]);
+	return r->count;
+}
+
+static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+			   int unit, const struct hns_roce_buf_region *r,
+			   const struct list_head *branch_head)
+{
+	struct hns_roce_hem_item *hem, *temp_hem;
+	int total = 0;
+	int offset;
+	int step;
+
+	step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+	if (step < 1)
+		return -EINVAL;
+
+	/* if exist mid bt, link L1 to L0 */
+	list_for_each_entry_safe(hem, temp_hem, branch_head, list) {
+		offset = (hem->start - r->offset) / step * BA_BYTE_LEN;
+		hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr);
+		total++;
+	}
+
+	return total;
+}
+
+static int
+setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list,
+	       int unit, int max_ba_num, struct hns_roce_hem_head *head,
+	       const struct hns_roce_buf_region *regions, int region_cnt)
+{
+	const struct hns_roce_buf_region *r;
+	struct hns_roce_hem_item *root_hem;
+	void *cpu_base;
+	u64 phy_base;
+	int i, total;
+	int ret;
+
+	root_hem = list_first_entry(&head->root,
+				    struct hns_roce_hem_item, list);
+	if (!root_hem)
+		return -ENOMEM;
 
 	total = 0;
-	for (i = 0; i < region_cnt && total < ba_num; i++) {
+	for (i = 0; i < region_cnt && total < max_ba_num; i++) {
 		r = &regions[i];
 		if (!r->count)
 			continue;
@@ -1368,48 +1414,64 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
 		/* if hopnum is 0 or 1, cut a new fake hem from the root bt
 		 * which's address share to all regions.
 		 */
-		if (hem_list_is_bottom_bt(r->hopnum, 0)) {
-			hem = hem_list_alloc_item(hr_dev, r->offset,
-						  r->offset + r->count - 1,
-						  r->count, false, 0);
-			if (!hem) {
-				ret = -ENOMEM;
-				goto err_exit;
-			}
-			hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base);
-			list_add(&hem->list, &temp_list[i]);
-			list_add(&hem->sibling, &temp_btm);
-			total += r->count;
-		} else {
-			step = hem_list_calc_ba_range(r->hopnum, 1, unit);
-			if (step < 1) {
-				ret = -EINVAL;
-				goto err_exit;
-			}
-			/* if exist mid bt, link L1 to L0 */
-			list_for_each_entry_safe(hem, temp_hem,
-					  &hem_list->mid_bt[i][1], list) {
-				offset = (hem->start - r->offset) / step *
-					  BA_BYTE_LEN;
-				hem_list_link_bt(hr_dev, cpu_base + offset,
-						 hem->dma_addr);
-				total++;
-			}
-		}
+		if (hem_list_is_bottom_bt(r->hopnum, 0))
+			ret = alloc_fake_root_bt(hr_dev, cpu_base, phy_base, r,
+						 &head->branch[i], &head->leaf);
+		else
+			ret = setup_middle_bt(hr_dev, cpu_base, unit, r,
+					      &hem_list->mid_bt[i][1]);
+
+		if (ret < 0)
+			return ret;
+
+		total += ret;
 	}
 
-	list_splice(&temp_btm, &hem_list->btm_bt);
-	list_splice(&temp_root, &hem_list->root_bt);
+	list_splice(&head->leaf, &hem_list->btm_bt);
+	list_splice(&head->root, &hem_list->root_bt);
 	for (i = 0; i < region_cnt; i++)
-		list_splice(&temp_list[i], &hem_list->mid_bt[i][0]);
+		list_splice(&head->branch[i], &hem_list->mid_bt[i][0]);
 
 	return 0;
+}
 
-err_exit:
+static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
+				  struct hns_roce_hem_list *hem_list, int unit,
+				  const struct hns_roce_buf_region *regions,
+				  int region_cnt)
+{
+	struct hns_roce_hem_item *root_hem;
+	struct hns_roce_hem_head head;
+	int max_ba_num;
+	int ret;
+	int i;
+
+	root_hem = hem_list_search_item(&hem_list->root_bt, regions[0].offset);
+	if (root_hem)
+		return 0;
+
+	max_ba_num = 0;
+	root_hem = alloc_root_hem(hr_dev, unit, &max_ba_num, regions,
+				  region_cnt);
+	if (IS_ERR(root_hem))
+		return PTR_ERR(root_hem);
+
+	/* List head for storing all allocated HEM items */
+	INIT_LIST_HEAD(&head.root);
+	INIT_LIST_HEAD(&head.leaf);
 	for (i = 0; i < region_cnt; i++)
-		hem_list_free_all(hr_dev, &temp_list[i], false);
+		INIT_LIST_HEAD(&head.branch[i]);
 
-	hem_list_free_all(hr_dev, &temp_root, true);
+	hem_list->root_ba = root_hem->dma_addr;
+	list_add(&root_hem->list, &head.root);
+	ret = setup_root_hem(hr_dev, hem_list, unit, max_ba_num, &head, regions,
+			     region_cnt);
+	if (ret) {
+		for (i = 0; i < region_cnt; i++)
+			hem_list_free_all(hr_dev, &head.branch[i], false);
+
+		hem_list_free_all(hr_dev, &head.root, true);
+	}
 
 	return ret;
 }
@@ -1495,7 +1557,7 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
 				 int offset, int *mtt_cnt, u64 *phy_addr)
 {
 	struct list_head *head = &hem_list->btm_bt;
-	struct roce_hem_item *hem, *temp_hem;
+	struct hns_roce_hem_item *hem, *temp_hem;
 	void *cpu_base = NULL;
 	u64 phy_base = 0;
 	int nr = 0;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-next 3/5] RDMA/hns: Fix wrong timer context buffer page size
  2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 1/5] RDMA/hns: Optimize the base address table config for MTR Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 2/5] RDMA/hns: Refactor root BT allocation " Weihang Li
@ 2021-05-21  9:29 ` Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 4/5] RDMA/hns: Use refcount_t APIs for HEM Weihang Li
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Weihang Li @ 2021-05-21  9:29 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Xi Wang, Weihang Li

From: Xi Wang <wangxi11@huawei.com>

The HEM page size for QPC timer and CQC timer is always 4K and there's no
need to calculate a different size by the hns driver, otherwise the ROCEE
may access an invalid address.

Fixes: 719d13415f59 ("RDMA/hns: Remove duplicated hem page size config code")
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0c12d87..17cf51a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2057,12 +2057,6 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
 	calc_pg_sz(caps->max_cqes, caps->cqe_sz, caps->cqe_hop_num,
 		   1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
 
-	if (caps->cqc_timer_entry_sz)
-		calc_pg_sz(caps->num_cqc_timer, caps->cqc_timer_entry_sz,
-			   caps->cqc_timer_hop_num, caps->cqc_timer_bt_num,
-			   &caps->cqc_timer_buf_pg_sz,
-			   &caps->cqc_timer_ba_pg_sz, HEM_TYPE_CQC_TIMER);
-
 	/* SRQ */
 	if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) {
 		caps->srqc_ba_pg_sz = 0;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-next 4/5] RDMA/hns: Use refcount_t APIs for HEM
  2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
                   ` (2 preceding siblings ...)
  2021-05-21  9:29 ` [PATCH for-next 3/5] RDMA/hns: Fix wrong timer context buffer page size Weihang Li
@ 2021-05-21  9:29 ` Weihang Li
  2021-05-21  9:29 ` [PATCH for-next 5/5] RDMA/hns: Clean the hardware related code " Weihang Li
  2021-05-28 23:20 ` [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Jason Gunthorpe
  5 siblings, 0 replies; 7+ messages in thread
From: Weihang Li @ 2021-05-21  9:29 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Weihang Li

refcount_t is better than integer for reference counting, it will WARN on
overflow/underflow and avoid use-after-free risks.

Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hem.c | 32 +++++++++++++++-----------------
 drivers/infiniband/hw/hns/hns_roce_hem.h |  4 ++--
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 63776da..fe6940b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -271,7 +271,6 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
 	if (!hem)
 		return NULL;
 
-	hem->refcount = 0;
 	INIT_LIST_HEAD(&hem->chunk_list);
 
 	order = get_order(hem_alloc_size);
@@ -618,7 +617,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
 
 	mutex_lock(&table->mutex);
 	if (table->hem[index.buf]) {
-		++table->hem[index.buf]->refcount;
+		refcount_inc(&table->hem[index.buf]->refcount);
 		goto out;
 	}
 
@@ -637,7 +636,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
 		}
 	}
 
-	++table->hem[index.buf]->refcount;
+	refcount_set(&table->hem[index.buf]->refcount, 1);
 	goto out;
 
 err_alloc:
@@ -663,7 +662,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
 	mutex_lock(&table->mutex);
 
 	if (table->hem[i]) {
-		++table->hem[i]->refcount;
+		refcount_inc(&table->hem[i]->refcount);
 		goto out;
 	}
 
@@ -686,7 +685,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
 		goto out;
 	}
 
-	++table->hem[i]->refcount;
+	refcount_set(&table->hem[i]->refcount, 1);
 out:
 	mutex_unlock(&table->mutex);
 	return ret;
@@ -753,11 +752,11 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
 		return;
 	}
 
-	mutex_lock(&table->mutex);
-	if (check_refcount && (--table->hem[index.buf]->refcount > 0)) {
-		mutex_unlock(&table->mutex);
+	if (!check_refcount)
+		mutex_lock(&table->mutex);
+	else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount,
+					      &table->mutex))
 		return;
-	}
 
 	clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
 	free_mhop_hem(hr_dev, table, &mhop, &index);
@@ -779,16 +778,15 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
 	i = (obj & (table->num_obj - 1)) /
 	    (table->table_chunk_size / table->obj_size);
 
-	mutex_lock(&table->mutex);
+	if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
+					 &table->mutex))
+		return;
 
-	if (--table->hem[i]->refcount == 0) {
-		/* Clear HEM base address */
-		if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
-			dev_warn(dev, "Clear HEM base address failed.\n");
+	if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
+		dev_warn(dev, "failed to clear HEM base address.\n");
 
-		hns_roce_free_hem(hr_dev, table->hem[i]);
-		table->hem[i] = NULL;
-	}
+	hns_roce_free_hem(hr_dev, table->hem[i]);
+	table->hem[i] = NULL;
 
 	mutex_unlock(&table->mutex);
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 13fdeb3..ffa65e8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -88,8 +88,8 @@ struct hns_roce_hem_chunk {
 };
 
 struct hns_roce_hem {
-	struct list_head	 chunk_list;
-	int			 refcount;
+	struct list_head chunk_list;
+	refcount_t refcount;
 };
 
 struct hns_roce_hem_iter {
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH for-next 5/5] RDMA/hns: Clean the hardware related code for HEM
  2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
                   ` (3 preceding siblings ...)
  2021-05-21  9:29 ` [PATCH for-next 4/5] RDMA/hns: Use refcount_t APIs for HEM Weihang Li
@ 2021-05-21  9:29 ` Weihang Li
  2021-05-28 23:20 ` [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Jason Gunthorpe
  5 siblings, 0 replies; 7+ messages in thread
From: Weihang Li @ 2021-05-21  9:29 UTC (permalink / raw)
  To: dledford, jgg; +Cc: leon, linux-rdma, linuxarm, Xi Wang, Weihang Li

From: Xi Wang <wangxi11@huawei.com>

Move the HIP06 related code to the hw v1 source file for HEM.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  2 -
 drivers/infiniband/hw/hns/hns_roce_hem.c    | 82 +----------------------------
 drivers/infiniband/hw/hns/hns_roce_hem.h    |  9 +---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  | 77 +++++++++++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h  |  5 ++
 5 files changed, 85 insertions(+), 90 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 02fffb7..c6cacd2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -47,8 +47,6 @@
 
 #define HNS_ROCE_IB_MIN_SQ_STRIDE		6
 
-#define HNS_ROCE_BA_SIZE			(32 * 4096)
-
 #define BA_BYTE_LEN				8
 
 /* Hardware specification only for v1 engine */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index fe6940b..7fdeedd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -36,9 +36,6 @@
 #include "hns_roce_hem.h"
 #include "hns_roce_common.h"
 
-#define DMA_ADDR_T_SHIFT		12
-#define BT_BA_SHIFT			32
-
 #define HEM_INDEX_BUF			BIT(0)
 #define HEM_INDEX_L0			BIT(1)
 #define HEM_INDEX_L1			BIT(2)
@@ -337,81 +334,6 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
 	kfree(hem);
 }
 
-static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
-			    struct hns_roce_hem_table *table, unsigned long obj)
-{
-	spinlock_t *lock = &hr_dev->bt_cmd_lock;
-	struct device *dev = hr_dev->dev;
-	struct hns_roce_hem_iter iter;
-	void __iomem *bt_cmd;
-	__le32 bt_cmd_val[2];
-	__le32 bt_cmd_h = 0;
-	unsigned long flags;
-	__le32 bt_cmd_l;
-	int ret = 0;
-	u64 bt_ba;
-	long end;
-
-	/* Find the HEM(Hardware Entry Memory) entry */
-	unsigned long i = (obj & (table->num_obj - 1)) /
-			  (table->table_chunk_size / table->obj_size);
-
-	switch (table->type) {
-	case HEM_TYPE_QPC:
-	case HEM_TYPE_MTPT:
-	case HEM_TYPE_CQC:
-	case HEM_TYPE_SRQC:
-		roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-			ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
-		break;
-	default:
-		return ret;
-	}
-
-	roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
-		       ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
-	roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
-	roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
-	/* Currently iter only a chunk */
-	for (hns_roce_hem_first(table->hem[i], &iter);
-	     !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
-		bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT;
-
-		spin_lock_irqsave(lock, flags);
-
-		bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
-		end = HW_SYNC_TIMEOUT_MSECS;
-		while (end > 0) {
-			if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT))
-				break;
-
-			mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
-			end -= HW_SYNC_SLEEP_TIME_INTERVAL;
-		}
-
-		if (end <= 0) {
-			dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
-			spin_unlock_irqrestore(lock, flags);
-			return -EBUSY;
-		}
-
-		bt_cmd_l = cpu_to_le32(bt_ba);
-		roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
-			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
-			       bt_ba >> BT_BA_SHIFT);
-
-		bt_cmd_val[0] = bt_cmd_l;
-		bt_cmd_val[1] = bt_cmd_h;
-		hns_roce_write64_k(bt_cmd_val,
-				   hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
-		spin_unlock_irqrestore(lock, flags);
-	}
-
-	return ret;
-}
-
 static int calc_hem_config(struct hns_roce_dev *hr_dev,
 			   struct hns_roce_hem_table *table, unsigned long obj,
 			   struct hns_roce_hem_mhop *mhop,
@@ -677,7 +599,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
 	}
 
 	/* Set HEM base address(128K/page, pa) to Hardware */
-	if (hns_roce_set_hem(hr_dev, table, obj)) {
+	if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) {
 		hns_roce_free_hem(hr_dev, table->hem[i]);
 		table->hem[i] = NULL;
 		ret = -ENODEV;
@@ -782,7 +704,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
 					 &table->mutex))
 		return;
 
-	if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
+	if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT))
 		dev_warn(dev, "failed to clear HEM base address.\n");
 
 	hns_roce_free_hem(hr_dev, table->hem[i]);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index ffa65e8..2d84a6b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -34,9 +34,7 @@
 #ifndef _HNS_ROCE_HEM_H
 #define _HNS_ROCE_HEM_H
 
-#define HW_SYNC_SLEEP_TIME_INTERVAL	20
-#define HW_SYNC_TIMEOUT_MSECS           (25 * HW_SYNC_SLEEP_TIME_INTERVAL)
-#define BT_CMD_SYNC_SHIFT		31
+#define HEM_HOP_STEP_DIRECT 0xff
 
 enum {
 	/* MAP HEM(Hardware Entry Memory) */
@@ -74,11 +72,6 @@ enum {
 	(type >= HEM_TYPE_MTT && hop_num == 1) || \
 	(type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0))
 
-enum {
-	 HNS_ROCE_HEM_PAGE_SHIFT = 12,
-	 HNS_ROCE_HEM_PAGE_SIZE  = 1 << HNS_ROCE_HEM_PAGE_SHIFT,
-};
-
 struct hns_roce_hem_chunk {
 	struct list_head	 list;
 	int			 npages;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 620acf6..0c836cc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -462,6 +462,82 @@ static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
 	roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
 }
 
+static int hns_roce_v1_set_hem(struct hns_roce_dev *hr_dev,
+			       struct hns_roce_hem_table *table, int obj,
+			       int step_idx)
+{
+	spinlock_t *lock = &hr_dev->bt_cmd_lock;
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_hem_iter iter;
+	void __iomem *bt_cmd;
+	__le32 bt_cmd_val[2];
+	__le32 bt_cmd_h = 0;
+	unsigned long flags;
+	__le32 bt_cmd_l;
+	int ret = 0;
+	u64 bt_ba;
+	long end;
+
+	/* Find the HEM(Hardware Entry Memory) entry */
+	unsigned long i = (obj & (table->num_obj - 1)) /
+			  (table->table_chunk_size / table->obj_size);
+
+	switch (table->type) {
+	case HEM_TYPE_QPC:
+	case HEM_TYPE_MTPT:
+	case HEM_TYPE_CQC:
+	case HEM_TYPE_SRQC:
+		roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+			ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type);
+		break;
+	default:
+		return ret;
+	}
+
+	roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
+		       ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
+	roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
+	roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
+
+	/* Currently iter only a chunk */
+	for (hns_roce_hem_first(table->hem[i], &iter);
+	     !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
+		bt_ba = hns_roce_hem_addr(&iter) >> HNS_HW_PAGE_SHIFT;
+
+		spin_lock_irqsave(lock, flags);
+
+		bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
+
+		end = HW_SYNC_TIMEOUT_MSECS;
+		while (end > 0) {
+			if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT))
+				break;
+
+			mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
+			end -= HW_SYNC_SLEEP_TIME_INTERVAL;
+		}
+
+		if (end <= 0) {
+			dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
+			spin_unlock_irqrestore(lock, flags);
+			return -EBUSY;
+		}
+
+		bt_cmd_l = cpu_to_le32(bt_ba);
+		roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
+			       ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
+			       upper_32_bits(bt_ba));
+
+		bt_cmd_val[0] = bt_cmd_l;
+		bt_cmd_val[1] = bt_cmd_h;
+		hns_roce_write64_k(bt_cmd_val,
+				   hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
+		spin_unlock_irqrestore(lock, flags);
+	}
+
+	return ret;
+}
+
 static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
 				     u32 odb_mode)
 {
@@ -4352,6 +4428,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = {
 	.set_mtu = hns_roce_v1_set_mtu,
 	.write_mtpt = hns_roce_v1_write_mtpt,
 	.write_cqc = hns_roce_v1_write_cqc,
+	.set_hem = hns_roce_v1_set_hem,
 	.clear_hem = hns_roce_v1_clear_hem,
 	.modify_qp = hns_roce_v1_modify_qp,
 	.dereg_mr = hns_roce_v1_dereg_mr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 8438323..60fdcba 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -1085,6 +1085,11 @@ struct hns_roce_db_table {
 	struct hns_roce_ext_db *ext_db;
 };
 
+#define HW_SYNC_SLEEP_TIME_INTERVAL 20
+#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL)
+#define BT_CMD_SYNC_SHIFT 31
+#define HNS_ROCE_BA_SIZE (32 * 4096)
+
 struct hns_roce_bt_table {
 	struct hns_roce_buf_list qpc_buf;
 	struct hns_roce_buf_list mtpt_buf;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration
  2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
                   ` (4 preceding siblings ...)
  2021-05-21  9:29 ` [PATCH for-next 5/5] RDMA/hns: Clean the hardware related code " Weihang Li
@ 2021-05-28 23:20 ` Jason Gunthorpe
  5 siblings, 0 replies; 7+ messages in thread
From: Jason Gunthorpe @ 2021-05-28 23:20 UTC (permalink / raw)
  To: Weihang Li; +Cc: dledford, leon, linux-rdma, linuxarm

On Fri, May 21, 2021 at 05:29:50PM +0800, Weihang Li wrote:
> These series includes several cleanups and fixes for HEM and MTR.
> 
> Weihang Li (1):
>   RDMA/hns: Use refcount_t APIs for HEM
> 
> Xi Wang (4):
>   RDMA/hns: Optimize the base address table config for MTR
>   RDMA/hns: Refactor root BT allocation for MTR
>   RDMA/hns: Fix wrong timer context buffer page size
>   RDMA/hns: Clean the hardware related code for HEM

Applied to for-next, thanks

Jason

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-05-28 23:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-05-21  9:29 [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Weihang Li
2021-05-21  9:29 ` [PATCH for-next 1/5] RDMA/hns: Optimize the base address table config for MTR Weihang Li
2021-05-21  9:29 ` [PATCH for-next 2/5] RDMA/hns: Refactor root BT allocation " Weihang Li
2021-05-21  9:29 ` [PATCH for-next 3/5] RDMA/hns: Fix wrong timer context buffer page size Weihang Li
2021-05-21  9:29 ` [PATCH for-next 4/5] RDMA/hns: Use refcount_t APIs for HEM Weihang Li
2021-05-21  9:29 ` [PATCH for-next 5/5] RDMA/hns: Clean the hardware related code " Weihang Li
2021-05-28 23:20 ` [PATCH for-next 0/5] RDMA/hns: Changes about memory allocation and configuration Jason Gunthorpe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox