[PATCH net-next,0/2] octeontx2-af: Update CPT block for CN10KB

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH net-next,0/2] octeontx2-af: Update CPT block for CN10KB
@ 2024-08-27  4:25 Srujana Challa
  2024-08-27  4:25 ` [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors " Srujana Challa
  2024-08-27  4:25 ` [PATCH net-next,2/2] octeontx2-af: configure default CPT credits Srujana Challa
  0 siblings, 2 replies; 6+ messages in thread
From: Srujana Challa @ 2024-08-27  4:25 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, pabeni, edumazet, sgoutham, lcherian, gakula, jerinj,
	hkelam, sbhatta, bbhushan2, ndabilpuram

This commit addresses two key updates for the CN10KB hardware:

1. The number of FLT interrupt vectors has been reduced to 2. The code
is updated to reflect this change across the CN10K series.
2. The maximum CPT credits that RX can use are now configurable through
a hardware CSR. This patch sets the default value to optimize peak
performance, aligning it with other chip versions.

Srujana Challa (2):
  octeontx2-af: reduce cpt flt interrupt vectors for CN10KB
  octeontx2-af: configure default CPT credits

 .../net/ethernet/marvell/octeontx2/af/mbox.h  |   5 +-
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |  30 +++++
 .../ethernet/marvell/octeontx2/af/rvu_cpt.c   | 108 +++++++++++++++---
 .../ethernet/marvell/octeontx2/af/rvu_reg.h   |   1 +
 .../marvell/octeontx2/af/rvu_struct.h         |   6 +-
 5 files changed, 129 insertions(+), 21 deletions(-)

-- 
2.25.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors for CN10KB
  2024-08-27  4:25 [PATCH net-next,0/2] octeontx2-af: Update CPT block for CN10KB Srujana Challa
@ 2024-08-27  4:25 ` Srujana Challa
  2024-08-27 16:32   ` Simon Horman
  2024-08-27  4:25 ` [PATCH net-next,2/2] octeontx2-af: configure default CPT credits Srujana Challa
  1 sibling, 1 reply; 6+ messages in thread
From: Srujana Challa @ 2024-08-27  4:25 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, pabeni, edumazet, sgoutham, lcherian, gakula, jerinj,
	hkelam, sbhatta, bbhushan2, ndabilpuram

On CN10KB, number of flt interrupt vectors are reduced to
2. So, modify the code accordingly for cn10k.

Signed-off-by: Srujana Challa <schalla@marvell.com>
---
 .../net/ethernet/marvell/octeontx2/af/mbox.h  |  5 +-
 .../ethernet/marvell/octeontx2/af/rvu_cpt.c   | 79 ++++++++++++++++---
 .../marvell/octeontx2/af/rvu_struct.h         |  6 +-
 3 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index ed2160cc5acb..6ea2f3071fe8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1856,8 +1856,9 @@ struct cpt_flt_eng_info_req {
 
 struct cpt_flt_eng_info_rsp {
 	struct mbox_msghdr hdr;
-	u64 flt_eng_map[CPT_10K_AF_INT_VEC_RVU];
-	u64 rcvrd_eng_map[CPT_10K_AF_INT_VEC_RVU];
+#define CPT_AF_MAX_FLT_INT_VECS 3
+	u64 flt_eng_map[CPT_AF_MAX_FLT_INT_VECS];
+	u64 rcvrd_eng_map[CPT_AF_MAX_FLT_INT_VECS];
 	u64 rsvd;
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index 3e09d2285814..e56d27018828 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -37,6 +37,44 @@
 	(_rsp)->free_sts_##etype = free_sts;                        \
 })
 
+#define MAX_AE  GENMASK_ULL(47, 32)
+#define MAX_IE  GENMASK_ULL(31, 16)
+#define MAX_SE  GENMASK_ULL(15, 0)
+static u32 cpt_max_engines_get(struct rvu *rvu)
+{
+	u16 max_ses, max_ies, max_aes;
+	u64 reg;
+
+	reg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS1);
+	max_ses = FIELD_GET(MAX_SE, reg);
+	max_ies = FIELD_GET(MAX_IE, reg);
+	max_aes = FIELD_GET(MAX_AE, reg);
+
+	return max_ses + max_ies + max_aes;
+}
+
+/* Number of flt interrupt vectors are depends on number of engines that the
+ * chip has. Each flt vector represents 64 engines.
+ */
+static int cpt_10k_flt_nvecs_get(struct rvu *rvu)
+{
+	u32 max_engs;
+	int flt_vecs;
+
+	max_engs = cpt_max_engines_get(rvu);
+
+	flt_vecs = (max_engs / 64);
+	flt_vecs += (max_engs % 64) ? 1 : 0;
+
+	if (flt_vecs > CPT_10K_AF_INT_VEC_FLT_MAX) {
+		dev_warn(rvu->dev, "flt_vecs(%d) exceeds the max vectors(%d)\n",
+			 flt_vecs, CPT_10K_AF_INT_VEC_FLT_MAX);
+		flt_vecs = CPT_10K_AF_INT_VEC_FLT_MAX;
+	}
+
+	return flt_vecs;
+}
+
 static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr)
 {
 	struct rvu_block *block = ptr;
@@ -150,17 +188,25 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off)
 {
 	struct rvu *rvu = block->rvu;
 	int blkaddr = block->addr;
+	u32 max_engs;
+	u8 nr;
 	int i;
 
+	max_engs = cpt_max_engines_get(rvu);
+
 	/* Disable all CPT AF interrupts */
-	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL);
-	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL);
-	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF);
+	for (i = CPT_10K_AF_INT_VEC_FLT0; i < cpt_10k_flt_nvecs_get(rvu); i++) {
+		nr = (max_engs > 64) ? 64 : max_engs;
+		max_engs -= nr;
+		rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i),
+			    INTR_MASK(nr));
+	}
 
 	rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1);
 	rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1);
 
-	for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++)
+	/* CPT AF interrupt vectors are flt_int, rvu_int and ras_int. */
+	for (i = 0; i < cpt_10k_flt_nvecs_get(rvu) + 2; i++)
 		if (rvu->irq_allocated[off + i]) {
 			free_irq(pci_irq_vector(rvu->pdev, off + i), block);
 			rvu->irq_allocated[off + i] = false;
@@ -206,12 +252,17 @@ void rvu_cpt_unregister_interrupts(struct rvu *rvu)
 
 static int cpt_10k_register_interrupts(struct rvu_block *block, int off)
 {
+	int rvu_intr_vec, ras_intr_vec;
 	struct rvu *rvu = block->rvu;
 	int blkaddr = block->addr;
 	irq_handler_t flt_fn;
+	u32 max_engs;
 	int i, ret;
+	u8 nr;
+
+	max_engs = cpt_max_engines_get(rvu);
 
-	for (i = CPT_10K_AF_INT_VEC_FLT0; i < CPT_10K_AF_INT_VEC_RVU; i++) {
+	for (i = CPT_10K_AF_INT_VEC_FLT0; i < cpt_10k_flt_nvecs_get(rvu); i++) {
 		sprintf(&rvu->irq_name[(off + i) * NAME_SIZE], "CPTAF FLT%d", i);
 
 		switch (i) {
@@ -229,20 +280,24 @@ static int cpt_10k_register_interrupts(struct rvu_block *block, int off)
 						    flt_fn, &rvu->irq_name[(off + i) * NAME_SIZE]);
 		if (ret)
 			goto err;
-		if (i == CPT_10K_AF_INT_VEC_FLT2)
-			rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), 0xFFFF);
-		else
-			rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i), ~0ULL);
+
+		nr = (max_engs > 64) ? 64 : max_engs;
+		max_engs -= nr;
+		rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1S(i),
+			    INTR_MASK(nr));
 	}
 
-	ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RVU,
+	rvu_intr_vec = cpt_10k_flt_nvecs_get(rvu);
+	ras_intr_vec = rvu_intr_vec + 1;
+
+	ret = rvu_cpt_do_register_interrupt(block, off + rvu_intr_vec,
 					    rvu_cpt_af_rvu_intr_handler,
 					    "CPTAF RVU");
 	if (ret)
 		goto err;
 	rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1S, 0x1);
 
-	ret = rvu_cpt_do_register_interrupt(block, off + CPT_10K_AF_INT_VEC_RAS,
+	ret = rvu_cpt_do_register_interrupt(block, off + ras_intr_vec,
 					    rvu_cpt_af_ras_intr_handler,
 					    "CPTAF RAS");
 	if (ret)
@@ -928,7 +983,7 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r
 		return blkaddr;
 
 	block = &rvu->hw->block[blkaddr];
-	for (vec = 0; vec < CPT_10K_AF_INT_VEC_RVU; vec++) {
+	for (vec = 0; vec < cpt_10k_flt_nvecs_get(block->rvu); vec++) {
 		spin_lock_irqsave(&rvu->cpt_intr_lock, flags);
 		rsp->flt_eng_map[vec] = block->cpt_flt_eng_map[vec];
 		rsp->rcvrd_eng_map[vec] = block->cpt_rcvrd_eng_map[vec];
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index 5ef406c7e8a4..fc8da2090657 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -71,13 +71,11 @@ enum cpt_af_int_vec_e {
 	CPT_AF_INT_VEC_CNT	= 0x4,
 };
 
-enum cpt_10k_af_int_vec_e {
+enum cpt_cn10k_flt_int_vec_e {
 	CPT_10K_AF_INT_VEC_FLT0	= 0x0,
 	CPT_10K_AF_INT_VEC_FLT1	= 0x1,
 	CPT_10K_AF_INT_VEC_FLT2	= 0x2,
-	CPT_10K_AF_INT_VEC_RVU	= 0x3,
-	CPT_10K_AF_INT_VEC_RAS	= 0x4,
-	CPT_10K_AF_INT_VEC_CNT	= 0x5,
+	CPT_10K_AF_INT_VEC_FLT_MAX = 0x3,
 };
 
 /* NPA Admin function Interrupt Vector Enumeration */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH net-next,2/2] octeontx2-af: configure default CPT credits
  2024-08-27  4:25 [PATCH net-next,0/2] octeontx2-af: Update CPT block for CN10KB Srujana Challa
  2024-08-27  4:25 ` [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors " Srujana Challa
@ 2024-08-27  4:25 ` Srujana Challa
  2024-08-27 16:37   ` Simon Horman
  1 sibling, 1 reply; 6+ messages in thread
From: Srujana Challa @ 2024-08-27  4:25 UTC (permalink / raw)
  To: netdev
  Cc: davem, kuba, pabeni, edumazet, sgoutham, lcherian, gakula, jerinj,
	hkelam, sbhatta, bbhushan2, ndabilpuram

The maximum CPT credits that RX can use are now configurable through
a hardware CSR. This patch sets the default value to optimize peak
performance, aligning it with other chip versions.
This patch also adds changes to avoid RXC HW registers access on
CN10KB as RXC is not available on CN10KB.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>
---
 .../net/ethernet/marvell/octeontx2/af/rvu.h   | 30 +++++++++++++++++++
 .../ethernet/marvell/octeontx2/af/rvu_cpt.c   | 29 ++++++++++++++++--
 .../ethernet/marvell/octeontx2/af/rvu_reg.h   |  1 +
 3 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 03ee93fd9e94..43b1d83686d1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -400,6 +400,7 @@ struct hw_cap {
 	bool	nix_multiple_dwrr_mtu;   /* Multiple DWRR_MTU to choose from */
 	bool	npc_hash_extract; /* Hash extract enabled ? */
 	bool	npc_exact_match_enabled; /* Exact match supported ? */
+	bool    cpt_rxc;   /* Is CPT-RXC supported */
 };
 
 struct rvu_hwinfo {
@@ -690,6 +691,35 @@ static inline bool is_cnf10ka_a0(struct rvu *rvu)
 	return false;
 }
 
+static inline bool is_cn10ka_a0(struct rvu *rvu)
+{
+	struct pci_dev *pdev = rvu->pdev;
+
+	if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A &&
+	    (pdev->revision & 0x0F) == 0x0)
+		return true;
+	return false;
+}
+
+static inline bool is_cn10ka_a1(struct rvu *rvu)
+{
+	struct pci_dev *pdev = rvu->pdev;
+
+	if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_A &&
+	    (pdev->revision & 0x0F) == 0x1)
+		return true;
+	return false;
+}
+
+static inline bool is_cn10kb(struct rvu *rvu)
+{
+	struct pci_dev *pdev = rvu->pdev;
+
+	if (pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_B)
+		return true;
+	return false;
+}
+
 static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu)
 {
 	u64 npc_const3;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
index e56d27018828..e0438b819309 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
@@ -728,6 +728,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req)
 		case CPT_AF_BLK_RST:
 		case CPT_AF_CONSTANTS1:
 		case CPT_AF_CTX_FLUSH_TIMER:
+		case CPT_AF_RXC_CFG1:
 			return true;
 		}
 
@@ -788,6 +789,8 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu,
 
 static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr)
 {
+	struct rvu_hwinfo *hw = rvu->hw;
+
 	if (is_rvu_otx2(rvu))
 		return;
 
@@ -811,14 +814,16 @@ static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr)
 	rsp->ctx_err = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ERR);
 	rsp->ctx_enc_id = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ENC_ID);
 	rsp->ctx_flush_timer = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FLUSH_TIMER);
+	rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0));
+	rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1));
 
+	if (!hw->cap.cpt_rxc)
+		return;
 	rsp->rxc_time = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME);
 	rsp->rxc_time_cfg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG);
 	rsp->rxc_active_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS);
 	rsp->rxc_zombie_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS);
 	rsp->rxc_dfrg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_DFRG);
-	rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0));
-	rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1));
 }
 
 static void get_eng_sts(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr)
@@ -999,10 +1004,11 @@ int rvu_mbox_handler_cpt_flt_eng_info(struct rvu *rvu, struct cpt_flt_eng_info_r
 static void cpt_rxc_teardown(struct rvu *rvu, int blkaddr)
 {
 	struct cpt_rxc_time_cfg_req req, prev;
+	struct rvu_hwinfo *hw = rvu->hw;
 	int timeout = 2000;
 	u64 reg;
 
-	if (is_rvu_otx2(rvu))
+	if (!hw->cap.cpt_rxc)
 		return;
 
 	/* Set time limit to minimum values, so that rxc entries will be
@@ -1277,8 +1283,25 @@ int rvu_cpt_ctx_flush(struct rvu *rvu, u16 pcifunc)
 
 int rvu_cpt_init(struct rvu *rvu)
 {
+	struct rvu_hwinfo *hw = rvu->hw;
+	u64 reg_val;
+
 	/* Retrieve CPT PF number */
 	rvu->cpt_pf_num = get_cpt_pf_num(rvu);
+	if (is_block_implemented(rvu->hw, BLKADDR_CPT0) && !is_rvu_otx2(rvu) &&
+	    !is_cn10kb(rvu))
+		hw->cap.cpt_rxc = true;
+
+	if (hw->cap.cpt_rxc && !is_cn10ka_a0(rvu) && !is_cn10ka_a1(rvu)) {
+		/* Set CPT_AF_RXC_CFG1:max_rxc_icb_cnt to 0xc0 to not effect
+		 * inline inbound peak performance
+		 */
+		reg_val = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1);
+		reg_val &= ~(0x1FFULL << 32);
+		reg_val |= 0xC0ULL << 32;
+		rvu_write64(rvu, BLKADDR_CPT0, CPT_AF_RXC_CFG1, reg_val);
+	}
+
 	spin_lock_init(&rvu->cpt_intr_lock);
 
 	return 0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index d56be5fb7eb4..2b299fa85159 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -545,6 +545,7 @@
 #define CPT_AF_CTX_PSH_PC               (0x49450ull)
 #define CPT_AF_CTX_PSH_LATENCY_PC       (0x49458ull)
 #define CPT_AF_CTX_CAM_DATA(a)          (0x49800ull | (u64)(a) << 3)
+#define CPT_AF_RXC_CFG1                 (0x50000ull)
 #define CPT_AF_RXC_TIME                 (0x50010ull)
 #define CPT_AF_RXC_TIME_CFG             (0x50018ull)
 #define CPT_AF_RXC_DFRG                 (0x50020ull)
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors for CN10KB
  2024-08-27  4:25 ` [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors " Srujana Challa
@ 2024-08-27 16:32   ` Simon Horman
  2024-08-28  5:39     ` [EXTERNAL] " Srujana Challa
  0 siblings, 1 reply; 6+ messages in thread
From: Simon Horman @ 2024-08-27 16:32 UTC (permalink / raw)
  To: Srujana Challa
  Cc: netdev, davem, kuba, pabeni, edumazet, sgoutham, lcherian, gakula,
	jerinj, hkelam, sbhatta, bbhushan2, ndabilpuram

On Tue, Aug 27, 2024 at 09:55:11AM +0530, Srujana Challa wrote:
> On CN10KB, number of flt interrupt vectors are reduced to
> 2. So, modify the code accordingly for cn10k.

I think it would be nice to state that the patch moves
away from a hard-coded to dynamic number of vectors.
And that this is in order to accommodate the CN10KB,
which has 2 vectors, as opposed to chips supported by
the driver to date, which have 3.

> 
> Signed-off-by: Srujana Challa <schalla@marvell.com>

...

> diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> index 3e09d2285814..e56d27018828 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> @@ -37,6 +37,44 @@
>  	(_rsp)->free_sts_##etype = free_sts;                        \
>  })
>  
> +#define MAX_AE  GENMASK_ULL(47, 32)
> +#define MAX_IE  GENMASK_ULL(31, 16)
> +#define MAX_SE  GENMASK_ULL(15, 0)

nit: Maybe a blank line here.

> +static u32 cpt_max_engines_get(struct rvu *rvu)
> +{
> +	u16 max_ses, max_ies, max_aes;
> +	u64 reg;
> +
> +	reg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS1);
> +	max_ses = FIELD_GET(MAX_SE, reg);
> +	max_ies = FIELD_GET(MAX_IE, reg);
> +	max_aes = FIELD_GET(MAX_AE, reg);
> +
> +	return max_ses + max_ies + max_aes;

Maybe I am wrong, but it looks like this will perform u16 addition.
Can that overflow? I ask because the return type is u32, implying
values larger than 64k are expected.

> +}
> +
> +/* Number of flt interrupt vectors are depends on number of engines that the
> + * chip has. Each flt vector represents 64 engines.
> + */
> +static int cpt_10k_flt_nvecs_get(struct rvu *rvu)
> +{
> +	u32 max_engs;
> +	int flt_vecs;
> +
> +	max_engs = cpt_max_engines_get(rvu);
> +
> +	flt_vecs = (max_engs / 64);
> +	flt_vecs += (max_engs % 64) ? 1 : 0;

I don't think the parentheses are needed on the lines above.
And likewise elsewhere in this patch.

At any rate, here, I think you can use DIV_ROUND_UP.

> +
> +	if (flt_vecs > CPT_10K_AF_INT_VEC_FLT_MAX) {
> +		dev_warn(rvu->dev, "flt_vecs(%d) exceeds the max vectors(%d)\n",
> +			 flt_vecs, CPT_10K_AF_INT_VEC_FLT_MAX);
> +		flt_vecs = CPT_10K_AF_INT_VEC_FLT_MAX;
> +	}

cpt_max_engines_get seems to get called quite a bit.
I think dev_warn_once() is probably appropriate here.

> +
> +	return flt_vecs;
> +}
> +
>  static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr)
>  {
>  	struct rvu_block *block = ptr;
> @@ -150,17 +188,25 @@ static void cpt_10k_unregister_interrupts(struct rvu_block *block, int off)
>  {
>  	struct rvu *rvu = block->rvu;
>  	int blkaddr = block->addr;
> +	u32 max_engs;
> +	u8 nr;
>  	int i;
>  
> +	max_engs = cpt_max_engines_get(rvu);
> +
>  	/* Disable all CPT AF interrupts */
> -	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL);
> -	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL);
> -	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF);
> +	for (i = CPT_10K_AF_INT_VEC_FLT0; i < cpt_10k_flt_nvecs_get(rvu); i++) {

I think it would be best to cache the value of cpt_10k_flt_nvecs_get()
rather than run it for each iteration of the loop. I'm assuming it has a
non-zero cost as it reads a register, the value of which which I assume
does not change.

Also, the same register is already read by the call to
cpt_max_engines_get(). It would be nice to read it just once in this scope.

Likewise elsewhere.

Also, there is an inconsistency between the type of i and the return type
of cpt_10k_flt_nvecs_get(). Probably harmless, but IMHO it would be nice to
fix.

> +		nr = (max_engs > 64) ? 64 : max_engs;
> +		max_engs -= nr;
> +		rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i),
> +			    INTR_MASK(nr));
> +	}
>  
>  	rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1);
>  	rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1);
>  
> -	for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++)
> +	/* CPT AF interrupt vectors are flt_int, rvu_int and ras_int. */
> +	for (i = 0; i < cpt_10k_flt_nvecs_get(rvu) + 2; i++)

It would be nice to avoid the naked '2' here.
Perhaps a macro is appropriate.

>  		if (rvu->irq_allocated[off + i]) {
>  			free_irq(pci_irq_vector(rvu->pdev, off + i), block);
>  			rvu->irq_allocated[off + i] = false;

...

-- 
pw-bot: cr

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH net-next,2/2] octeontx2-af: configure default CPT credits
  2024-08-27  4:25 ` [PATCH net-next,2/2] octeontx2-af: configure default CPT credits Srujana Challa
@ 2024-08-27 16:37   ` Simon Horman
  0 siblings, 0 replies; 6+ messages in thread
From: Simon Horman @ 2024-08-27 16:37 UTC (permalink / raw)
  To: Srujana Challa
  Cc: netdev, davem, kuba, pabeni, edumazet, sgoutham, lcherian, gakula,
	jerinj, hkelam, sbhatta, bbhushan2, ndabilpuram

On Tue, Aug 27, 2024 at 09:55:12AM +0530, Srujana Challa wrote:
> The maximum CPT credits that RX can use are now configurable through
> a hardware CSR. This patch sets the default value to optimize peak
> performance, aligning it with other chip versions.
> This patch also adds changes to avoid RXC HW registers access on
> CN10KB as RXC is not available on CN10KB.

This sounds like it should be split into two patches.

> Signed-off-by: Srujana Challa <schalla@marvell.com>
> Signed-off-by: Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>

...

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [EXTERNAL] Re: [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors for CN10KB
  2024-08-27 16:32   ` Simon Horman
@ 2024-08-28  5:39     ` Srujana Challa
  0 siblings, 0 replies; 6+ messages in thread
From: Srujana Challa @ 2024-08-28  5:39 UTC (permalink / raw)
  To: Simon Horman
  Cc: netdev@vger.kernel.org, davem@davemloft.net, kuba@kernel.org,
	pabeni@redhat.com, edumazet@google.com, Sunil Kovvuri Goutham,
	Linu Cherian, Geethasowjanya Akula, Jerin Jacob, Hariprasad Kelam,
	Subbaraya Sundeep Bhatta, Bharat Bhushan, Nithin Kumar Dabilpuram

> Subject: [EXTERNAL] Re: [PATCH net-next,1/2] octeontx2-af: reduce cpt flt
> interrupt vectors for CN10KB
> 
> On Tue, Aug 27, 2024 at 09: 55: 11AM +0530, Srujana Challa wrote: > On
> CN10KB, number of flt interrupt vectors are reduced to > 2. So, modify the
> code accordingly for cn10k. I think it would be nice to state that the patch
> moves away from 
> On Tue, Aug 27, 2024 at 09:55:11AM +0530, Srujana Challa wrote:
> > On CN10KB, number of flt interrupt vectors are reduced to 2. So,
> > modify the code accordingly for cn10k.
> 
> I think it would be nice to state that the patch moves away from a hard-coded
> to dynamic number of vectors.
> And that this is in order to accommodate the CN10KB, which has 2 vectors, as
> opposed to chips supported by the driver to date, which have 3.
Sure, I will make the change.
> 
> >
> > Signed-off-by: Srujana Challa <schalla@marvell.com>
> 
> ...
> 
> > diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> > b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> > index 3e09d2285814..e56d27018828 100644
> > --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> > +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c
> > @@ -37,6 +37,44 @@
> >  	(_rsp)->free_sts_##etype = free_sts;                        \
> >  })
> >
> > +#define MAX_AE  GENMASK_ULL(47, 32)
> > +#define MAX_IE  GENMASK_ULL(31, 16)
> > +#define MAX_SE  GENMASK_ULL(15, 0)
> 
> nit: Maybe a blank line here.
> 
> > +static u32 cpt_max_engines_get(struct rvu *rvu) {
> > +	u16 max_ses, max_ies, max_aes;
> > +	u64 reg;
> > +
> > +	reg = rvu_read64(rvu, BLKADDR_CPT0, CPT_AF_CONSTANTS1);
> > +	max_ses = FIELD_GET(MAX_SE, reg);
> > +	max_ies = FIELD_GET(MAX_IE, reg);
> > +	max_aes = FIELD_GET(MAX_AE, reg);
> > +
> > +	return max_ses + max_ies + max_aes;
> 
> Maybe I am wrong, but it looks like this will perform u16 addition.
> Can that overflow? I ask because the return type is u32, implying values larger
> than 64k are expected.
No, it couldn't overflow. I will change the return type to u16.
> 
> > +}
> > +
> > +/* Number of flt interrupt vectors are depends on number of engines
> > +that the
> > + * chip has. Each flt vector represents 64 engines.
> > + */
> > +static int cpt_10k_flt_nvecs_get(struct rvu *rvu) {
> > +	u32 max_engs;
> > +	int flt_vecs;
> > +
> > +	max_engs = cpt_max_engines_get(rvu);
> > +
> > +	flt_vecs = (max_engs / 64);
> > +	flt_vecs += (max_engs % 64) ? 1 : 0;
> 
> I don't think the parentheses are needed on the lines above.
> And likewise elsewhere in this patch.
> 
> At any rate, here, I think you can use DIV_ROUND_UP.
Ack.

> 
> > +
> > +	if (flt_vecs > CPT_10K_AF_INT_VEC_FLT_MAX) {
> > +		dev_warn(rvu->dev, "flt_vecs(%d) exceeds the max
> vectors(%d)\n",
> > +			 flt_vecs, CPT_10K_AF_INT_VEC_FLT_MAX);
> > +		flt_vecs = CPT_10K_AF_INT_VEC_FLT_MAX;
> > +	}
> 
> cpt_max_engines_get seems to get called quite a bit.
> I think dev_warn_once() is probably appropriate here.
Ack.

> 
> > +
> > +	return flt_vecs;
> > +}
> > +
> >  static irqreturn_t cpt_af_flt_intr_handler(int vec, void *ptr)  {
> >  	struct rvu_block *block = ptr;
> > @@ -150,17 +188,25 @@ static void cpt_10k_unregister_interrupts(struct
> > rvu_block *block, int off)  {
> >  	struct rvu *rvu = block->rvu;
> >  	int blkaddr = block->addr;
> > +	u32 max_engs;
> > +	u8 nr;
> >  	int i;
> >
> > +	max_engs = cpt_max_engines_get(rvu);
> > +
> >  	/* Disable all CPT AF interrupts */
> > -	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(0), ~0ULL);
> > -	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(1), ~0ULL);
> > -	rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(2), 0xFFFF);
> > +	for (i = CPT_10K_AF_INT_VEC_FLT0; i < cpt_10k_flt_nvecs_get(rvu);
> > +i++) {
> 
> I think it would be best to cache the value of cpt_10k_flt_nvecs_get() rather
> than run it for each iteration of the loop. I'm assuming it has a non-zero cost as
> it reads a register, the value of which which I assume does not change.
> 
> Also, the same register is already read by the call to cpt_max_engines_get(). It
> would be nice to read it just once in this scope.
Ack.

> 
> Likewise elsewhere.
> 
> Also, there is an inconsistency between the type of i and the return type of
> cpt_10k_flt_nvecs_get(). Probably harmless, but IMHO it would be nice to fix.
Both are int only.

> 
> > +		nr = (max_engs > 64) ? 64 : max_engs;
> > +		max_engs -= nr;
> > +		rvu_write64(rvu, blkaddr, CPT_AF_FLTX_INT_ENA_W1C(i),
> > +			    INTR_MASK(nr));
> > +	}
> >
> >  	rvu_write64(rvu, blkaddr, CPT_AF_RVU_INT_ENA_W1C, 0x1);
> >  	rvu_write64(rvu, blkaddr, CPT_AF_RAS_INT_ENA_W1C, 0x1);
> >
> > -	for (i = 0; i < CPT_10K_AF_INT_VEC_CNT; i++)
> > +	/* CPT AF interrupt vectors are flt_int, rvu_int and ras_int. */
> > +	for (i = 0; i < cpt_10k_flt_nvecs_get(rvu) + 2; i++)
> 
> It would be nice to avoid the naked '2' here.
> Perhaps a macro is appropriate.
Ack.
> 
> >  		if (rvu->irq_allocated[off + i]) {
> >  			free_irq(pci_irq_vector(rvu->pdev, off + i), block);
> >  			rvu->irq_allocated[off + i] = false;
> 
> ...
> 
> --
> pw-bot: cr

Thanks for reviewing the patch.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-08-28  5:39 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-27  4:25 [PATCH net-next,0/2] octeontx2-af: Update CPT block for CN10KB Srujana Challa
2024-08-27  4:25 ` [PATCH net-next,1/2] octeontx2-af: reduce cpt flt interrupt vectors " Srujana Challa
2024-08-27 16:32   ` Simon Horman
2024-08-28  5:39     ` [EXTERNAL] " Srujana Challa
2024-08-27  4:25 ` [PATCH net-next,2/2] octeontx2-af: configure default CPT credits Srujana Challa
2024-08-27 16:37   ` Simon Horman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).