* [PATCH 1/4 RESEND] crypto: octeontx2: add timeout for load_fvc completion poll
2025-05-14 5:10 [PATCH 0/4 RESEND] crypto: octeontx2: Fix hang and address alignment issues Bharat Bhushan
@ 2025-05-14 5:10 ` Bharat Bhushan
2025-05-14 5:10 ` [PATCH 2/4 RESEND] crypto: octeontx2: Fix address alignment issue on ucode loading Bharat Bhushan
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Bharat Bhushan @ 2025-05-14 5:10 UTC (permalink / raw)
To: bbrezillon, arno, schalla, herbert, davem, giovanni.cabiddu,
linux, linux-crypto, linux-kernel, bharatb.linux
Cc: stable, Bharat Bhushan
Add a timeout to exit from a possible infinite loop that polls
for completion of the CPT instruction (load_fvc).
Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
---
.../crypto/marvell/octeontx2/otx2_cptpf_ucode.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 42c5484ce66a..3a818ac89295 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1494,6 +1494,7 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
dma_addr_t rptr_baddr;
struct pci_dev *pdev;
u32 len, compl_rlen;
+ int timeout = 10000;
int ret, etype;
void *rptr;
@@ -1556,16 +1557,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
etype);
otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
+ timeout = 10000;
while (lfs->ops->cpt_get_compcode(result) ==
- OTX2_CPT_COMPLETION_CODE_INIT)
+ OTX2_CPT_COMPLETION_CODE_INIT) {
cpu_relax();
+ udelay(1);
+ timeout--;
+ if (!timeout) {
+ ret = -ENODEV;
+ cptpf->is_eng_caps_discovered = false;
+ dev_warn(&pdev->dev, "Timeout on CPT load_fvc completion poll\n");
+ goto error_no_response;
+ }
+ }
cptpf->eng_caps[etype].u = be64_to_cpup(rptr);
}
- dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
cptpf->is_eng_caps_discovered = true;
+error_no_response:
+ dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
free_result:
kfree(result);
lf_cleanup:
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/4 RESEND] crypto: octeontx2: Fix address alignment issue on ucode loading
2025-05-14 5:10 [PATCH 0/4 RESEND] crypto: octeontx2: Fix hang and address alignment issues Bharat Bhushan
2025-05-14 5:10 ` [PATCH 1/4 RESEND] crypto: octeontx2: add timeout for load_fvc completion poll Bharat Bhushan
@ 2025-05-14 5:10 ` Bharat Bhushan
2025-05-14 5:10 ` [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2 Bharat Bhushan
2025-05-14 5:10 ` [PATCH 4/4 RESEND] crypto: octeontx2: Fix address alignment on CN10KB and CN10KA-B0 Bharat Bhushan
3 siblings, 0 replies; 9+ messages in thread
From: Bharat Bhushan @ 2025-05-14 5:10 UTC (permalink / raw)
To: bbrezillon, arno, schalla, herbert, davem, giovanni.cabiddu,
linux, linux-crypto, linux-kernel, bharatb.linux
Cc: stable, Bharat Bhushan
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc
and uses this memory for DMA (via dma_map_single()). It assumes
that kmalloc/kzalloc returns a 128-byte aligned address. But
kmalloc/kzalloc can return an address that is only 8-byte aligned
after the change below:
"9382bc44b5f5 arm64: allow kmalloc() caches aligned to the
smaller cache_line_size()"
The completion address must be 32-byte aligned when loading
microcode.
Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
---
.../marvell/octeontx2/otx2_cptpf_ucode.c | 30 +++++++++++--------
1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index 3a818ac89295..1c2aa9626088 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -1491,12 +1491,13 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
union otx2_cpt_opcode opcode;
union otx2_cpt_res_s *result;
union otx2_cpt_inst_s inst;
+ dma_addr_t result_baddr;
dma_addr_t rptr_baddr;
struct pci_dev *pdev;
- u32 len, compl_rlen;
int timeout = 10000;
int ret, etype;
void *rptr;
+ u32 len;
/*
* We don't get capabilities if it was already done
@@ -1521,22 +1522,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
if (ret)
goto delete_grps;
- compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN);
- len = compl_rlen + LOADFVC_RLEN;
+ len = LOADFVC_RLEN + sizeof(union otx2_cpt_res_s) +
+ OTX2_CPT_RES_ADDR_ALIGN;
- result = kzalloc(len, GFP_KERNEL);
- if (!result) {
+ rptr = kzalloc(len, GFP_KERNEL);
+ if (!rptr) {
ret = -ENOMEM;
goto lf_cleanup;
}
- rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len,
+
+ rptr_baddr = dma_map_single(&pdev->dev, rptr, len,
DMA_BIDIRECTIONAL);
if (dma_mapping_error(&pdev->dev, rptr_baddr)) {
dev_err(&pdev->dev, "DMA mapping failed\n");
ret = -EFAULT;
- goto free_result;
+ goto free_rptr;
}
- rptr = (u8 *)result + compl_rlen;
+
+ result = (union otx2_cpt_res_s *)PTR_ALIGN(rptr + LOADFVC_RLEN,
+ OTX2_CPT_RES_ADDR_ALIGN);
+ result_baddr = ALIGN(rptr_baddr + LOADFVC_RLEN,
+ OTX2_CPT_RES_ADDR_ALIGN);
/* Fill in the command */
opcode.s.major = LOADFVC_MAJOR_OP;
@@ -1548,14 +1554,14 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
/* 64-bit swap for microcode data reads, not needed for addresses */
cpu_to_be64s(&iq_cmd.cmd.u);
iq_cmd.dptr = 0;
- iq_cmd.rptr = rptr_baddr + compl_rlen;
+ iq_cmd.rptr = rptr_baddr;
iq_cmd.cptr.u = 0;
for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) {
result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT;
iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps,
etype);
- otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
+ otx2_cpt_fill_inst(&inst, &iq_cmd, result_baddr);
lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
timeout = 10000;
@@ -1578,8 +1584,8 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
error_no_response:
dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL);
-free_result:
- kfree(result);
+free_rptr:
+ kfree(rptr);
lf_cleanup:
otx2_cptlf_shutdown(lfs);
delete_grps:
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
2025-05-14 5:10 [PATCH 0/4 RESEND] crypto: octeontx2: Fix hang and address alignment issues Bharat Bhushan
2025-05-14 5:10 ` [PATCH 1/4 RESEND] crypto: octeontx2: add timeout for load_fvc completion poll Bharat Bhushan
2025-05-14 5:10 ` [PATCH 2/4 RESEND] crypto: octeontx2: Fix address alignment issue on ucode loading Bharat Bhushan
@ 2025-05-14 5:10 ` Bharat Bhushan
2025-05-19 4:26 ` Herbert Xu
2025-05-14 5:10 ` [PATCH 4/4 RESEND] crypto: octeontx2: Fix address alignment on CN10KB and CN10KA-B0 Bharat Bhushan
3 siblings, 1 reply; 9+ messages in thread
From: Bharat Bhushan @ 2025-05-14 5:10 UTC (permalink / raw)
To: bbrezillon, arno, schalla, herbert, davem, giovanni.cabiddu,
linux, linux-crypto, linux-kernel, bharatb.linux
Cc: stable, Bharat Bhushan
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc
and uses this memory for DMA (via dma_map_single()). It assumes
that kmalloc/kzalloc returns a 128-byte aligned address. But
kmalloc/kzalloc can return an address that is only 8-byte aligned
after the change below:
"9382bc44b5f5 arm64: allow kmalloc() caches aligned to the
smaller cache_line_size()"
The allocated memory is used for the following purposes:
- Input data or gather list address - 8-byte alignment
- Output data or scatter list address - 8-byte alignment
- Completion address - 32-byte alignment
This patch ensures that all addresses are aligned as listed above.
Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
---
.../marvell/octeontx2/otx2_cpt_reqmgr.h | 62 ++++++++++++++-----
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
index e27e849b01df..f0f1ff45c383 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
@@ -34,6 +34,9 @@
#define SG_COMP_2 2
#define SG_COMP_1 1
+#define OTX2_CPT_DPTR_RPTR_ALIGN 8
+#define OTX2_CPT_RES_ADDR_ALIGN 32
+
union otx2_cpt_opcode {
u16 flags;
struct {
@@ -417,10 +420,9 @@ static inline struct otx2_cpt_inst_info *
otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
gfp_t gfp)
{
- int align = OTX2_CPT_DMA_MINALIGN;
struct otx2_cpt_inst_info *info;
- u32 dlen, align_dlen, info_len;
- u16 g_sz_bytes, s_sz_bytes;
+ u32 dlen, info_len;
+ u16 g_len, s_len;
u32 total_mem_len;
if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT ||
@@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
return NULL;
}
- g_sz_bytes = ((req->in_cnt + 3) / 4) *
- sizeof(struct otx2_cpt_sglist_component);
- s_sz_bytes = ((req->out_cnt + 3) / 4) *
- sizeof(struct otx2_cpt_sglist_component);
+ /* Allocate memory to meet below alignment requirement:
+ * ----------------------------------
+ * | struct otx2_cpt_inst_info |
+ * | (No alignment required) |
+ * | -----------------------------|
+ * | | padding for 8B alignment |
+ * |----------------------------------|
+ * | SG List Gather/Input memory |
+ * | Length = multiple of 32Bytes |
+ * | Alignment = 8Byte |
+ * |----------------------------------|
+ * | SG List Scatter/Output memory |
+ * | Length = multiple of 32Bytes |
+ * | Alignment = 8Byte |
+ * | (padding for below alignment) |
+ * | -----------------------------|
+ * | | padding for 32B alignment |
+ * |----------------------------------|
+ * | Result response memory |
+ * ----------------------------------
+ */
- dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
- align_dlen = ALIGN(dlen, align);
- info_len = ALIGN(sizeof(*info), align);
- total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
+ info_len = sizeof(*info);
+
+ g_len = ((req->in_cnt + 3) / 4) *
+ sizeof(struct otx2_cpt_sglist_component);
+ s_len = ((req->out_cnt + 3) / 4) *
+ sizeof(struct otx2_cpt_sglist_component);
+
+ dlen = g_len + s_len + SG_LIST_HDR_SIZE;
+
+ /* Allocate extra memory for SG and response address alignment */
+ total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
+ total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
+ sizeof(union otx2_cpt_res_s);
info = kzalloc(total_mem_len, gfp);
if (unlikely(!info))
return NULL;
info->dlen = dlen;
- info->in_buffer = (u8 *)info + info_len;
+ info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
+ OTX2_CPT_DPTR_RPTR_ALIGN);
+ info->out_buffer = info->in_buffer + 8 + g_len;
((u16 *)info->in_buffer)[0] = req->out_cnt;
((u16 *)info->in_buffer)[1] = req->in_cnt;
@@ -460,7 +490,7 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
}
if (setup_sgio_components(pdev, req->out, req->out_cnt,
- &info->in_buffer[8 + g_sz_bytes])) {
+ info->out_buffer)) {
dev_err(&pdev->dev, "Failed to setup scatter list\n");
goto destroy_info;
}
@@ -476,8 +506,10 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
* Get buffer for union otx2_cpt_res_s response
* structure and its physical address
*/
- info->completion_addr = info->in_buffer + align_dlen;
- info->comp_baddr = info->dptr_baddr + align_dlen;
+ info->completion_addr = PTR_ALIGN((info->in_buffer + dlen),
+ OTX2_CPT_RES_ADDR_ALIGN);
+ info->comp_baddr = ALIGN((info->dptr_baddr + dlen),
+ OTX2_CPT_RES_ADDR_ALIGN);
return info;
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
2025-05-14 5:10 ` [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2 Bharat Bhushan
@ 2025-05-19 4:26 ` Herbert Xu
2025-05-19 6:17 ` Bharat Bhushan
0 siblings, 1 reply; 9+ messages in thread
From: Herbert Xu @ 2025-05-19 4:26 UTC (permalink / raw)
To: Bharat Bhushan
Cc: bbrezillon, arno, schalla, davem, giovanni.cabiddu, linux,
linux-crypto, linux-kernel, bharatb.linux, stable
On Wed, May 14, 2025 at 10:40:42AM +0530, Bharat Bhushan wrote:
>
> @@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
> return NULL;
> }
>
> - g_sz_bytes = ((req->in_cnt + 3) / 4) *
> - sizeof(struct otx2_cpt_sglist_component);
> - s_sz_bytes = ((req->out_cnt + 3) / 4) *
> - sizeof(struct otx2_cpt_sglist_component);
> + /* Allocate memory to meet below alignment requirement:
> + * ----------------------------------
> + * | struct otx2_cpt_inst_info |
> + * | (No alignment required) |
> + * | -----------------------------|
> + * | | padding for 8B alignment |
> + * |----------------------------------|
> + * | SG List Gather/Input memory |
> + * | Length = multiple of 32Bytes |
> + * | Alignment = 8Byte |
> + * |----------------------------------|
> + * | SG List Scatter/Output memory |
> + * | Length = multiple of 32Bytes |
> + * | Alignment = 8Byte |
> + * | (padding for below alignment) |
> + * | -----------------------------|
> + * | | padding for 32B alignment |
> + * |----------------------------------|
> + * | Result response memory |
> + * ----------------------------------
> + */
>
> - dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
> - align_dlen = ALIGN(dlen, align);
> - info_len = ALIGN(sizeof(*info), align);
> - total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
> + info_len = sizeof(*info);
> +
> + g_len = ((req->in_cnt + 3) / 4) *
> + sizeof(struct otx2_cpt_sglist_component);
> + s_len = ((req->out_cnt + 3) / 4) *
> + sizeof(struct otx2_cpt_sglist_component);
> +
> + dlen = g_len + s_len + SG_LIST_HDR_SIZE;
> +
> + /* Allocate extra memory for SG and response address alignment */
> + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
> + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> + sizeof(union otx2_cpt_res_s);
This doesn't look right. It would be correct if kzalloc returned
a 32-byte aligned pointer to start with. But it doesn't anymore,
which is why you're making this patch in the first place :)
So you need to add extra memory to bridge the gap between what it
returns and what you expect. Since it returns 8-byte aligned
memory, and you expect 32-byte aligned pointers, you should add
24 bytes.
IOW the calculation should be:
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN);
total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) &
~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
> info = kzalloc(total_mem_len, gfp);
> if (unlikely(!info))
> return NULL;
>
> info->dlen = dlen;
> - info->in_buffer = (u8 *)info + info_len;
> + info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
> + OTX2_CPT_DPTR_RPTR_ALIGN);
> + info->out_buffer = info->in_buffer + 8 + g_len;
I presume the 8 here corresponds to SG_LIST_HDR_SIZE from the dlen
calculation above. If so please spell it out as otherwise it's just
confusing.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
2025-05-19 4:26 ` Herbert Xu
@ 2025-05-19 6:17 ` Bharat Bhushan
2025-05-19 7:35 ` Herbert Xu
0 siblings, 1 reply; 9+ messages in thread
From: Bharat Bhushan @ 2025-05-19 6:17 UTC (permalink / raw)
To: Herbert Xu
Cc: Bharat Bhushan, bbrezillon, arno, schalla, davem,
giovanni.cabiddu, linux, linux-crypto, linux-kernel, stable
On Mon, May 19, 2025 at 9:57 AM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Wed, May 14, 2025 at 10:40:42AM +0530, Bharat Bhushan wrote:
> >
> > @@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
> > return NULL;
> > }
> >
> > - g_sz_bytes = ((req->in_cnt + 3) / 4) *
> > - sizeof(struct otx2_cpt_sglist_component);
> > - s_sz_bytes = ((req->out_cnt + 3) / 4) *
> > - sizeof(struct otx2_cpt_sglist_component);
> > + /* Allocate memory to meet below alignment requirement:
> > + * ----------------------------------
> > + * | struct otx2_cpt_inst_info |
> > + * | (No alignment required) |
> > + * | -----------------------------|
> > + * | | padding for 8B alignment |
> > + * |----------------------------------|
> > + * | SG List Gather/Input memory |
> > + * | Length = multiple of 32Bytes |
> > + * | Alignment = 8Byte |
> > + * |----------------------------------|
> > + * | SG List Scatter/Output memory |
> > + * | Length = multiple of 32Bytes |
> > + * | Alignment = 8Byte |
> > + * | (padding for below alignment) |
> > + * | -----------------------------|
> > + * | | padding for 32B alignment |
> > + * |----------------------------------|
> > + * | Result response memory |
> > + * ----------------------------------
> > + */
> >
> > - dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
> > - align_dlen = ALIGN(dlen, align);
> > - info_len = ALIGN(sizeof(*info), align);
> > - total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
> > + info_len = sizeof(*info);
> > +
> > + g_len = ((req->in_cnt + 3) / 4) *
> > + sizeof(struct otx2_cpt_sglist_component);
> > + s_len = ((req->out_cnt + 3) / 4) *
> > + sizeof(struct otx2_cpt_sglist_component);
> > +
> > + dlen = g_len + s_len + SG_LIST_HDR_SIZE;
> > +
> > + /* Allocate extra memory for SG and response address alignment */
> > + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
This adds extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment.
> > + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> > + sizeof(union otx2_cpt_res_s);
This adds extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN) alignment.
In case it was not noticed, OTX2_CPT_RES_ADDR_ALIGN is not the same as
OTX2_CPT_DPTR_RPTR_ALIGN.
>
> This doesn't look right. It would be correct if kzalloc returned
> a 32-byte aligned pointer to start with. But it doesn't anymore,
> which is why you're making this patch in the first place :)
>
> So you need to add extra memory to bridge the gap between what it
> returns and what you expect. Since it returns 8-byte aligned
> memory, and you expect 32-byte aligned pointers, you should add
> 24 bytes.
>
> IOW the calculation should be:
>
> total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
> total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN);
> total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) &
> ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
>
> > info = kzalloc(total_mem_len, gfp);
> > if (unlikely(!info))
> > return NULL;
> >
> > info->dlen = dlen;
> > - info->in_buffer = (u8 *)info + info_len;
> > + info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
> > + OTX2_CPT_DPTR_RPTR_ALIGN);
> > + info->out_buffer = info->in_buffer + 8 + g_len;
>
> I presume the 8 here corresponds to SG_LIST_HDR_SIZE from the dlen
> calculation above. If so please spell it out as otherwise it's just
> confusing.
Yes, this is for SG_LIST_HDR_SIZE; I will use that macro here.
Thanks
-Bharat
>
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
2025-05-19 6:17 ` Bharat Bhushan
@ 2025-05-19 7:35 ` Herbert Xu
2025-05-20 4:34 ` Bharat Bhushan
0 siblings, 1 reply; 9+ messages in thread
From: Herbert Xu @ 2025-05-19 7:35 UTC (permalink / raw)
To: Bharat Bhushan
Cc: Bharat Bhushan, bbrezillon, arno, schalla, davem,
giovanni.cabiddu, linux, linux-crypto, linux-kernel, stable
On Mon, May 19, 2025 at 11:47:18AM +0530, Bharat Bhushan wrote:
>
> > > + /* Allocate extra memory for SG and response address alignment */
> > > + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
>
> This adds extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment.
>
> > > + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> > > + sizeof(union otx2_cpt_res_s);
>
> This adds extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN) alignment.
> In case it was not noticed, OTX2_CPT_RES_ADDR_ALIGN is not the same as
> OTX2_CPT_DPTR_RPTR_ALIGN.
But it doesn't do that. Look, assume that total_mem_len is 64,
then ALIGN(64, 32) will still be 64. You're not adding any extra
space for the alignment padding.
OTOH, kmalloc can return something that has a page offset of 8,
and you will need 24 extra bytes in your structure to make it
align at 32.
Now of course if you're very lucky, and total_mem_len starts out
at 8, then it would work but that's purely by chance.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2
2025-05-19 7:35 ` Herbert Xu
@ 2025-05-20 4:34 ` Bharat Bhushan
0 siblings, 0 replies; 9+ messages in thread
From: Bharat Bhushan @ 2025-05-20 4:34 UTC (permalink / raw)
To: Herbert Xu
Cc: Bharat Bhushan, bbrezillon, arno, schalla, davem,
giovanni.cabiddu, linux, linux-crypto, linux-kernel, stable
On Mon, May 19, 2025 at 1:05 PM Herbert Xu <herbert@gondor.apana.org.au> wrote:
>
> On Mon, May 19, 2025 at 11:47:18AM +0530, Bharat Bhushan wrote:
> >
> > > > + /* Allocate extra memory for SG and response address alignment */
> > > > + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
> >
> > This adds extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment.
> >
> > > > + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
> > > > + sizeof(union otx2_cpt_res_s);
> >
> > This adds extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN) alignment.
> > In case it was not noticed, OTX2_CPT_RES_ADDR_ALIGN is not the same as
> > OTX2_CPT_DPTR_RPTR_ALIGN.
>
> But it doesn't do that. Look, assume that total_mem_len is 64,
> then ALIGN(64, 32) will still be 64. You're not adding any extra
> space for the alignment padding.
>
> OTOH, kmalloc can return something that has a page offset of 8,
> and you will need 24 extra bytes in your structure to make it
> align at 32.
>
> Now of course if you're very lucky, and total_mem_len starts out
> at 8, then it would work but that's purely by chance.
Thanks for explaining, will change in the next version.
Thanks
-Bharat
>
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/4 RESEND] crypto: octeontx2: Fix address alignment on CN10KB and CN10KA-B0
2025-05-14 5:10 [PATCH 0/4 RESEND] crypto: octeontx2: Fix hang and address alignment issues Bharat Bhushan
` (2 preceding siblings ...)
2025-05-14 5:10 ` [PATCH 3/4 RESEND] crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2 Bharat Bhushan
@ 2025-05-14 5:10 ` Bharat Bhushan
3 siblings, 0 replies; 9+ messages in thread
From: Bharat Bhushan @ 2025-05-14 5:10 UTC (permalink / raw)
To: bbrezillon, arno, schalla, herbert, davem, giovanni.cabiddu,
linux, linux-crypto, linux-kernel, bharatb.linux
Cc: stable, Bharat Bhushan
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc
and uses this memory for DMA (via dma_map_single()). It assumes
that kmalloc/kzalloc returns a 128-byte aligned address. But
kmalloc/kzalloc can return an address that is only 8-byte aligned
after the change below:
"9382bc44b5f5 arm64: allow kmalloc() caches aligned to the
smaller cache_line_size()"
The allocated memory is used for the following purposes:
- Input data or gather list address - 8-byte alignment
- Output data or scatter list address - 8-byte alignment
- Completion address - 32-byte alignment
This patch ensures that all addresses are aligned as listed above.
Signed-off-by: Bharat Bhushan <bbhushan2@marvell.com>
---
.../marvell/octeontx2/otx2_cpt_reqmgr.h | 57 ++++++++++++++-----
1 file changed, 42 insertions(+), 15 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
index f0f1ff45c383..b49dafc596c7 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h
@@ -350,22 +350,45 @@ static inline struct otx2_cpt_inst_info *
cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
gfp_t gfp)
{
- u32 dlen = 0, g_len, sg_len, info_len;
- int align = OTX2_CPT_DMA_MINALIGN;
+ u32 dlen = 0, g_len, s_len, sg_len, info_len;
struct otx2_cpt_inst_info *info;
- u16 g_sz_bytes, s_sz_bytes;
u32 total_mem_len;
int i;
- g_sz_bytes = ((req->in_cnt + 2) / 3) *
- sizeof(struct cn10kb_cpt_sglist_component);
- s_sz_bytes = ((req->out_cnt + 2) / 3) *
- sizeof(struct cn10kb_cpt_sglist_component);
+ /* Allocate memory to meet below alignment requirement:
+ * ----------------------------------
+ * | struct otx2_cpt_inst_info |
+ * | (No alignment required) |
+ * | -----------------------------|
+ * | | padding for 8B alignment |
+ * |----------------------------------|
+ * | SG List Gather/Input memory |
+ * | Length = multiple of 32Bytes |
+ * | Alignment = 8Byte |
+ * |----------------------------------|
+ * | SG List Scatter/Output memory |
+ * | Length = multiple of 32Bytes |
+ * | Alignment = 8Byte |
+ * | (padding for below alignment) |
+ * | -----------------------------|
+ * | | padding for 32B alignment |
+ * |----------------------------------|
+ * | Result response memory |
+ * ----------------------------------
+ */
+
+ info_len = sizeof(*info);
- g_len = ALIGN(g_sz_bytes, align);
- sg_len = ALIGN(g_len + s_sz_bytes, align);
- info_len = ALIGN(sizeof(*info), align);
- total_mem_len = sg_len + info_len + sizeof(union otx2_cpt_res_s);
+ g_len = ((req->in_cnt + 2) / 3) *
+ sizeof(struct cn10kb_cpt_sglist_component);
+ s_len = ((req->out_cnt + 2) / 3) *
+ sizeof(struct cn10kb_cpt_sglist_component);
+ sg_len = g_len + s_len;
+
+ /* Allocate extra memory for SG and response address alignment */
+ total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + sg_len;
+ total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
+ sizeof(union otx2_cpt_res_s);
info = kzalloc(total_mem_len, gfp);
if (unlikely(!info))
@@ -375,7 +398,9 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
dlen += req->in[i].size;
info->dlen = dlen;
- info->in_buffer = (u8 *)info + info_len;
+ info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
+ OTX2_CPT_DPTR_RPTR_ALIGN);
+ info->out_buffer = info->in_buffer + g_len;
info->gthr_sz = req->in_cnt;
info->sctr_sz = req->out_cnt;
@@ -387,7 +412,7 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
}
if (sgv2io_components_setup(pdev, req->out, req->out_cnt,
- &info->in_buffer[g_len])) {
+ info->out_buffer)) {
dev_err(&pdev->dev, "Failed to setup scatter list\n");
goto destroy_info;
}
@@ -404,8 +429,10 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
* Get buffer for union otx2_cpt_res_s response
* structure and its physical address
*/
- info->completion_addr = info->in_buffer + sg_len;
- info->comp_baddr = info->dptr_baddr + sg_len;
+ info->completion_addr = PTR_ALIGN((info->in_buffer + sg_len),
+ OTX2_CPT_RES_ADDR_ALIGN);
+ info->comp_baddr = ALIGN((info->dptr_baddr + sg_len),
+ OTX2_CPT_RES_ADDR_ALIGN);
return info;
--
2.34.1
^ permalink raw reply related [flat|nested] 9+ messages in thread