* [PATCH RESEND] crypto: gf128mul - remove dead gf128mul_64k_lle code
From: Alex Cope @ 2016-11-09 1:16 UTC (permalink / raw)
To: linux-crypto; +Cc: Alex Cope
This code is unlikely to be useful in the future because transforms
don't know how often keys will be changed, new algorithms are unlikely
to use lle representation, and tables should be replaced with
carryless multiplication instructions when available.
Signed-off-by: Alex Cope <alexcope@google.com>
---
crypto/gf128mul.c | 55 -----------------------------------------------
include/crypto/gf128mul.h | 13 ++++++-----
2 files changed, 6 insertions(+), 62 deletions(-)
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 5276607..57c85dd 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -263,48 +263,6 @@ EXPORT_SYMBOL(gf128mul_bbe);
* t[1][BYTE] contains g*x^8*BYTE
* ..
* t[15][BYTE] contains g*x^120*BYTE */
-struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
-{
- struct gf128mul_64k *t;
- int i, j, k;
-
- t = kzalloc(sizeof(*t), GFP_KERNEL);
- if (!t)
- goto out;
-
- for (i = 0; i < 16; i++) {
- t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
- if (!t->t[i]) {
- gf128mul_free_64k(t);
- t = NULL;
- goto out;
- }
- }
-
- t->t[0]->t[128] = *g;
- for (j = 64; j > 0; j >>= 1)
- gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
-
- for (i = 0;;) {
- for (j = 2; j < 256; j += j)
- for (k = 1; k < j; ++k)
- be128_xor(&t->t[i]->t[j + k],
- &t->t[i]->t[j], &t->t[i]->t[k]);
-
- if (++i >= 16)
- break;
-
- for (j = 128; j > 0; j >>= 1) {
- t->t[i]->t[j] = t->t[i - 1]->t[j];
- gf128mul_x8_lle(&t->t[i]->t[j]);
- }
- }
-
-out:
- return t;
-}
-EXPORT_SYMBOL(gf128mul_init_64k_lle);
-
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
{
struct gf128mul_64k *t;
@@ -357,19 +315,6 @@ void gf128mul_free_64k(struct gf128mul_64k *t)
}
EXPORT_SYMBOL(gf128mul_free_64k);
-void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
-{
- u8 *ap = (u8 *)a;
- be128 r[1];
- int i;
-
- *r = t->t[0]->t[ap[0]];
- for (i = 1; i < 16; ++i)
- be128_xor(r, r, &t->t[i]->t[ap[i]]);
- *a = *r;
-}
-EXPORT_SYMBOL(gf128mul_64k_lle);
-
void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
{
u8 *ap = (u8 *)a;
diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h
index da2530e..b611aa9 100644
--- a/include/crypto/gf128mul.h
+++ b/include/crypto/gf128mul.h
@@ -181,20 +181,19 @@ static inline void gf128mul_free_4k(struct gf128mul_4k *t)
}
-/* 64k table optimization, implemented for lle and bbe */
+/* 64k table optimization, implemented for bbe */
struct gf128mul_64k {
struct gf128mul_4k *t[16];
};
-/* first initialize with the constant factor with which you
- * want to multiply and then call gf128_64k_lle with the other
- * factor in the first argument, the table in the second and a
- * scratch register in the third. Afterwards *a = *r. */
-struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g);
+/* First initialize with the constant factor with which you
+ * want to multiply and then call gf128mul_64k_bbe with the other
+ * factor in the first argument, and the table in the second.
+ * Afterwards, the result is stored in *a.
+ */
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
void gf128mul_free_64k(struct gf128mul_64k *t);
-void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t);
void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t);
#endif /* _CRYPTO_GF128MUL_H */
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related
* Re: [PATCH] crypto: rsa: rename two rsa key files
From: yjin @ 2016-11-09 6:39 UTC (permalink / raw)
To: Herbert Xu; +Cc: davem, linux-kernel, linux-crypto, jinyanjiang
In-Reply-To: <20161108120900.GA5267@gondor.apana.org.au>
Thanks for Herbert's reminder.
I have drop this patch in a previous mail.
Regards!
Yanjiang
On 2016年11月08日 20:09, Herbert Xu wrote:
> yanjiang.jin@windriver.com wrote:
>> From: Yanjiang Jin <yanjiang.jin@windriver.com>
>>
>> This is to eliminate the below compile error:
>>
>> crypto/rsa_helper.c:19:29: fatal error: rsaprivkey-asn1.h: No such file or directory
>> #include "rsaprivkey-asn1.h"
>> ^
>> compilation terminated.
>>
>> Signed-off-by: Yanjiang Jin <yanjiang.jin@windriver.com>
> This patch is bogus. The header files are meant to be generated.
> Please find out why they are not being generated in your case.
>
> Thanks,
^ permalink raw reply
* [PATCH 01/14] crypto: caam - fix AEAD givenc descriptors
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto, Alex Porosanu
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
From: Alex Porosanu <alexandru.porosanu@nxp.com>
The AEAD givenc descriptor relies on moving the IV through the
output FIFO and then back to the CTX2 for authentication. The
SEQ FIFO STORE could be scheduled before the data can be
read from OFIFO, especially since the SEQ FIFO LOAD needs
to wait for the SEQ FIFO LOAD SKIP to finish first. The
SKIP takes more time when the input is SG than when it's
a contiguous buffer. If the SEQ FIFO LOAD is not scheduled
before the STORE, the DECO will hang waiting for data
to be available in the OFIFO so it can be transferred to C2.
In order to overcome this, first force transfer of IV to C2
by starting the "cryptlen" transfer first and then starting to
store data from OFIFO to the output buffer.
Fixes: 1acebad3d8db8 ("crypto: caam - faster aead implementation")
Cc: <stable@vger.kernel.org> # 3.2+
Signed-off-by: Alex Porosanu <alexandru.porosanu@nxp.com>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamalg.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 8de85dfb1b04..5317d8cad44d 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -736,7 +736,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
/* Will read cryptlen */
append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
- aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2);
+ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF |
+ FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH);
+ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
/* Write ICV */
append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
--
2.4.4
^ permalink raw reply related
* [PATCH 04/14] crypto: caam - fix sparse warnings
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
Fix the following sparse warning (note that endianness issues
are not not addressed in current patch):
drivers/crypto/caam/ctrl.c:388:24: warning: incorrect type in argument 1 (different address spaces)
drivers/crypto/caam/ctrl.c:388:24: expected void [noderef] <asn:2>*reg
drivers/crypto/caam/ctrl.c:388:24: got unsigned int *<noident>
drivers/crypto/caam/ctrl.c:390:24: warning: incorrect type in argument 1 (different address spaces)
drivers/crypto/caam/ctrl.c:390:24: expected void [noderef] <asn:2>*reg
drivers/crypto/caam/ctrl.c:390:24: got unsigned int *<noident>
drivers/crypto/caam/ctrl.c:548:24: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:548:24: expected struct caam_ctrl [noderef] <asn:2>*ctrl
drivers/crypto/caam/ctrl.c:548:24: got struct caam_ctrl *<noident>
drivers/crypto/caam/ctrl.c:550:30: warning: cast removes address space of expression
drivers/crypto/caam/ctrl.c:549:26: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:549:26: expected struct caam_assurance [noderef] <asn:2>*assure
drivers/crypto/caam/ctrl.c:549:26: got struct caam_assurance *<noident>
drivers/crypto/caam/ctrl.c:554:28: warning: cast removes address space of expression
drivers/crypto/caam/ctrl.c:553:24: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:553:24: expected struct caam_deco [noderef] <asn:2>*deco
drivers/crypto/caam/ctrl.c:553:24: got struct caam_deco *<noident>
drivers/crypto/caam/ctrl.c:634:48: warning: cast removes address space of expression
drivers/crypto/caam/ctrl.c:633:44: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:633:44: expected struct caam_job_ring [noderef] <asn:2>*<noident>
drivers/crypto/caam/ctrl.c:633:44: got struct caam_job_ring *<noident>
drivers/crypto/caam/ctrl.c:648:34: warning: cast removes address space of expression
drivers/crypto/caam/ctrl.c:647:30: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:647:30: expected struct caam_queue_if [noderef] <asn:2>*qi
drivers/crypto/caam/ctrl.c:647:30: got struct caam_queue_if *<noident>
drivers/crypto/caam/ctrl.c:806:37: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:806:37: expected void *data
drivers/crypto/caam/ctrl.c:806:37: got unsigned int [noderef] <asn:2>*
drivers/crypto/caam/ctrl.c:814:38: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:814:38: expected void *data
drivers/crypto/caam/ctrl.c:814:38: got unsigned int [noderef] <asn:2>*
drivers/crypto/caam/ctrl.c:822:38: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/ctrl.c:822:38: expected void *data
drivers/crypto/caam/ctrl.c:822:38: got unsigned int [noderef] <asn:2>*
drivers/crypto/caam/jr.c:492:23: warning: incorrect type in assignment (different address spaces)
drivers/crypto/caam/jr.c:492:23: expected struct caam_job_ring [noderef] <asn:2>*rregs
drivers/crypto/caam/jr.c:492:23: got struct caam_job_ring *<noident>
drivers/crypto/caam/caampkc.c:398:35: warning: Using plain integer as NULL pointer
drivers/crypto/caam/caampkc.c:444:35: warning: Using plain integer as NULL pointer
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caampkc.c | 4 ++--
drivers/crypto/caam/ctrl.c | 40 +++++++++++++++++-----------------------
drivers/crypto/caam/jr.c | 2 +-
3 files changed, 20 insertions(+), 26 deletions(-)
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 851015e652b8..32100c4851dd 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -395,7 +395,7 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
unsigned int keylen)
{
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
- struct rsa_key raw_key = {0};
+ struct rsa_key raw_key = {NULL};
struct caam_rsa_key *rsa_key = &ctx->key;
int ret;
@@ -441,7 +441,7 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
unsigned int keylen)
{
struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
- struct rsa_key raw_key = {0};
+ struct rsa_key raw_key = {NULL};
struct caam_rsa_key *rsa_key = &ctx->key;
int ret;
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index a79937d68c26..be62a7f482ac 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -365,11 +365,8 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
*/
val = (rd_reg32(&r4tst->rtsdctl) & RTSDCTL_ENT_DLY_MASK)
>> RTSDCTL_ENT_DLY_SHIFT;
- if (ent_delay <= val) {
- /* put RNG4 into run mode */
- clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0);
- return;
- }
+ if (ent_delay <= val)
+ goto start_rng;
val = rd_reg32(&r4tst->rtsdctl);
val = (val & ~RTSDCTL_ENT_DLY_MASK) |
@@ -381,15 +378,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
/* read the control register */
val = rd_reg32(&r4tst->rtmctl);
+start_rng:
/*
* select raw sampling in both entropy shifter
- * and statistical checker
+ * and statistical checker; ; put RNG4 into run mode
*/
- clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC);
- /* put RNG4 into run mode */
- clrsetbits_32(&val, RTMCTL_PRGM, 0);
- /* write back the control register */
- wr_reg32(&r4tst->rtmctl, val);
+ clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
}
/**
@@ -545,13 +539,13 @@ static int caam_probe(struct platform_device *pdev)
else
BLOCK_OFFSET = PG_SIZE_64K;
- ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl;
- ctrlpriv->assure = (struct caam_assurance __force *)
- ((uint8_t *)ctrl +
+ ctrlpriv->ctrl = (struct caam_ctrl __iomem __force *)ctrl;
+ ctrlpriv->assure = (struct caam_assurance __iomem __force *)
+ ((__force uint8_t *)ctrl +
BLOCK_OFFSET * ASSURE_BLOCK_NUMBER
);
- ctrlpriv->deco = (struct caam_deco __force *)
- ((uint8_t *)ctrl +
+ ctrlpriv->deco = (struct caam_deco __iomem __force *)
+ ((__force uint8_t *)ctrl +
BLOCK_OFFSET * DECO_BLOCK_NUMBER
);
@@ -630,8 +624,8 @@ static int caam_probe(struct platform_device *pdev)
ring);
continue;
}
- ctrlpriv->jr[ring] = (struct caam_job_ring __force *)
- ((uint8_t *)ctrl +
+ ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *)
+ ((__force uint8_t *)ctrl +
(ring + JR_BLOCK_NUMBER) *
BLOCK_OFFSET
);
@@ -644,8 +638,8 @@ static int caam_probe(struct platform_device *pdev)
!!(rd_reg32(&ctrl->perfmon.comp_parms_ms) &
CTPR_MS_QI_MASK);
if (ctrlpriv->qi_present) {
- ctrlpriv->qi = (struct caam_queue_if __force *)
- ((uint8_t *)ctrl +
+ ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
+ ((__force uint8_t *)ctrl +
BLOCK_OFFSET * QI_BLOCK_NUMBER
);
/* This is all that's required to physically enable QI */
@@ -803,7 +797,7 @@ static int caam_probe(struct platform_device *pdev)
&caam_fops_u32_ro);
/* Internal covering keys (useful in non-secure mode only) */
- ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0];
+ ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0];
ctrlpriv->ctl_kek_wrap.size = KEK_KEY_SIZE * sizeof(u32);
ctrlpriv->ctl_kek = debugfs_create_blob("kek",
S_IRUSR |
@@ -811,7 +805,7 @@ static int caam_probe(struct platform_device *pdev)
ctrlpriv->ctl,
&ctrlpriv->ctl_kek_wrap);
- ctrlpriv->ctl_tkek_wrap.data = &ctrlpriv->ctrl->tkek[0];
+ ctrlpriv->ctl_tkek_wrap.data = (__force void *)&ctrlpriv->ctrl->tkek[0];
ctrlpriv->ctl_tkek_wrap.size = KEK_KEY_SIZE * sizeof(u32);
ctrlpriv->ctl_tkek = debugfs_create_blob("tkek",
S_IRUSR |
@@ -819,7 +813,7 @@ static int caam_probe(struct platform_device *pdev)
ctrlpriv->ctl,
&ctrlpriv->ctl_tkek_wrap);
- ctrlpriv->ctl_tdsk_wrap.data = &ctrlpriv->ctrl->tdsk[0];
+ ctrlpriv->ctl_tdsk_wrap.data = (__force void *)&ctrlpriv->ctrl->tdsk[0];
ctrlpriv->ctl_tdsk_wrap.size = KEK_KEY_SIZE * sizeof(u32);
ctrlpriv->ctl_tdsk = debugfs_create_blob("tdsk",
S_IRUSR |
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 757c27f9953d..7331ea734f37 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -489,7 +489,7 @@ static int caam_jr_probe(struct platform_device *pdev)
return -ENOMEM;
}
- jrpriv->rregs = (struct caam_job_ring __force *)ctrl;
+ jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl;
if (sizeof(dma_addr_t) == sizeof(u64))
if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring"))
--
2.4.4
^ permalink raw reply related
* Re: [PATCH 11/14] Revert "crypto: caam - get rid of tasklet"
From: Russell King - ARM Linux @ 2016-11-09 8:53 UTC (permalink / raw)
To: Horia Geantă, Thomas Gleixner
Cc: Herbert Xu, David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-12-git-send-email-horia.geanta@nxp.com>
Please include Thomas in this.
On Wed, Nov 09, 2016 at 10:46:21AM +0200, Horia Geantă wrote:
> This reverts commit 66d2e2028091a074aa1290d2eeda5ddb1a6c329c.
>
> Quoting from Russell's findings:
> https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg21136.html
>
> [quote]
> Okay, I've re-tested, using a different way of measuring, because using
> openssl speed is impractical for off-loaded engines. I've decided to
> use this way to measure the performance:
>
> dd if=/dev/zero bs=1048576 count=128 | /usr/bin/time openssl dgst -md5
>
> For the threaded IRQs case gives:
>
> 0.05user 2.74system 0:05.30elapsed 52%CPU (0avgtext+0avgdata 2400maxresident)k
> 0.06user 2.52system 0:05.18elapsed 49%CPU (0avgtext+0avgdata 2404maxresident)k
> 0.12user 2.60system 0:05.61elapsed 48%CPU (0avgtext+0avgdata 2460maxresident)k
> => 5.36s => 25.0MB/s
>
> and the tasklet case:
>
> 0.08user 2.53system 0:04.83elapsed 54%CPU (0avgtext+0avgdata 2468maxresident)k
> 0.09user 2.47system 0:05.16elapsed 49%CPU (0avgtext+0avgdata 2368maxresident)k
> 0.10user 2.51system 0:04.87elapsed 53%CPU (0avgtext+0avgdata 2460maxresident)k
> => 4.95 => 27.1MB/s
>
> which corresponds to an 8% slowdown for the threaded IRQ case. So,
> tasklets are indeed faster than threaded IRQs.
>
> [...]
>
> I think I've proven from the above that this patch needs to be reverted
> due to the performance regression, and that there _is_ most definitely
> a deterimental effect of switching from tasklets to threaded IRQs.
> [/quote]
>
> Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
> ---
> drivers/crypto/caam/intern.h | 1 +
> drivers/crypto/caam/jr.c | 25 ++++++++++++++++---------
> 2 files changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
> index 5d4c05074a5c..e2bcacc1a921 100644
> --- a/drivers/crypto/caam/intern.h
> +++ b/drivers/crypto/caam/intern.h
> @@ -41,6 +41,7 @@ struct caam_drv_private_jr {
> struct device *dev;
> int ridx;
> struct caam_job_ring __iomem *rregs; /* JobR's register space */
> + struct tasklet_struct irqtask;
> int irq; /* One per queue */
>
> /* Number of scatterlist crypt transforms active on the JobR */
> diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
> index 7331ea734f37..c8604dfadbf5 100644
> --- a/drivers/crypto/caam/jr.c
> +++ b/drivers/crypto/caam/jr.c
> @@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev)
>
> ret = caam_reset_hw_jr(dev);
>
> + tasklet_kill(&jrp->irqtask);
> +
> /* Release interrupt */
> free_irq(jrp->irq, dev);
>
> @@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
>
> /*
> * Check the output ring for ready responses, kick
> - * the threaded irq if jobs done.
> + * tasklet if jobs done.
> */
> irqstate = rd_reg32(&jrp->rregs->jrintstatus);
> if (!irqstate)
> @@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
> /* Have valid interrupt at this point, just ACK and trigger */
> wr_reg32(&jrp->rregs->jrintstatus, irqstate);
>
> - return IRQ_WAKE_THREAD;
> + preempt_disable();
> + tasklet_schedule(&jrp->irqtask);
> + preempt_enable();
> +
> + return IRQ_HANDLED;
> }
>
> -static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
> +/* Deferred service handler, run as interrupt-fired tasklet */
> +static void caam_jr_dequeue(unsigned long devarg)
> {
> int hw_idx, sw_idx, i, head, tail;
> - struct device *dev = st_dev;
> + struct device *dev = (struct device *)devarg;
> struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
> void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
> u32 *userdesc, userstatus;
> @@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
>
> /* reenable / unmask IRQs */
> clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
> -
> - return IRQ_HANDLED;
> }
>
> /**
> @@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev)
>
> jrp = dev_get_drvdata(dev);
>
> + tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
> +
> /* Connect job ring interrupt handler. */
> - error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
> - caam_jr_threadirq, IRQF_SHARED,
> - dev_name(dev), dev);
> + error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
> + dev_name(dev), dev);
> if (error) {
> dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
> jrp->ridx, jrp->irq);
> @@ -454,6 +460,7 @@ static int caam_jr_init(struct device *dev)
> out_free_irq:
> free_irq(jrp->irq, dev);
> out_kill_deq:
> + tasklet_kill(&jrp->irqtask);
> return error;
> }
>
> --
> 2.4.4
>
--
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.
^ permalink raw reply
* [PATCH 03/14] crypto: caam - desc.h fixes
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
1. fix HDR_START_IDX_MASK, HDR_SD_SHARE_MASK, HDR_JD_SHARE_MASK
Define HDR_START_IDX_MASK consistently with the other masks:
mask = bitmask << offset
2. OP_ALG_TYPE_CLASS1 and OP_ALG_TYPE_CLASS2 must be shifted.
3. fix FIFO_STORE output data type value for AFHA S-Box
4. fix OPERATION pkha modular arithmetic source mask
5. rename LDST_SRCDST_WORD_CLASS1_ICV_SZ to
LDST_SRCDST_WORD_CLASS1_IV_SZ (it refers to IV, not ICV).
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/desc.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 513b6646bb36..61059abef737 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -90,8 +90,8 @@ struct sec4_sg_entry {
#define HDR_ZRO 0x00008000
/* Start Index or SharedDesc Length */
-#define HDR_START_IDX_MASK 0x3f
#define HDR_START_IDX_SHIFT 16
+#define HDR_START_IDX_MASK (0x3f << HDR_START_IDX_SHIFT)
/* If shared descriptor header, 6-bit length */
#define HDR_DESCLEN_SHR_MASK 0x3f
@@ -121,10 +121,10 @@ struct sec4_sg_entry {
#define HDR_PROP_DNR 0x00000800
/* JobDesc/SharedDesc share property */
-#define HDR_SD_SHARE_MASK 0x03
#define HDR_SD_SHARE_SHIFT 8
-#define HDR_JD_SHARE_MASK 0x07
+#define HDR_SD_SHARE_MASK (0x03 << HDR_SD_SHARE_SHIFT)
#define HDR_JD_SHARE_SHIFT 8
+#define HDR_JD_SHARE_MASK (0x07 << HDR_JD_SHARE_SHIFT)
#define HDR_SHARE_NEVER (0x00 << HDR_SD_SHARE_SHIFT)
#define HDR_SHARE_WAIT (0x01 << HDR_SD_SHARE_SHIFT)
@@ -235,7 +235,7 @@ struct sec4_sg_entry {
#define LDST_SRCDST_WORD_DECO_MATH2 (0x0a << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DECO_AAD_SZ (0x0b << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DECO_MATH3 (0x0b << LDST_SRCDST_SHIFT)
-#define LDST_SRCDST_WORD_CLASS1_ICV_SZ (0x0c << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_CLASS1_IV_SZ (0x0c << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_ALTDS_CLASS1 (0x0f << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_PKHA_A_SZ (0x10 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT)
@@ -400,7 +400,7 @@ struct sec4_sg_entry {
#define FIFOST_TYPE_PKHA_N (0x08 << FIFOST_TYPE_SHIFT)
#define FIFOST_TYPE_PKHA_A (0x0c << FIFOST_TYPE_SHIFT)
#define FIFOST_TYPE_PKHA_B (0x0d << FIFOST_TYPE_SHIFT)
-#define FIFOST_TYPE_AF_SBOX_JKEK (0x10 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_AF_SBOX_JKEK (0x20 << FIFOST_TYPE_SHIFT)
#define FIFOST_TYPE_AF_SBOX_TKEK (0x21 << FIFOST_TYPE_SHIFT)
#define FIFOST_TYPE_PKHA_E_JKEK (0x22 << FIFOST_TYPE_SHIFT)
#define FIFOST_TYPE_PKHA_E_TKEK (0x23 << FIFOST_TYPE_SHIFT)
@@ -1107,8 +1107,8 @@ struct sec4_sg_entry {
/* For non-protocol/alg-only op commands */
#define OP_ALG_TYPE_SHIFT 24
#define OP_ALG_TYPE_MASK (0x7 << OP_ALG_TYPE_SHIFT)
-#define OP_ALG_TYPE_CLASS1 2
-#define OP_ALG_TYPE_CLASS2 4
+#define OP_ALG_TYPE_CLASS1 (2 << OP_ALG_TYPE_SHIFT)
+#define OP_ALG_TYPE_CLASS2 (4 << OP_ALG_TYPE_SHIFT)
#define OP_ALG_ALGSEL_SHIFT 16
#define OP_ALG_ALGSEL_MASK (0xff << OP_ALG_ALGSEL_SHIFT)
@@ -1249,7 +1249,7 @@ struct sec4_sg_entry {
#define OP_ALG_PKMODE_MOD_PRIMALITY 0x00f
/* PKHA mode copy-memory functions */
-#define OP_ALG_PKMODE_SRC_REG_SHIFT 13
+#define OP_ALG_PKMODE_SRC_REG_SHIFT 17
#define OP_ALG_PKMODE_SRC_REG_MASK (7 << OP_ALG_PKMODE_SRC_REG_SHIFT)
#define OP_ALG_PKMODE_DST_REG_SHIFT 10
#define OP_ALG_PKMODE_DST_REG_MASK (7 << OP_ALG_PKMODE_DST_REG_SHIFT)
--
2.4.4
^ permalink raw reply related
* [PATCH 11/14] Revert "crypto: caam - get rid of tasklet"
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto, Russell King
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
This reverts commit 66d2e2028091a074aa1290d2eeda5ddb1a6c329c.
Quoting from Russell's findings:
https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg21136.html
[quote]
Okay, I've re-tested, using a different way of measuring, because using
openssl speed is impractical for off-loaded engines. I've decided to
use this way to measure the performance:
dd if=/dev/zero bs=1048576 count=128 | /usr/bin/time openssl dgst -md5
For the threaded IRQs case gives:
0.05user 2.74system 0:05.30elapsed 52%CPU (0avgtext+0avgdata 2400maxresident)k
0.06user 2.52system 0:05.18elapsed 49%CPU (0avgtext+0avgdata 2404maxresident)k
0.12user 2.60system 0:05.61elapsed 48%CPU (0avgtext+0avgdata 2460maxresident)k
=> 5.36s => 25.0MB/s
and the tasklet case:
0.08user 2.53system 0:04.83elapsed 54%CPU (0avgtext+0avgdata 2468maxresident)k
0.09user 2.47system 0:05.16elapsed 49%CPU (0avgtext+0avgdata 2368maxresident)k
0.10user 2.51system 0:04.87elapsed 53%CPU (0avgtext+0avgdata 2460maxresident)k
=> 4.95 => 27.1MB/s
which corresponds to an 8% slowdown for the threaded IRQ case. So,
tasklets are indeed faster than threaded IRQs.
[...]
I think I've proven from the above that this patch needs to be reverted
due to the performance regression, and that there _is_ most definitely
a deterimental effect of switching from tasklets to threaded IRQs.
[/quote]
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/intern.h | 1 +
drivers/crypto/caam/jr.c | 25 ++++++++++++++++---------
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 5d4c05074a5c..e2bcacc1a921 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -41,6 +41,7 @@ struct caam_drv_private_jr {
struct device *dev;
int ridx;
struct caam_job_ring __iomem *rregs; /* JobR's register space */
+ struct tasklet_struct irqtask;
int irq; /* One per queue */
/* Number of scatterlist crypt transforms active on the JobR */
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 7331ea734f37..c8604dfadbf5 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev)
ret = caam_reset_hw_jr(dev);
+ tasklet_kill(&jrp->irqtask);
+
/* Release interrupt */
free_irq(jrp->irq, dev);
@@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
/*
* Check the output ring for ready responses, kick
- * the threaded irq if jobs done.
+ * tasklet if jobs done.
*/
irqstate = rd_reg32(&jrp->rregs->jrintstatus);
if (!irqstate)
@@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
/* Have valid interrupt at this point, just ACK and trigger */
wr_reg32(&jrp->rregs->jrintstatus, irqstate);
- return IRQ_WAKE_THREAD;
+ preempt_disable();
+ tasklet_schedule(&jrp->irqtask);
+ preempt_enable();
+
+ return IRQ_HANDLED;
}
-static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
+/* Deferred service handler, run as interrupt-fired tasklet */
+static void caam_jr_dequeue(unsigned long devarg)
{
int hw_idx, sw_idx, i, head, tail;
- struct device *dev = st_dev;
+ struct device *dev = (struct device *)devarg;
struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
u32 *userdesc, userstatus;
@@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
/* reenable / unmask IRQs */
clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
-
- return IRQ_HANDLED;
}
/**
@@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev)
jrp = dev_get_drvdata(dev);
+ tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
+
/* Connect job ring interrupt handler. */
- error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
- caam_jr_threadirq, IRQF_SHARED,
- dev_name(dev), dev);
+ error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
+ dev_name(dev), dev);
if (error) {
dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
jrp->ridx, jrp->irq);
@@ -454,6 +460,7 @@ static int caam_jr_init(struct device *dev)
out_free_irq:
free_irq(jrp->irq, dev);
out_kill_deq:
+ tasklet_kill(&jrp->irqtask);
return error;
}
--
2.4.4
^ permalink raw reply related
* [PATCH 02/14] crypto: caam - completely remove error propagation handling
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
Commit 4464a7d4f53d756101291da26563f37f7fce40f3
("crypto: caam - remove error propagation handling")
removed error propagation handling only from caamalg.
Do this in all other places: caamhash, caamrng.
Update descriptors' lengths appropriately.
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamhash.c | 5 +----
drivers/crypto/caam/caamrng.c | 5 +----
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 660dc206969f..51990dd56024 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -72,7 +72,7 @@
#define CAAM_MAX_HASH_DIGEST_SIZE SHA512_DIGEST_SIZE
/* length of descriptors text */
-#define DESC_AHASH_BASE (4 * CAAM_CMD_SZ)
+#define DESC_AHASH_BASE (3 * CAAM_CMD_SZ)
#define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ)
#define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
#define DESC_AHASH_FINAL_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
@@ -246,9 +246,6 @@ static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
set_jump_tgt_here(desc, key_jump_cmd);
}
-
- /* Propagate errors from shared to job descriptor */
- append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
}
/*
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 9b92af2c7241..bb1c118b2d1b 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -52,7 +52,7 @@
/* length of descriptors */
#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
-#define DESC_RNG_LEN (4 * CAAM_CMD_SZ)
+#define DESC_RNG_LEN (3 * CAAM_CMD_SZ)
/* Buffer, its dma address and lock */
struct buf_data {
@@ -196,9 +196,6 @@ static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
init_sh_desc(desc, HDR_SHARE_SERIAL);
- /* Propagate errors from shared to job descriptor */
- append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-
/* Generate random bytes */
append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
--
2.4.4
^ permalink raw reply related
* [PATCH 10/14] crypto: caam - fix DMA API mapping leak in ablkcipher code
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
alkcipher_edesc_alloc() and ablkcipher_giv_edesc_alloc() don't
free / unmap resources on error path:
- dmap_map_sg() could fail, thus make sure the return value is checked
- unmap DMA mappings in case of error
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamalg.c | 44 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 34dfcdb7ff84..8db54b090d39 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -2600,16 +2600,33 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
if (likely(req->src == req->dst)) {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL);
+ if (unlikely(!sgc)) {
+ dev_err(jrdev, "unable to map source\n");
+ return ERR_PTR(-ENOMEM);
+ }
} else {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE);
+ if (unlikely(!sgc)) {
+ dev_err(jrdev, "unable to map source\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE);
+ if (unlikely(!sgc)) {
+ dev_err(jrdev, "unable to map destination\n");
+ dma_unmap_sg(jrdev, req->src, src_nents ? : 1,
+ DMA_TO_DEVICE);
+ return ERR_PTR(-ENOMEM);
+ }
}
iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
+ caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+ 0, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -2629,6 +2646,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
+ caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+ iv_dma, ivsize, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -2655,6 +2674,9 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
sec4_sg_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
dev_err(jrdev, "unable to map S/G table\n");
+ caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+ iv_dma, ivsize, 0, 0);
+ kfree(edesc);
return ERR_PTR(-ENOMEM);
}
@@ -2776,11 +2798,26 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
if (likely(req->src == req->dst)) {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_BIDIRECTIONAL);
+ if (unlikely(!sgc)) {
+ dev_err(jrdev, "unable to map source\n");
+ return ERR_PTR(-ENOMEM);
+ }
} else {
sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
DMA_TO_DEVICE);
+ if (unlikely(!sgc)) {
+ dev_err(jrdev, "unable to map source\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
DMA_FROM_DEVICE);
+ if (unlikely(!sgc)) {
+ dev_err(jrdev, "unable to map destination\n");
+ dma_unmap_sg(jrdev, req->src, src_nents ? : 1,
+ DMA_TO_DEVICE);
+ return ERR_PTR(-ENOMEM);
+ }
}
/*
@@ -2790,6 +2827,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, iv_dma)) {
dev_err(jrdev, "unable to map IV\n");
+ caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+ 0, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -2805,6 +2844,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
GFP_DMA | flags);
if (!edesc) {
dev_err(jrdev, "could not allocate extended descriptor\n");
+ caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+ iv_dma, ivsize, 0, 0);
return ERR_PTR(-ENOMEM);
}
@@ -2832,6 +2873,9 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
sec4_sg_bytes, DMA_TO_DEVICE);
if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
dev_err(jrdev, "unable to map S/G table\n");
+ caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+ iv_dma, ivsize, 0, 0);
+ kfree(edesc);
return ERR_PTR(-ENOMEM);
}
edesc->iv_dma = iv_dma;
--
2.4.4
^ permalink raw reply related
* [PATCH 00/14] crypto: caam - fixes, clean-up
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
This is a batch of fixes and clean-up for caam driver.
Only the fix for the givencrypt shared descriptors is high-impact
and thus sent to -stable.
Thanks,
Horia
Alex Porosanu (1):
crypto: caam - fix AEAD givenc descriptors
Horia Geantă (13):
crypto: caam - completely remove error propagation handling
crypto: caam - desc.h fixes
crypto: caam - fix sparse warnings
crypto: caam - fix smatch warnings
crypto: caam - remove unused may_sleep in dbg_dump_sg()
crypto: caam - remove unused command from aead givencrypt
crypto: caam - trivial code clean-up
crypto: caam - remove unreachable code in report_ccb_status()
crypto: caam - fix DMA API mapping leak in ablkcipher code
Revert "crypto: caam - get rid of tasklet"
crypto: caam - move sec4_sg_entry to sg_sw_sec4.h
crypto: caam - constify pointer to descriptor buffer
crypto: caam - merge identical ahash_final/finup shared desc
drivers/crypto/caam/caamalg.c | 92 +++++++++++++++++++++++++--------------
drivers/crypto/caam/caamhash.c | 43 +++---------------
drivers/crypto/caam/caampkc.c | 4 +-
drivers/crypto/caam/caamrng.c | 10 ++---
drivers/crypto/caam/ctrl.c | 40 ++++++++---------
drivers/crypto/caam/desc.h | 22 ++++------
drivers/crypto/caam/desc_constr.h | 72 +++++++++++++++---------------
drivers/crypto/caam/error.c | 5 +--
drivers/crypto/caam/intern.h | 1 +
drivers/crypto/caam/jr.c | 27 +++++++-----
drivers/crypto/caam/sg_sw_sec4.h | 6 ++-
11 files changed, 158 insertions(+), 164 deletions(-)
--
2.4.4
^ permalink raw reply
* [PATCH 12/14] crypto: caam - move sec4_sg_entry to sg_sw_sec4.h
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
sec4_sg_entry structure is used only by helper functions in sg_sw_sec4.h.
Since SEC HW S/G entries are to be manipulated only indirectly, via these
functions, move sec4_sg_entry to the corresponding header.
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/desc.h | 6 ------
drivers/crypto/caam/sg_sw_sec4.h | 6 +++++-
2 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 61059abef737..2e6766a1573f 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -22,12 +22,6 @@
#define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */
#define SEC4_SG_OFFSET_MASK 0x00001fff
-struct sec4_sg_entry {
- u64 ptr;
- u32 len;
- u32 bpid_offset;
-};
-
/* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
#define MAX_CAAM_DESCSIZE 64
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 41cd5a356d05..6afa20c4a013 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -7,7 +7,11 @@
#include "regs.h"
-struct sec4_sg_entry;
+struct sec4_sg_entry {
+ u64 ptr;
+ u32 len;
+ u32 bpid_offset;
+};
/*
* convert single dma address to h/w link table format
--
2.4.4
^ permalink raw reply related
* [PATCH 07/14] crypto: caam - remove unused command from aead givencrypt
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
REG3 no longer needs to be updated, since it's not used after that.
This shared descriptor command is a leftover of the conversion to
AEAD interface.
Fixes: 479bcc7c5b9e1 "crypto: caam - Convert authenc to new AEAD interface"
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamalg.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index cb26a57c5aaf..1982dacce80e 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -75,7 +75,7 @@
#define DESC_AEAD_BASE (4 * CAAM_CMD_SZ)
#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ)
#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
-#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 9 * CAAM_CMD_SZ)
+#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ)
/* Note: Nonce is counted in enckeylen */
#define DESC_AEAD_CTR_RFC3686_LEN (4 * CAAM_CMD_SZ)
@@ -697,9 +697,6 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
- /* ivsize + cryptlen = seqoutlen - authsize */
- append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
/* Skip assoc data */
append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
--
2.4.4
^ permalink raw reply related
* [PATCH 05/14] crypto: caam - fix smatch warnings
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
Fix the following smatch warnings:
drivers/crypto/caam/caamalg.c:2350 aead_edesc_alloc() warn: we tested 'src_nents' before and it was 'true'
drivers/crypto/caam/caamrng.c:351 caam_rng_init() error: no modifiers for allocation.
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamalg.c | 4 +---
drivers/crypto/caam/caamrng.c | 2 +-
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 5317d8cad44d..bc433edd34c4 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -2346,10 +2346,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
/* Check if data are contiguous. */
all_contig = !src_nents;
- if (!all_contig) {
- src_nents = src_nents ? : 1;
+ if (!all_contig)
sec4_sg_len = src_nents;
- }
sec4_sg_len += dst_nents;
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index bb1c118b2d1b..38bb2411afcf 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -348,7 +348,7 @@ static int __init caam_rng_init(void)
pr_err("Job Ring Device allocation for transform failed\n");
return PTR_ERR(dev);
}
- rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA);
+ rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
if (!rng_ctx) {
err = -ENOMEM;
goto free_caam_alloc;
--
2.4.4
^ permalink raw reply related
* [PATCH 06/14] crypto: caam - remove unused may_sleep in dbg_dump_sg()
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
Fixes: 5ecf8ef9103cb "crypto: caam - fix sg dump"
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamalg.c | 19 ++++++-------------
1 file changed, 6 insertions(+), 13 deletions(-)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index bc433edd34c4..cb26a57c5aaf 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -117,8 +117,7 @@
static void dbg_dump_sg(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
- struct scatterlist *sg, size_t tlen, bool ascii,
- bool may_sleep)
+ struct scatterlist *sg, size_t tlen, bool ascii)
{
struct scatterlist *it;
void *it_page;
@@ -2033,7 +2032,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
edesc->src_nents > 1 ? 100 : ivsize, 1);
dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
- edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
+ edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
#endif
ablkcipher_unmap(jrdev, edesc, req);
@@ -2065,7 +2064,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
ivsize, 1);
dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
- edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
+ edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
#endif
ablkcipher_unmap(jrdev, edesc, req);
@@ -2220,15 +2219,13 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
int len, sec4_sg_index = 0;
#ifdef DEBUG
- bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->info,
ivsize, 1);
printk(KERN_ERR "asked=%d, nbytes%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes);
dbg_dump_sg(KERN_ERR, "src @"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->src,
- edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
+ edesc->src_nents ? 100 : req->nbytes, 1);
#endif
len = desc_len(sh_desc);
@@ -2280,14 +2277,12 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
int len, sec4_sg_index = 0;
#ifdef DEBUG
- bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->info,
ivsize, 1);
dbg_dump_sg(KERN_ERR, "src @" __stringify(__LINE__) ": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->src,
- edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
+ edesc->src_nents ? 100 : req->nbytes, 1);
#endif
len = desc_len(sh_desc);
@@ -2556,11 +2551,9 @@ static int aead_decrypt(struct aead_request *req)
int ret = 0;
#ifdef DEBUG
- bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
DUMP_PREFIX_ADDRESS, 16, 4, req->src,
- req->assoclen + req->cryptlen, 1, may_sleep);
+ req->assoclen + req->cryptlen, 1);
#endif
/* allocate extended descriptor */
--
2.4.4
^ permalink raw reply related
* [PATCH 08/14] crypto: caam - trivial code clean-up
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
-replace offsetof with container_of
-remove unused "assoc_nents", "iv_dma" from aead_edesc
and fix comments
-remove unused CAAM_MAX_IV_LENGTH #define
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamalg.c | 16 ++++------------
drivers/crypto/caam/caamhash.c | 12 ++++--------
drivers/crypto/caam/caamrng.c | 3 +--
3 files changed, 9 insertions(+), 22 deletions(-)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 1982dacce80e..34dfcdb7ff84 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -62,8 +62,6 @@
#define CAAM_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + \
CTR_RFC3686_NONCE_SIZE + \
SHA512_DIGEST_SIZE * 2)
-/* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
-#define CAAM_MAX_IV_LENGTH 16
#define AEAD_DESC_JOB_IO_LEN (DESC_JOB_IO_LEN + CAAM_CMD_SZ * 2)
#define GCM_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \
@@ -1873,20 +1871,16 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
/*
* aead_edesc - s/w-extended aead descriptor
- * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist
* @src_nents: number of segments in input scatterlist
* @dst_nents: number of segments in output scatterlist
- * @iv_dma: dma address of iv for checking continuity and link table
- * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
* @sec4_sg_bytes: length of dma mapped sec4_sg space
* @sec4_sg_dma: bus physical mapped address of h/w link table
+ * @sec4_sg: pointer to h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
*/
struct aead_edesc {
- int assoc_nents;
int src_nents;
int dst_nents;
- dma_addr_t iv_dma;
int sec4_sg_bytes;
dma_addr_t sec4_sg_dma;
struct sec4_sg_entry *sec4_sg;
@@ -1898,9 +1892,9 @@ struct aead_edesc {
* @src_nents: number of segments in input scatterlist
* @dst_nents: number of segments in output scatterlist
* @iv_dma: dma address of iv for checking continuity and link table
- * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
* @sec4_sg_bytes: length of dma mapped sec4_sg space
* @sec4_sg_dma: bus physical mapped address of h/w link table
+ * @sec4_sg: pointer to h/w link table
* @hw_desc: the h/w job descriptor followed by any referenced link tables
*/
struct ablkcipher_edesc {
@@ -2017,8 +2011,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
- edesc = (struct ablkcipher_edesc *)((char *)desc -
- offsetof(struct ablkcipher_edesc, hw_desc));
+ edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
@@ -2050,8 +2043,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
- edesc = (struct ablkcipher_edesc *)((char *)desc -
- offsetof(struct ablkcipher_edesc, hw_desc));
+ edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 51990dd56024..d5a06c6923c0 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -636,8 +636,7 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
- edesc = (struct ahash_edesc *)((char *)desc -
- offsetof(struct ahash_edesc, hw_desc));
+ edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
@@ -671,8 +670,7 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
- edesc = (struct ahash_edesc *)((char *)desc -
- offsetof(struct ahash_edesc, hw_desc));
+ edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
@@ -706,8 +704,7 @@ static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
- edesc = (struct ahash_edesc *)((char *)desc -
- offsetof(struct ahash_edesc, hw_desc));
+ edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
@@ -741,8 +738,7 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
#endif
- edesc = (struct ahash_edesc *)((char *)desc -
- offsetof(struct ahash_edesc, hw_desc));
+ edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 38bb2411afcf..41398da3edf4 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -100,8 +100,7 @@ static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
{
struct buf_data *bd;
- bd = (struct buf_data *)((char *)desc -
- offsetof(struct buf_data, hw_desc));
+ bd = container_of(desc, struct buf_data, hw_desc[0]);
if (err)
caam_jr_strstatus(jrdev, err);
--
2.4.4
^ permalink raw reply related
* [PATCH 14/14] crypto: caam - merge identical ahash_final/finup shared desc
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
Shared descriptors used by ahash_final() and ahash_finup()
are identical, thus get rid of one of them (sh_desc_finup).
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/caamhash.c | 26 +-------------------------
1 file changed, 1 insertion(+), 25 deletions(-)
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index d5a06c6923c0..86f360853502 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -103,12 +103,10 @@ struct caam_hash_ctx {
u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
- u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
dma_addr_t sh_desc_update_dma ____cacheline_aligned;
dma_addr_t sh_desc_update_first_dma;
dma_addr_t sh_desc_fin_dma;
dma_addr_t sh_desc_digest_dma;
- dma_addr_t sh_desc_finup_dma;
struct device *jrdev;
u32 alg_type;
u32 alg_op;
@@ -380,24 +378,6 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
desc_bytes(desc), 1);
#endif
- /* ahash_finup shared descriptor */
- desc = ctx->sh_desc_finup;
-
- ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
- OP_ALG_AS_FINALIZE, digestsize, ctx);
-
- ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) {
- dev_err(jrdev, "unable to map shared descriptor\n");
- return -ENOMEM;
- }
-#ifdef DEBUG
- print_hex_dump(KERN_ERR, "ahash finup shdesc@"__stringify(__LINE__)": ",
- DUMP_PREFIX_ADDRESS, 16, 4, desc,
- desc_bytes(desc), 1);
-#endif
-
/* ahash_digest shared descriptor */
desc = ctx->sh_desc_digest;
@@ -1071,7 +1051,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
/* allocate space for base edesc and hw desc commands, link tables */
edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
- ctx->sh_desc_finup, ctx->sh_desc_finup_dma,
+ ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
flags);
if (!edesc) {
dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
@@ -1886,10 +1866,6 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm)
dma_unmap_single(ctx->jrdev, ctx->sh_desc_digest_dma,
desc_bytes(ctx->sh_desc_digest),
DMA_TO_DEVICE);
- if (ctx->sh_desc_finup_dma &&
- !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma))
- dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma,
- desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE);
caam_jr_free(ctx->jrdev);
}
--
2.4.4
^ permalink raw reply related
* [PATCH 09/14] crypto: caam - remove unreachable code in report_ccb_status()
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
ERRID is a 4-bit field.
Since err_id values are in [0..15] and err_id_list array size is 16,
the condition "err_id < ARRAY_SIZE(err_id_list)" is always true.
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/error.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 33e41ea83fcc..79a0cc70717f 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -146,10 +146,9 @@ static void report_ccb_status(struct device *jrdev, const u32 status,
strlen(rng_err_id_list[err_id])) {
/* RNG-only error */
err_str = rng_err_id_list[err_id];
- } else if (err_id < ARRAY_SIZE(err_id_list))
+ } else {
err_str = err_id_list[err_id];
- else
- snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id);
+ }
/*
* CCB ICV check failures are part of normal operation life;
--
2.4.4
^ permalink raw reply related
* [PATCH 13/14] crypto: caam - constify pointer to descriptor buffer
From: Horia Geantă @ 2016-11-09 8:46 UTC (permalink / raw)
To: Herbert Xu; +Cc: David S. Miller, linux-crypto
In-Reply-To: <1478681184-9442-1-git-send-email-horia.geanta@nxp.com>
The pointer to the descriptor buffer is not touched,
it always points to start of the descriptor buffer.
Thus, make it const.
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
drivers/crypto/caam/desc_constr.h | 72 +++++++++++++++++++++------------------
1 file changed, 38 insertions(+), 34 deletions(-)
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index a8cd8a78ec1f..354da735af62 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -33,38 +33,39 @@
extern bool caam_little_end;
-static inline int desc_len(u32 *desc)
+static inline int desc_len(u32 * const desc)
{
return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK;
}
-static inline int desc_bytes(void *desc)
+static inline int desc_bytes(void * const desc)
{
return desc_len(desc) * CAAM_CMD_SZ;
}
-static inline u32 *desc_end(u32 *desc)
+static inline u32 *desc_end(u32 * const desc)
{
return desc + desc_len(desc);
}
-static inline void *sh_desc_pdb(u32 *desc)
+static inline void *sh_desc_pdb(u32 * const desc)
{
return desc + 1;
}
-static inline void init_desc(u32 *desc, u32 options)
+static inline void init_desc(u32 * const desc, u32 options)
{
*desc = cpu_to_caam32((options | HDR_ONE) + 1);
}
-static inline void init_sh_desc(u32 *desc, u32 options)
+static inline void init_sh_desc(u32 * const desc, u32 options)
{
PRINT_POS;
init_desc(desc, CMD_SHARED_DESC_HDR | options);
}
-static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
+static inline void init_sh_desc_pdb(u32 * const desc, u32 options,
+ size_t pdb_bytes)
{
u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
@@ -72,19 +73,20 @@ static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
options);
}
-static inline void init_job_desc(u32 *desc, u32 options)
+static inline void init_job_desc(u32 * const desc, u32 options)
{
init_desc(desc, CMD_DESC_HDR | options);
}
-static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
+static inline void init_job_desc_pdb(u32 * const desc, u32 options,
+ size_t pdb_bytes)
{
u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options);
}
-static inline void append_ptr(u32 *desc, dma_addr_t ptr)
+static inline void append_ptr(u32 * const desc, dma_addr_t ptr)
{
dma_addr_t *offset = (dma_addr_t *)desc_end(desc);
@@ -94,8 +96,8 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr)
CAAM_PTR_SZ / CAAM_CMD_SZ);
}
-static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
- u32 options)
+static inline void init_job_desc_shared(u32 * const desc, dma_addr_t ptr,
+ int len, u32 options)
{
PRINT_POS;
init_job_desc(desc, HDR_SHARED | options |
@@ -103,7 +105,7 @@ static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
append_ptr(desc, ptr);
}
-static inline void append_data(u32 *desc, void *data, int len)
+static inline void append_data(u32 * const desc, void *data, int len)
{
u32 *offset = desc_end(desc);
@@ -114,7 +116,7 @@ static inline void append_data(u32 *desc, void *data, int len)
(len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ);
}
-static inline void append_cmd(u32 *desc, u32 command)
+static inline void append_cmd(u32 * const desc, u32 command)
{
u32 *cmd = desc_end(desc);
@@ -125,7 +127,7 @@ static inline void append_cmd(u32 *desc, u32 command)
#define append_u32 append_cmd
-static inline void append_u64(u32 *desc, u64 data)
+static inline void append_u64(u32 * const desc, u64 data)
{
u32 *offset = desc_end(desc);
@@ -142,14 +144,14 @@ static inline void append_u64(u32 *desc, u64 data)
}
/* Write command without affecting header, and return pointer to next word */
-static inline u32 *write_cmd(u32 *desc, u32 command)
+static inline u32 *write_cmd(u32 * const desc, u32 command)
{
*desc = cpu_to_caam32(command);
return desc + 1;
}
-static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
+static inline void append_cmd_ptr(u32 * const desc, dma_addr_t ptr, int len,
u32 command)
{
append_cmd(desc, command | len);
@@ -157,7 +159,7 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
}
/* Write length after pointer, rather than inside command */
-static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
+static inline void append_cmd_ptr_extlen(u32 * const desc, dma_addr_t ptr,
unsigned int len, u32 command)
{
append_cmd(desc, command);
@@ -166,7 +168,7 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
append_cmd(desc, len);
}
-static inline void append_cmd_data(u32 *desc, void *data, int len,
+static inline void append_cmd_data(u32 * const desc, void *data, int len,
u32 command)
{
append_cmd(desc, command | IMMEDIATE | len);
@@ -174,7 +176,7 @@ static inline void append_cmd_data(u32 *desc, void *data, int len,
}
#define APPEND_CMD_RET(cmd, op) \
-static inline u32 *append_##cmd(u32 *desc, u32 options) \
+static inline u32 *append_##cmd(u32 * const desc, u32 options) \
{ \
u32 *cmd = desc_end(desc); \
PRINT_POS; \
@@ -184,13 +186,13 @@ static inline u32 *append_##cmd(u32 *desc, u32 options) \
APPEND_CMD_RET(jump, JUMP)
APPEND_CMD_RET(move, MOVE)
-static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd)
+static inline void set_jump_tgt_here(u32 * const desc, u32 *jump_cmd)
{
*jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) |
(desc_len(desc) - (jump_cmd - desc)));
}
-static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
+static inline void set_move_tgt_here(u32 * const desc, u32 *move_cmd)
{
u32 val = caam32_to_cpu(*move_cmd);
@@ -200,7 +202,7 @@ static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
}
#define APPEND_CMD(cmd, op) \
-static inline void append_##cmd(u32 *desc, u32 options) \
+static inline void append_##cmd(u32 * const desc, u32 options) \
{ \
PRINT_POS; \
append_cmd(desc, CMD_##op | options); \
@@ -208,7 +210,8 @@ static inline void append_##cmd(u32 *desc, u32 options) \
APPEND_CMD(operation, OPERATION)
#define APPEND_CMD_LEN(cmd, op) \
-static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \
+static inline void append_##cmd(u32 * const desc, unsigned int len, \
+ u32 options) \
{ \
PRINT_POS; \
append_cmd(desc, CMD_##op | len | options); \
@@ -220,8 +223,8 @@ APPEND_CMD_LEN(seq_fifo_load, SEQ_FIFO_LOAD)
APPEND_CMD_LEN(seq_fifo_store, SEQ_FIFO_STORE)
#define APPEND_CMD_PTR(cmd, op) \
-static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
- u32 options) \
+static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \
+ unsigned int len, u32 options) \
{ \
PRINT_POS; \
append_cmd_ptr(desc, ptr, len, CMD_##op | options); \
@@ -231,8 +234,8 @@ APPEND_CMD_PTR(load, LOAD)
APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
APPEND_CMD_PTR(fifo_store, FIFO_STORE)
-static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
- u32 options)
+static inline void append_store(u32 * const desc, dma_addr_t ptr,
+ unsigned int len, u32 options)
{
u32 cmd_src;
@@ -249,7 +252,8 @@ static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
}
#define APPEND_SEQ_PTR_INTLEN(cmd, op) \
-static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
+static inline void append_seq_##cmd##_ptr_intlen(u32 * const desc, \
+ dma_addr_t ptr, \
unsigned int len, \
u32 options) \
{ \
@@ -263,7 +267,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN)
APPEND_SEQ_PTR_INTLEN(out, OUT)
#define APPEND_CMD_PTR_TO_IMM(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
unsigned int len, u32 options) \
{ \
PRINT_POS; \
@@ -273,7 +277,7 @@ APPEND_CMD_PTR_TO_IMM(load, LOAD);
APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD);
#define APPEND_CMD_PTR_EXTLEN(cmd, op) \
-static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \
+static inline void append_##cmd##_extlen(u32 * const desc, dma_addr_t ptr, \
unsigned int len, u32 options) \
{ \
PRINT_POS; \
@@ -287,7 +291,7 @@ APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR)
* the size of its type
*/
#define APPEND_CMD_PTR_LEN(cmd, op, type) \
-static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \
+static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \
type len, u32 options) \
{ \
PRINT_POS; \
@@ -304,7 +308,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32)
* from length of immediate data provided, e.g., split keys
*/
#define APPEND_CMD_PTR_TO_IMM2(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
unsigned int data_len, \
unsigned int len, u32 options) \
{ \
@@ -315,7 +319,7 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
APPEND_CMD_PTR_TO_IMM2(key, KEY);
#define APPEND_CMD_RAW_IMM(cmd, op, type) \
-static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \
+static inline void append_##cmd##_imm_##type(u32 * const desc, type immediate, \
u32 options) \
{ \
PRINT_POS; \
--
2.4.4
^ permalink raw reply related
* Re: [PATCH] crypto: fix AEAD tag memory handling
From: Stephan Mueller @ 2016-11-09 13:20 UTC (permalink / raw)
To: Mat Martineau; +Cc: herbert, linux-crypto
In-Reply-To: <alpine.OSX.2.20.1610311456250.4111@mjmartin-mac01.local>
[-- Attachment #1: Type: text/plain, Size: 1231 bytes --]
Am Montag, 31. Oktober 2016, 16:18:32 CET schrieb Mat Martineau:
Hi Mat,
>
> My main concern is getting the semantics correct and consistent in a
> single patch series. It would be a big problem to explain that AF_ALG AEAD
> read and write works one way in 4.x, another way in 4.y, and some
> different way in 4.z.
I do have a patch now available that exactly does what you suggest. See the
patch attached. It works with the following exception.
In the case of sendpage and using an in-place cipher operation, the patch
breaks as follows. When the caller sends the same buffer for a sendpage
operation, the cipher operation now will write the ciphertext to the beginning
of the buffer where the AAD used to be. The subsequent tag calculation will
now use the data it finds where the AAD is expected. As the cipher operation
has already replaced the AAD with the ciphertext, the tag calculation will
take the ciphertext as AAD and thus calculate a wrong tag.
Thus, the only way to avoid that would be to duplicate the AAD into an
internal buffer. But that would defeat the entire purpose of sendpage.
The patch, however, works with sendmsg as well as sendpage when the src and
dst buffers are different.
Ciao
Stephan
[-- Attachment #2: fix_read_offset.diff --]
[-- Type: text/x-patch, Size: 7509 bytes --]
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index c54bcb8..c8efd01 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -32,6 +32,7 @@ struct aead_sg_list {
struct aead_async_rsgl {
struct af_alg_sgl sgl;
struct list_head list;
+ bool new_page;
};
struct aead_async_req {
@@ -405,6 +406,61 @@ static void aead_async_cb(struct crypto_async_request *_req, int err)
iocb->ki_complete(iocb, err, err);
}
+/**
+ * scatterwalk_get_part() - get subset a scatterlist
+ *
+ * @dst: destination SGL to receive the pointers from source SGL
+ * @src: source SGL
+ * @len: data length in bytes to get from source SGL
+ * @max_sgs: number of SGs present in dst SGL to prevent overstepping boundaries
+ *
+ * @return: number of SG entries in dst
+ */
+static inline int scatterwalk_get_part(struct scatterlist *dst,
+ struct scatterlist *src,
+ unsigned int len, unsigned int max_sgs)
+{
+ /* leave one SG entry for chaining */
+ unsigned int j = 1;
+
+ while (len && j < max_sgs) {
+ unsigned int todo = min_t(unsigned int, len, src->length);
+
+ sg_set_page(dst, sg_page(src), todo, src->offset);
+ if (src->length >= len) {
+ sg_mark_end(dst);
+ break;
+ }
+ len -= todo;
+ j++;
+ src = sg_next(src);
+ dst = sg_next(dst);
+ }
+
+ return j;
+}
+
+static inline int aead_alloc_rsgl(struct sock *sk, struct aead_async_rsgl **ret)
+{
+ struct aead_async_rsgl *rsgl =
+ sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
+ if (unlikely(!rsgl))
+ return -ENOMEM;
+ *ret = rsgl;
+ return 0;
+}
+
+static inline int aead_get_rsgl_areq(struct sock *sk,
+ struct aead_async_req *areq,
+ struct aead_async_rsgl **ret)
+{
+ if (list_empty(&areq->list)) {
+ *ret = &areq->first_rsgl;
+ return 0;
+ } else
+ return aead_alloc_rsgl(sk, ret);
+}
+
static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
int flags)
{
@@ -433,7 +489,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
if (!aead_sufficient_data(ctx))
goto unlock;
- used = ctx->used;
+ used = ctx->used - ctx->aead_assoclen;
if (ctx->enc)
outlen = used + as;
else
@@ -452,7 +508,6 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
aead_request_set_ad(req, ctx->aead_assoclen);
aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
aead_async_cb, sk);
- used -= ctx->aead_assoclen;
/* take over all tx sgls from ctx */
areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur,
@@ -467,21 +522,26 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
areq->tsgls = sgl->cur;
+ /* set AAD buffer */
+ err = aead_get_rsgl_areq(sk, areq, &rsgl);
+ if (err)
+ goto free;
+ list_add_tail(&rsgl->list, &areq->list);
+ sg_init_table(rsgl->sgl.sg, ALG_MAX_PAGES);
+ rsgl->sgl.npages = scatterwalk_get_part(rsgl->sgl.sg, sgl->sg,
+ ctx->aead_assoclen,
+ ALG_MAX_PAGES);
+ rsgl->new_page = false;
+ last_rsgl = rsgl;
+
/* create rx sgls */
while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
- if (list_empty(&areq->list)) {
- rsgl = &areq->first_rsgl;
-
- } else {
- rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
- if (unlikely(!rsgl)) {
- err = -ENOMEM;
- goto free;
- }
- }
+ err = aead_get_rsgl_areq(sk, areq, &rsgl);
+ if (err)
+ goto free;
rsgl->sgl.npages = 0;
list_add_tail(&rsgl->list, &areq->list);
@@ -491,6 +551,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
goto free;
usedpages += err;
+ rsgl->new_page = true;
/* chain the new scatterlist with previous one */
if (last_rsgl)
@@ -531,7 +592,8 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
free:
list_for_each_entry(rsgl, &areq->list, list) {
- af_alg_free_sg(&rsgl->sgl);
+ if (rsgl->new_page)
+ af_alg_free_sg(&rsgl->sgl);
if (rsgl != &areq->first_rsgl)
sock_kfree_s(sk, rsgl, sizeof(*rsgl));
}
@@ -545,6 +607,16 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
return err ? err : outlen;
}
+static inline int aead_get_rsgl_ctx(struct sock *sk, struct aead_ctx *ctx,
+ struct aead_async_rsgl **ret)
+{
+ if (list_empty(&ctx->list)) {
+ *ret = &ctx->first_rsgl;
+ return 0;
+ } else
+ return aead_alloc_rsgl(sk, ret);
+}
+
static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
{
struct sock *sk = sock->sk;
@@ -582,9 +654,6 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
goto unlock;
}
- /* data length provided by caller via sendmsg/sendpage */
- used = ctx->used;
-
/*
* Make sure sufficient data is present -- note, the same check is
* is also present in sendmsg/sendpage. The checks in sendpage/sendmsg
@@ -598,6 +667,12 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
goto unlock;
/*
+ * The cipher operation input data is reduced by the associated data
+ * as the destination buffer will not hold the AAD.
+ */
+ used = ctx->used - ctx->aead_assoclen;
+
+ /*
* Calculate the minimum output buffer size holding the result of the
* cipher operation. When encrypting data, the receiving buffer is
* larger by the tag length compared to the input buffer as the
@@ -611,25 +686,29 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
outlen = used - as;
/*
- * The cipher operation input data is reduced by the associated data
- * length as this data is processed separately later on.
+ * Pre-pend the AAD buffer from the source SGL to the destination SGL.
+ * As the AAD buffer is not touched by the AEAD operation, the source
+ * SG buffers remain unchanged.
*/
- used -= ctx->aead_assoclen;
+ err = aead_get_rsgl_ctx(sk, ctx, &rsgl);
+ if (err)
+ goto unlock;
+ list_add_tail(&rsgl->list, &ctx->list);
+ sg_init_table(rsgl->sgl.sg, ALG_MAX_PAGES);
+ rsgl->sgl.npages = scatterwalk_get_part(rsgl->sgl.sg, sgl->sg,
+ ctx->aead_assoclen,
+ ALG_MAX_PAGES);
+ rsgl->new_page = false;
+ last_rsgl = rsgl;
/* convert iovecs of output buffers into scatterlists */
while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
- if (list_empty(&ctx->list)) {
- rsgl = &ctx->first_rsgl;
- } else {
- rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
- if (unlikely(!rsgl)) {
- err = -ENOMEM;
- goto unlock;
- }
- }
+ err = aead_get_rsgl_ctx(sk, ctx, &rsgl);
+ if (err)
+ goto unlock;
rsgl->sgl.npages = 0;
list_add_tail(&rsgl->list, &ctx->list);
@@ -637,7 +716,10 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
if (err < 0)
goto unlock;
+
usedpages += err;
+ rsgl->new_page = true;
+
/* chain the new scatterlist with previous one */
if (last_rsgl)
af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
@@ -688,7 +770,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
unlock:
list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
- af_alg_free_sg(&rsgl->sgl);
+ if (rsgl->new_page)
+ af_alg_free_sg(&rsgl->sgl);
if (rsgl != &ctx->first_rsgl)
sock_kfree_s(sk, rsgl, sizeof(*rsgl));
list_del(&rsgl->list);
^ permalink raw reply related
* Re: [PATCH 11/14] Revert "crypto: caam - get rid of tasklet"
From: Thomas Gleixner @ 2016-11-09 23:17 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Horia Geantă, Herbert Xu, David S. Miller, linux-crypto
In-Reply-To: <20161109085322.GY1041@n2100.armlinux.org.uk>
[-- Attachment #1: Type: text/plain, Size: 1957 bytes --]
On Wed, 9 Nov 2016, Russell King - ARM Linux wrote:
> Please include Thomas in this.
Thanks!
> On Wed, Nov 09, 2016 at 10:46:21AM +0200, Horia Geantă wrote:
> > This reverts commit 66d2e2028091a074aa1290d2eeda5ddb1a6c329c.
> >
> > Quoting from Russell's findings:
> > https://www.mail-archive.com/linux-crypto@vger.kernel.org/msg21136.html
> >
> > [quote]
> > Okay, I've re-tested, using a different way of measuring, because using
> > openssl speed is impractical for off-loaded engines. I've decided to
> > use this way to measure the performance:
> >
> > dd if=/dev/zero bs=1048576 count=128 | /usr/bin/time openssl dgst -md5
> >
> > For the threaded IRQs case gives:
> >
> > 0.05user 2.74system 0:05.30elapsed 52%CPU (0avgtext+0avgdata 2400maxresident)k
> > 0.06user 2.52system 0:05.18elapsed 49%CPU (0avgtext+0avgdata 2404maxresident)k
> > 0.12user 2.60system 0:05.61elapsed 48%CPU (0avgtext+0avgdata 2460maxresident)k
> > => 5.36s => 25.0MB/s
> >
> > and the tasklet case:
> >
> > 0.08user 2.53system 0:04.83elapsed 54%CPU (0avgtext+0avgdata 2468maxresident)k
> > 0.09user 2.47system 0:05.16elapsed 49%CPU (0avgtext+0avgdata 2368maxresident)k
> > 0.10user 2.51system 0:04.87elapsed 53%CPU (0avgtext+0avgdata 2460maxresident)k
> > => 4.95 => 27.1MB/s
> >
> > which corresponds to an 8% slowdown for the threaded IRQ case. So,
> > tasklets are indeed faster than threaded IRQs.
Each operation involves:
submit job
hard irq handler
wakeup irq thread
context switch
wakeup openssl
context switch
and that repeats over and over.
Russell has provided me traces and the extra wakeup/context switch is what
causes the slowdown vs. the tasklet case which has only one wakeup/context
switch, unless it gets outsourced to ksoftirqd which might get even slower
than the threaded handler.
You might add something like that to the changelog so it's not just a
number comparison which does not explain why this happens.
Thanks,
tglx
^ permalink raw reply
* Re: [PATCH 11/14] Revert "crypto: caam - get rid of tasklet"
From: Thomas Gleixner @ 2016-11-09 23:19 UTC (permalink / raw)
To: Russell King - ARM Linux
Cc: Horia Geantă, Herbert Xu, David S. Miller, linux-crypto
In-Reply-To: <alpine.DEB.2.20.1611092358390.3501@nanos>
On Thu, 10 Nov 2016, Thomas Gleixner wrote:
> > > which corresponds to an 8% slowdown for the threaded IRQ case. So,
> > > tasklets are indeed faster than threaded IRQs.
Forgot to say, that this should be:
So, tasklets are indeed faster than threaded IRQs for this particular use
case. They are not generally faster.
Thanks,
tglx
^ permalink raw reply
* Re: [PATCH] crypto: fix AEAD tag memory handling
From: Mat Martineau @ 2016-11-10 0:26 UTC (permalink / raw)
To: Stephan Mueller; +Cc: herbert, linux-crypto
In-Reply-To: <13567923.CP2Rp1drvf@positron.chronox.de>
Stephan,
On Wed, 9 Nov 2016, Stephan Mueller wrote:
> Am Montag, 31. Oktober 2016, 16:18:32 CET schrieb Mat Martineau:
>
> Hi Mat,
>>
>> My main concern is getting the semantics correct and consistent in a
>> single patch series. It would be a big problem to explain that AF_ALG AEAD
>> read and write works one way in 4.x, another way in 4.y, and some
>> different way in 4.z.
>
> I do have a patch now available that exactly does what you suggest. See the
> patch attached. It works with the following exception.
>
> In the case of sendpage and using an in-place cipher operation, the patch
> breaks as follows. When the caller sends the same buffer for a sendpage
> operation, the cipher operation now will write the ciphertext to the beginning
> of the buffer where the AAD used to be. The subsequent tag calculation will
> now use the data it finds where the AAD is expected. As the cipher operation
> has already replaced the AAD with the ciphertext, the tag calculation will
> take the ciphertext as AAD and thus calculate a wrong tag.
>
> Thus, the only way to avoid that would be to duplicate the AAD into an
> internal buffer. But that would defeat the entire purpose of sendpage.
The use case for an "in place" operation would be to have the ciphertext
overwrite the plaintext, correct? If the src and dst overlap, does it make
sense to require the plaintext and ciphertext to be in exactly the same
location?
> The patch, however, works with sendmsg as well as sendpage when the src and
> dst buffers are different.
Thanks - I tested your patch and found that it works as expected with
sendmsg.
Regards,
--
Mat Martineau
Intel OTC
^ permalink raw reply
* [PATCH 3/3] crypto: AF_ALG - fix AEAD AIO handling of zero buffer
From: Stephan Mueller @ 2016-11-10 3:32 UTC (permalink / raw)
To: herbert; +Cc: mathew.j.martineau, linux-crypto
In-Reply-To: <3302668.EORF2jai5r@positron.chronox.de>
Handle the case when the caller provided a zero buffer to
sendmsg/sendpage. Such scenario is legal for AEAD ciphers when no
plaintext / ciphertext and no AAD is provided and the caller only
requests the generation of the tag value.
Signed-off-by: Stephan Mueller <smueller@chronox.de>
---
crypto/algif_aead.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 0212cc2..d52e8b2 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -510,12 +510,13 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
aead_async_cb, sk);
/* take over all tx sgls from ctx */
- areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur,
+ areq->tsgl = sock_kmalloc(sk,
+ sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
GFP_KERNEL);
if (unlikely(!areq->tsgl))
goto free;
- sg_init_table(areq->tsgl, sgl->cur);
+ sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
for (i = 0; i < sgl->cur; i++)
sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
sgl->sg[i].length, sgl->sg[i].offset);
--
2.7.4
^ permalink raw reply related
* [PATCH 2/3] crypto: AF_ALG - disregard AAD buffer space for output
From: Stephan Mueller @ 2016-11-10 3:32 UTC (permalink / raw)
To: herbert; +Cc: mathew.j.martineau, linux-crypto
In-Reply-To: <3302668.EORF2jai5r@positron.chronox.de>
The kernel crypto API AEAD cipher operation generates output such that
space for the AAD is reserved in the output buffer without being
touched. The processed ciphertext/plaintext is appended to the reserved
AAD buffer.
The user space interface followed that approach. However, this is a
violation of the POSIX read definition which requires that any read data
is placed at the beginning of the caller-provided buffer. As the kernel
crypto API would leave room for the AAD, the old approach did not fully
comply with the POSIX specification.
The patch changes the user space AF_ALG AEAD interface such that the
processed ciphertext/plaintext are now placed at the beginning of the
user buffer provided with the read system call. That means the user
space interface now deviates from the in-kernel output buffer handling.
For the cipher operation, the AAD buffer provided during input is
pointed to by a new SGL which is chained with the output buffer SGL.
With this approach, only pointers to one copy of the AAD are maintained
to avoid data duplication.
With this solution, the caller must not use sendpage with the exact same
buffers for input and output. The following rationale applies: When
the caller sends the same buffer for input/output to the sendpage
operation, the cipher operation now will write the ciphertext to the
beginning of the buffer where the AAD used to be. The subsequent tag
calculation will now use the data it finds where the AAD is expected.
As the cipher operation has already replaced the AAD with the ciphertext,
the tag calculation will take the ciphertext as AAD and thus calculate
a wrong tag.
Reported-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Stephan Mueller <smueller@chronox.de>
---
crypto/algif_aead.c | 143 +++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 113 insertions(+), 30 deletions(-)
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index c54bcb8..0212cc2 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -32,6 +32,7 @@ struct aead_sg_list {
struct aead_async_rsgl {
struct af_alg_sgl sgl;
struct list_head list;
+ bool new_page;
};
struct aead_async_req {
@@ -405,6 +406,61 @@ static void aead_async_cb(struct crypto_async_request *_req, int err)
iocb->ki_complete(iocb, err, err);
}
+/**
+ * scatterwalk_get_part() - get subset a scatterlist
+ *
+ * @dst: destination SGL to receive the pointers from source SGL
+ * @src: source SGL
+ * @len: data length in bytes to get from source SGL
+ * @max_sgs: number of SGs present in dst SGL to prevent overstepping boundaries
+ *
+ * @return: number of SG entries in dst
+ */
+static inline int scatterwalk_get_part(struct scatterlist *dst,
+ struct scatterlist *src,
+ unsigned int len, unsigned int max_sgs)
+{
+ /* leave one SG entry for chaining */
+ unsigned int j = 1;
+
+ while (len && j < max_sgs) {
+ unsigned int todo = min_t(unsigned int, len, src->length);
+
+ sg_set_page(dst, sg_page(src), todo, src->offset);
+ if (src->length >= len) {
+ sg_mark_end(dst);
+ break;
+ }
+ len -= todo;
+ j++;
+ src = sg_next(src);
+ dst = sg_next(dst);
+ }
+
+ return j;
+}
+
+static inline int aead_alloc_rsgl(struct sock *sk, struct aead_async_rsgl **ret)
+{
+ struct aead_async_rsgl *rsgl =
+ sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
+ if (unlikely(!rsgl))
+ return -ENOMEM;
+ *ret = rsgl;
+ return 0;
+}
+
+static inline int aead_get_rsgl_areq(struct sock *sk,
+ struct aead_async_req *areq,
+ struct aead_async_rsgl **ret)
+{
+ if (list_empty(&areq->list)) {
+ *ret = &areq->first_rsgl;
+ return 0;
+ } else
+ return aead_alloc_rsgl(sk, ret);
+}
+
static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
int flags)
{
@@ -433,7 +489,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
if (!aead_sufficient_data(ctx))
goto unlock;
- used = ctx->used;
+ used = ctx->used - ctx->aead_assoclen;
if (ctx->enc)
outlen = used + as;
else
@@ -452,7 +508,6 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
aead_request_set_ad(req, ctx->aead_assoclen);
aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
aead_async_cb, sk);
- used -= ctx->aead_assoclen;
/* take over all tx sgls from ctx */
areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur,
@@ -467,21 +522,26 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
areq->tsgls = sgl->cur;
+ /* set AAD buffer */
+ err = aead_get_rsgl_areq(sk, areq, &rsgl);
+ if (err)
+ goto free;
+ list_add_tail(&rsgl->list, &areq->list);
+ sg_init_table(rsgl->sgl.sg, ALG_MAX_PAGES);
+ rsgl->sgl.npages = scatterwalk_get_part(rsgl->sgl.sg, sgl->sg,
+ ctx->aead_assoclen,
+ ALG_MAX_PAGES);
+ rsgl->new_page = false;
+ last_rsgl = rsgl;
+
/* create rx sgls */
while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
- if (list_empty(&areq->list)) {
- rsgl = &areq->first_rsgl;
-
- } else {
- rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
- if (unlikely(!rsgl)) {
- err = -ENOMEM;
- goto free;
- }
- }
+ err = aead_get_rsgl_areq(sk, areq, &rsgl);
+ if (err)
+ goto free;
rsgl->sgl.npages = 0;
list_add_tail(&rsgl->list, &areq->list);
@@ -491,6 +551,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
goto free;
usedpages += err;
+ rsgl->new_page = true;
/* chain the new scatterlist with previous one */
if (last_rsgl)
@@ -507,7 +568,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
if (used < less) {
err = -EINVAL;
- goto unlock;
+ goto free;
}
used -= less;
outlen -= less;
@@ -531,7 +592,8 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
free:
list_for_each_entry(rsgl, &areq->list, list) {
- af_alg_free_sg(&rsgl->sgl);
+ if (rsgl->new_page)
+ af_alg_free_sg(&rsgl->sgl);
if (rsgl != &areq->first_rsgl)
sock_kfree_s(sk, rsgl, sizeof(*rsgl));
}
@@ -545,6 +607,16 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
return err ? err : outlen;
}
+static inline int aead_get_rsgl_ctx(struct sock *sk, struct aead_ctx *ctx,
+ struct aead_async_rsgl **ret)
+{
+ if (list_empty(&ctx->list)) {
+ *ret = &ctx->first_rsgl;
+ return 0;
+ } else
+ return aead_alloc_rsgl(sk, ret);
+}
+
static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
{
struct sock *sk = sock->sk;
@@ -582,9 +654,6 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
goto unlock;
}
- /* data length provided by caller via sendmsg/sendpage */
- used = ctx->used;
-
/*
* Make sure sufficient data is present -- note, the same check is
* is also present in sendmsg/sendpage. The checks in sendpage/sendmsg
@@ -598,6 +667,12 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
goto unlock;
/*
+ * The cipher operation input data is reduced by the associated data
+ * as the destination buffer will not hold the AAD.
+ */
+ used = ctx->used - ctx->aead_assoclen;
+
+ /*
* Calculate the minimum output buffer size holding the result of the
* cipher operation. When encrypting data, the receiving buffer is
* larger by the tag length compared to the input buffer as the
@@ -611,25 +686,29 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
outlen = used - as;
/*
- * The cipher operation input data is reduced by the associated data
- * length as this data is processed separately later on.
+ * Pre-pend the AAD buffer from the source SGL to the destination SGL.
+ * As the AAD buffer is not touched by the AEAD operation, the source
+ * SG buffers remain unchanged.
*/
- used -= ctx->aead_assoclen;
+ err = aead_get_rsgl_ctx(sk, ctx, &rsgl);
+ if (err)
+ goto unlock;
+ list_add_tail(&rsgl->list, &ctx->list);
+ sg_init_table(rsgl->sgl.sg, ALG_MAX_PAGES);
+ rsgl->sgl.npages = scatterwalk_get_part(rsgl->sgl.sg, sgl->sg,
+ ctx->aead_assoclen,
+ ALG_MAX_PAGES);
+ rsgl->new_page = false;
+ last_rsgl = rsgl;
/* convert iovecs of output buffers into scatterlists */
while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
- if (list_empty(&ctx->list)) {
- rsgl = &ctx->first_rsgl;
- } else {
- rsgl = sock_kmalloc(sk, sizeof(*rsgl), GFP_KERNEL);
- if (unlikely(!rsgl)) {
- err = -ENOMEM;
- goto unlock;
- }
- }
+ err = aead_get_rsgl_ctx(sk, ctx, &rsgl);
+ if (err)
+ goto unlock;
rsgl->sgl.npages = 0;
list_add_tail(&rsgl->list, &ctx->list);
@@ -637,7 +716,10 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
if (err < 0)
goto unlock;
+
usedpages += err;
+ rsgl->new_page = true;
+
/* chain the new scatterlist with previous one */
if (last_rsgl)
af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
@@ -688,7 +770,8 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
unlock:
list_for_each_entry_safe(rsgl, tmp, &ctx->list, list) {
- af_alg_free_sg(&rsgl->sgl);
+ if (rsgl->new_page)
+ af_alg_free_sg(&rsgl->sgl);
if (rsgl != &ctx->first_rsgl)
sock_kfree_s(sk, rsgl, sizeof(*rsgl));
list_del(&rsgl->list);
--
2.7.4
^ permalink raw reply related
* [PATCH 1/3] crypto: AF_ALG - fix AEAD tag memory handling
From: Stephan Mueller @ 2016-11-10 3:31 UTC (permalink / raw)
To: herbert; +Cc: mathew.j.martineau, linux-crypto
In-Reply-To: <3302668.EORF2jai5r@positron.chronox.de>
For encryption, the AEAD ciphers require AAD || PT as input and generate
AAD || CT || Tag as output and vice versa for decryption. Prior to this
patch, the AF_ALG interface for AEAD ciphers requires the buffer to be
present as input for encryption. Similarly, the output buffer for
decryption required the presence of the tag buffer too. This implies
that the kernel reads / writes data buffers from/to kernel space
even though this operation is not required.
This patch changes the AF_ALG AEAD interface to be consistent with the
in-kernel AEAD cipher requirements.
In addition, the code now handles the situation where the provided
output buffer is too small by reducing the size of the processed
input buffer accordingly. Due to this handling, he changes are
transparent to user space with one exception: the return code of recv
indicates the mount of output buffer. That output buffer has a different
size compared to before the patch which implies that the return code of
recv will also be different. For example, a decryption operation uses 16
bytes AAD, 16 bytes CT and 16 bytes tag, the AF_ALG AEAD interface
before showed a recv return code of 48 (bytes) whereas after this patch,
the return code is 32 since the tag is not returned any more.
Reported-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Stephan Mueller <smueller@chronox.de>
---
crypto/algif_aead.c | 77 ++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 55 insertions(+), 22 deletions(-)
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 80a0f1a..c54bcb8 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -81,7 +81,11 @@ static inline bool aead_sufficient_data(struct aead_ctx *ctx)
{
unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
- return ctx->used >= ctx->aead_assoclen + as;
+ /*
+ * The minimum amount of memory needed for an AEAD cipher is
+ * the AAD and in case of decryption the tag.
+ */
+ return ctx->used >= ctx->aead_assoclen + (ctx->enc ? 0 : as);
}
static void aead_reset_ctx(struct aead_ctx *ctx)
@@ -426,12 +430,15 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
goto unlock;
}
- used = ctx->used;
- outlen = used;
-
if (!aead_sufficient_data(ctx))
goto unlock;
+ used = ctx->used;
+ if (ctx->enc)
+ outlen = used + as;
+ else
+ outlen = used - as;
+
req = sock_kmalloc(sk, reqlen, GFP_KERNEL);
if (unlikely(!req))
goto unlock;
@@ -445,7 +452,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
aead_request_set_ad(req, ctx->aead_assoclen);
aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
aead_async_cb, sk);
- used -= ctx->aead_assoclen + (ctx->enc ? as : 0);
+ used -= ctx->aead_assoclen;
/* take over all tx sgls from ctx */
areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur,
@@ -461,7 +468,7 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
areq->tsgls = sgl->cur;
/* create rx sgls */
- while (iov_iter_count(&msg->msg_iter)) {
+ while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
@@ -491,16 +498,20 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
last_rsgl = rsgl;
- /* we do not need more iovecs as we have sufficient memory */
- if (outlen <= usedpages)
- break;
-
iov_iter_advance(&msg->msg_iter, err);
}
- err = -EINVAL;
+
/* ensure output buffer is sufficiently large */
- if (usedpages < outlen)
- goto free;
+ if (usedpages < outlen) {
+ size_t less = outlen - usedpages;
+
+ if (used < less) {
+ err = -EINVAL;
+ goto unlock;
+ }
+ used -= less;
+ outlen -= less;
+ }
aead_request_set_crypt(req, areq->tsgl, areq->first_rsgl.sgl.sg, used,
areq->iv);
@@ -571,6 +582,7 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
goto unlock;
}
+ /* data length provided by caller via sendmsg/sendpage */
used = ctx->used;
/*
@@ -585,16 +597,27 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
if (!aead_sufficient_data(ctx))
goto unlock;
- outlen = used;
+ /*
+ * Calculate the minimum output buffer size holding the result of the
+ * cipher operation. When encrypting data, the receiving buffer is
+ * larger by the tag length compared to the input buffer as the
+ * encryption operation generates the tag. For decryption, the input
+ * buffer provides the tag which is consumed resulting in only the
+ * plaintext without a buffer for the tag returned to the caller.
+ */
+ if (ctx->enc)
+ outlen = used + as;
+ else
+ outlen = used - as;
/*
* The cipher operation input data is reduced by the associated data
* length as this data is processed separately later on.
*/
- used -= ctx->aead_assoclen + (ctx->enc ? as : 0);
+ used -= ctx->aead_assoclen;
/* convert iovecs of output buffers into scatterlists */
- while (iov_iter_count(&msg->msg_iter)) {
+ while (outlen > usedpages && iov_iter_count(&msg->msg_iter)) {
size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
(outlen - usedpages));
@@ -621,16 +644,26 @@ static int aead_recvmsg_sync(struct socket *sock, struct msghdr *msg, int flags)
last_rsgl = rsgl;
- /* we do not need more iovecs as we have sufficient memory */
- if (outlen <= usedpages)
- break;
iov_iter_advance(&msg->msg_iter, err);
}
- err = -EINVAL;
/* ensure output buffer is sufficiently large */
- if (usedpages < outlen)
- goto unlock;
+ if (usedpages < outlen) {
+ size_t less = outlen - usedpages;
+
+ if (used < less) {
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ /*
+ * Caller has smaller output buffer than needed, reduce
+ * the input data length to be processed to fit the provided
+ * output buffer.
+ */
+ used -= less;
+ outlen -= less;
+ }
sg_mark_end(sgl->sg + sgl->cur - 1);
aead_request_set_crypt(&ctx->aead_req, sgl->sg, ctx->first_rsgl.sgl.sg,
--
2.7.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox