linux-arm-kernel.lists.infradead.org archive mirror
* [PATCH 0/2] Improve DMA chaining for ahash requests
@ 2016-08-18 12:12 Romain Perier
  2016-08-18 12:12 ` [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests Romain Perier
  2016-08-18 12:12 ` [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests Romain Perier
  0 siblings, 2 replies; 5+ messages in thread
From: Romain Perier @ 2016-08-18 12:12 UTC (permalink / raw)
  To: linux-arm-kernel

This series contains performance improvements for ahash requests.
So far, ahash requests were systematically not chained at the DMA level.
However, in some cases, such as when IPsec is used, ahash requests can
be processed directly by the engine and do not require intermediate
partial update states.

This series first reworks the way outer IVs are copied from the SRAM
into the dma pool. To do so, we introduce a common dma pool, shared by
all types of requests, that holds the outer results (IV or digest).
Then, for ahash requests that can be processed directly by the engine,
the outer results are copied from the SRAM into this common dma pool.
These requests are then allowed to be chained at the DMA level.


Benchmarking results with iperf through IPsec
=============================================
		ESP			AH

Before		373 Mbits/s		530 Mbits/s
After		413 Mbits/s		578 Mbits/s
Improvement	+11%			+9%


Romain Perier (2):
  crypto: marvell - Use an unique pool to copy results of requests
  crypto: marvell - Don't break chain for computable last ahash requests

 drivers/crypto/marvell/cesa.c   |  4 +--
 drivers/crypto/marvell/cesa.h   |  6 ++--
 drivers/crypto/marvell/cipher.c |  2 +-
 drivers/crypto/marvell/hash.c   | 69 ++++++++++++++++++++++++++++++++---------
 drivers/crypto/marvell/tdma.c   | 16 +++++-----
 5 files changed, 68 insertions(+), 29 deletions(-)

-- 
2.8.1


* [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests
  2016-08-18 12:12 [PATCH 0/2] Improve DMA chaining for ahash requests Romain Perier
@ 2016-08-18 12:12 ` Romain Perier
  2016-08-20  6:26   ` Boris Brezillon
  2016-08-18 12:12 ` [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests Romain Perier
  1 sibling, 1 reply; 5+ messages in thread
From: Romain Perier @ 2016-08-18 12:12 UTC (permalink / raw)
  To: linux-arm-kernel

So far, a dedicated dma pool was used to copy back the outer IV of
cipher requests. Instead of using one dma pool per type of outer data,
we prefer to use a common dma pool that holds the part of the SRAM that
is likely to be needed later by the 'complete' operation. In this way,
any type of result can be retrieved by DMA, for both cipher and ahash
requests.

Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
---
 drivers/crypto/marvell/cesa.c   |  4 ++--
 drivers/crypto/marvell/cesa.h   |  6 +++---
 drivers/crypto/marvell/cipher.c |  2 +-
 drivers/crypto/marvell/tdma.c   | 16 ++++++++--------
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index 37dadb2..4d308ad 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -375,8 +375,8 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
 	if (!dma->padding_pool)
 		return -ENOMEM;
 
-	dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
-	if (!dma->iv_pool)
+	dma->result_pool = dmam_pool_create("cesa_result", dev, 96, 1, 0);
+	if (!dma->result_pool)
 		return -ENOMEM;
 
 	cesa->dma = dma;
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index e423d33..3be1aa3 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
 #define CESA_TDMA_DUMMY				0
 #define CESA_TDMA_DATA				1
 #define CESA_TDMA_OP				2
-#define CESA_TDMA_IV				3
+#define CESA_TDMA_RESULT			3
 
 /**
  * struct mv_cesa_tdma_desc - TDMA descriptor
@@ -393,7 +393,7 @@ struct mv_cesa_dev_dma {
 	struct dma_pool *op_pool;
 	struct dma_pool *cache_pool;
 	struct dma_pool *padding_pool;
-	struct dma_pool *iv_pool;
+	struct dma_pool *result_pool;
 };
 
 /**
@@ -839,7 +839,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
 	memset(chain, 0, sizeof(*chain));
 }
 
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
 			  u32 size, u32 flags, gfp_t gfp_flags);
 
 struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index d19dc96..bd575b1 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -373,7 +373,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
 
 	/* Add output data for IV */
 	ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
-	ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
+	ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
 				    ivsize, CESA_TDMA_SRC_IN_SRAM, flags);
 
 	if (ret)
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
index 9fd7a5f..499a1d3 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -69,8 +69,8 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
 		if (type == CESA_TDMA_OP)
 			dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
 				      le32_to_cpu(tdma->src));
-		else if (type == CESA_TDMA_IV)
-			dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
+		else if (type == CESA_TDMA_RESULT)
+			dma_pool_free(cesa_dev->dma->result_pool, tdma->data,
 				      le32_to_cpu(tdma->dst));
 
 		tdma = tdma->next;
@@ -209,29 +209,29 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
 	return new_tdma;
 }
 
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
 			  u32 size, u32 flags, gfp_t gfp_flags)
 {
 
 	struct mv_cesa_tdma_desc *tdma;
-	u8 *iv;
+	u8 *result;
 	dma_addr_t dma_handle;
 
 	tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
 	if (IS_ERR(tdma))
 		return PTR_ERR(tdma);
 
-	iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
-	if (!iv)
+	result = dma_pool_alloc(cesa_dev->dma->result_pool, gfp_flags, &dma_handle);
+	if (!result)
 		return -ENOMEM;
 
 	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
 	tdma->src = src;
 	tdma->dst = cpu_to_le32(dma_handle);
-	tdma->data = iv;
+	tdma->data = result;
 
 	flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
-	tdma->flags = flags | CESA_TDMA_IV;
+	tdma->flags = flags | CESA_TDMA_RESULT;
 	return 0;
 }
 
-- 
2.8.1


* [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests
  2016-08-18 12:12 [PATCH 0/2] Improve DMA chaining for ahash requests Romain Perier
  2016-08-18 12:12 ` [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests Romain Perier
@ 2016-08-18 12:12 ` Romain Perier
  2016-08-20  7:17   ` Boris Brezillon
  1 sibling, 1 reply; 5+ messages in thread
From: Romain Perier @ 2016-08-18 12:12 UTC (permalink / raw)
  To: linux-arm-kernel

Currently, the driver breaks the chain for all kinds of hash requests in
order not to override the intermediate states of partial ahash updates.
However, some final ahash requests can be processed directly by the
engine, without any intermediate state. This is typically the case for
most of the HMAC requests processed via IPsec.

This commit adds a TDMA descriptor that copies the outer results of this
kind of request into the "result" dma pool, and then allows these
requests to be chained at the DMA level. The 'complete' operation is
also updated to retrieve the MAC digest from the right location.

Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
---
 drivers/crypto/marvell/hash.c | 69 +++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 9f28468..1a91662 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -312,24 +312,48 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
 	int i;
 
 	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
-	for (i = 0; i < digsize / 4; i++)
-		creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
 
-	if (creq->last_req) {
+	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
+	    !(creq->base.chain.last->flags & CESA_TDMA_BREAK_CHAIN)) {
+		struct mv_cesa_tdma_desc *tdma = NULL;
+
+		for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
+			u32 type = tdma->flags & CESA_TDMA_TYPE_MSK;
+			if (type ==  CESA_TDMA_RESULT)
+				break;
+		}
+
+		BUG_ON(!tdma);
+
 		/*
-		 * Hardware's MD5 digest is in little endian format, but
-		 * SHA in big endian format
+		 * Result is already in the correct endianness when the SA is
+		 * used
 		 */
-		if (creq->algo_le) {
-			__le32 *result = (void *)ahashreq->result;
+		__le32 *data = tdma->data + 0x40;
+		for (i = 0; i < digsize / 4; i++)
+			creq->state[i] = cpu_to_le32(data[i]);
 
-			for (i = 0; i < digsize / 4; i++)
-				result[i] = cpu_to_le32(creq->state[i]);
-		} else {
-			__be32 *result = (void *)ahashreq->result;
+		memcpy(ahashreq->result, data, digsize);
+	} else {
+		for (i = 0; i < digsize / 4; i++)
+			creq->state[i] = readl_relaxed(engine->regs +
+						       CESA_IVDIG(i));
+		if (creq->last_req) {
+			/*
+			* Hardware's MD5 digest is in little endian format, but
+			* SHA in big endian format
+			*/
+			if (creq->algo_le) {
+				__le32 *result = (void *)ahashreq->result;
+
+				for (i = 0; i < digsize / 4; i++)
+					result[i] = cpu_to_le32(creq->state[i]);
+			} else {
+				__be32 *result = (void *)ahashreq->result;
 
-			for (i = 0; i < digsize / 4; i++)
-				result[i] = cpu_to_be32(creq->state[i]);
+				for (i = 0; i < digsize / 4; i++)
+					result[i] = cpu_to_be32(creq->state[i]);
+			}
 		}
 	}
 
@@ -504,6 +528,11 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
 						CESA_SA_DESC_CFG_LAST_FRAG,
 				      CESA_SA_DESC_CFG_FRAG_MSK);
 
+		ret = mv_cesa_dma_add_result_op(chain,
+						CESA_SA_MAC_IIV_SRAM_OFFSET, 96,
+						CESA_TDMA_SRC_IN_SRAM, flags);
+		if (ret)
+			return ERR_PTR(-ENOMEM);
 		return op;
 	}
 
@@ -564,6 +593,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	struct mv_cesa_op_ctx *op = NULL;
 	unsigned int frag_len;
 	int ret;
+	u32 type;
 
 	basereq->chain.first = NULL;
 	basereq->chain.last = NULL;
@@ -635,6 +665,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 		goto err_free_tdma;
 	}
 
+	type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
+
 	if (op) {
 		/* Add dummy desc to wait for crypto operation end */
 		ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
@@ -648,8 +680,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	else
 		creq->cache_ptr = 0;
 
-	basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
-				       CESA_TDMA_BREAK_CHAIN);
+	basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
+	/*
+	 * If results are copied via DMA, this means that this
+	 * request can be directly processed by the engine,
+	 * without partial updates. So we can chain it at the
+	 * DMA level with other requests.
+	 */
+	if (type != CESA_TDMA_RESULT)
+		basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
 
 	return 0;
 
-- 
2.8.1


* [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests
  2016-08-18 12:12 ` [PATCH 1/2] crypto: marvell - Use an unique pool to copy results of requests Romain Perier
@ 2016-08-20  6:26   ` Boris Brezillon
  0 siblings, 0 replies; 5+ messages in thread
From: Boris Brezillon @ 2016-08-20  6:26 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Romain,

On Thu, 18 Aug 2016 14:12:13 +0200
Romain Perier <romain.perier@free-electrons.com> wrote:

> So far, a dedicated dma pool was used to copy back the outer IV of
> cipher requests. Instead of using one dma pool per type of outer data,
> we prefer to use a common dma pool that holds the part of the SRAM that
> is likely to be needed later by the 'complete' operation. In this way,
> any type of result can be retrieved by DMA, for both cipher and ahash
> requests.

Can't we just re-use the op_pool (and drop the cesa_iv/result pool)? It
may be suboptimal in terms of memory usage, but adding more pools also
adds some overhead.
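
Just for illustration (an untested sketch reusing the body of your new
mv_cesa_dma_add_result_op()), the allocation would simply go through
op_pool, and mv_cesa_dma_cleanup() would then have to free
CESA_TDMA_RESULT entries through op_pool as well:

	result = dma_pool_alloc(cesa_dev->dma->op_pool, gfp_flags,
				&dma_handle);
	if (!result)
		return -ENOMEM;

	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
	tdma->src = src;
	tdma->dst = cpu_to_le32(dma_handle);
	tdma->data = result;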

> 
> Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
> ---
>  drivers/crypto/marvell/cesa.c   |  4 ++--
>  drivers/crypto/marvell/cesa.h   |  6 +++---
>  drivers/crypto/marvell/cipher.c |  2 +-
>  drivers/crypto/marvell/tdma.c   | 16 ++++++++--------
>  4 files changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
> index 37dadb2..4d308ad 100644
> --- a/drivers/crypto/marvell/cesa.c
> +++ b/drivers/crypto/marvell/cesa.c
> @@ -375,8 +375,8 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
>  	if (!dma->padding_pool)
>  		return -ENOMEM;
>  
> -	dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
> -	if (!dma->iv_pool)
> +	dma->result_pool = dmam_pool_create("cesa_result", dev, 96, 1, 0);

It's better to use a sizeof(xxx) here to avoid bugs if the context size
grows.
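
For example (the struct name and layout below are purely illustrative,
nothing like it exists in the driver, the sizes just add up to the 96
bytes you copy today):

	/* in cesa.h: describe the SRAM window read back on completion */
	struct mv_cesa_sram_result {
		u8 mac_iiv[64];		/* inner IV / intermediate hash state */
		u8 mac_digest[32];	/* digest, read at +0x40 in patch 2 */
	};

	/* in mv_cesa_dev_dma_init() */
	dma->result_pool = dmam_pool_create("cesa_result", dev,
					    sizeof(struct mv_cesa_sram_result),
					    1, 0);
	if (!dma->result_pool)
		return -ENOMEM;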

The rest looks good.

Thanks,

Boris

> +	if (!dma->result_pool)
>  		return -ENOMEM;
>  
>  	cesa->dma = dma;
> diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
> index e423d33..3be1aa3 100644
> --- a/drivers/crypto/marvell/cesa.h
> +++ b/drivers/crypto/marvell/cesa.h
> @@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
>  #define CESA_TDMA_DUMMY				0
>  #define CESA_TDMA_DATA				1
>  #define CESA_TDMA_OP				2
> -#define CESA_TDMA_IV				3
> +#define CESA_TDMA_RESULT			3
>  
>  /**
>   * struct mv_cesa_tdma_desc - TDMA descriptor
> @@ -393,7 +393,7 @@ struct mv_cesa_dev_dma {
>  	struct dma_pool *op_pool;
>  	struct dma_pool *cache_pool;
>  	struct dma_pool *padding_pool;
> -	struct dma_pool *iv_pool;
> +	struct dma_pool *result_pool;
>  };
>  
>  /**
> @@ -839,7 +839,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
>  	memset(chain, 0, sizeof(*chain));
>  }
>  
> -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
> +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
>  			  u32 size, u32 flags, gfp_t gfp_flags);
>  
>  struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
> diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
> index d19dc96..bd575b1 100644
> --- a/drivers/crypto/marvell/cipher.c
> +++ b/drivers/crypto/marvell/cipher.c
> @@ -373,7 +373,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
>  
>  	/* Add output data for IV */
>  	ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
> -	ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
> +	ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
>  				    ivsize, CESA_TDMA_SRC_IN_SRAM, flags);
>  
>  	if (ret)
> diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
> index 9fd7a5f..499a1d3 100644
> --- a/drivers/crypto/marvell/tdma.c
> +++ b/drivers/crypto/marvell/tdma.c
> @@ -69,8 +69,8 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
>  		if (type == CESA_TDMA_OP)
>  			dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
>  				      le32_to_cpu(tdma->src));
> -		else if (type == CESA_TDMA_IV)
> -			dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
> +		else if (type == CESA_TDMA_RESULT)
> +			dma_pool_free(cesa_dev->dma->result_pool, tdma->data,
>  				      le32_to_cpu(tdma->dst));
>  
>  		tdma = tdma->next;
> @@ -209,29 +209,29 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
>  	return new_tdma;
>  }
>  
> -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
> +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
>  			  u32 size, u32 flags, gfp_t gfp_flags)
>  {
>  
>  	struct mv_cesa_tdma_desc *tdma;
> -	u8 *iv;
> +	u8 *result;
>  	dma_addr_t dma_handle;
>  
>  	tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
>  	if (IS_ERR(tdma))
>  		return PTR_ERR(tdma);
>  
> -	iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
> -	if (!iv)
> +	result = dma_pool_alloc(cesa_dev->dma->result_pool, gfp_flags, &dma_handle);
> +	if (!result)
>  		return -ENOMEM;
>  
>  	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
>  	tdma->src = src;
>  	tdma->dst = cpu_to_le32(dma_handle);
> -	tdma->data = iv;
> +	tdma->data = result;
>  
>  	flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
> -	tdma->flags = flags | CESA_TDMA_IV;
> +	tdma->flags = flags | CESA_TDMA_RESULT;
>  	return 0;
>  }
>  


* [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests
  2016-08-18 12:12 ` [PATCH 2/2] crypto: marvell - Don't break chain for computable last ahash requests Romain Perier
@ 2016-08-20  7:17   ` Boris Brezillon
  0 siblings, 0 replies; 5+ messages in thread
From: Boris Brezillon @ 2016-08-20  7:17 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, 18 Aug 2016 14:12:14 +0200
Romain Perier <romain.perier@free-electrons.com> wrote:

> Currently, the driver breaks the chain for all kinds of hash requests in
> order not to override the intermediate states of partial ahash updates.
> However, some final ahash requests can be processed directly by the
> engine, without any intermediate state. This is typically the case for
> most of the HMAC requests processed via IPsec.
>
> This commit adds a TDMA descriptor that copies the outer results of this
> kind of request into the "result" dma pool, and then allows these
> requests to be chained at the DMA level. The 'complete' operation is
> also updated to retrieve the MAC digest from the right location.
> 
> Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
> ---
>  drivers/crypto/marvell/hash.c | 69 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 54 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
> index 9f28468..1a91662 100644
> --- a/drivers/crypto/marvell/hash.c
> +++ b/drivers/crypto/marvell/hash.c
> @@ -312,24 +312,48 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
>  	int i;
>  
>  	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
> -	for (i = 0; i < digsize / 4; i++)
> -		creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
>  
> -	if (creq->last_req) {
> +	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
> +	    !(creq->base.chain.last->flags & CESA_TDMA_BREAK_CHAIN)) {
> +		struct mv_cesa_tdma_desc *tdma = NULL;
> +
> +		for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
> +			u32 type = tdma->flags & CESA_TDMA_TYPE_MSK;
> +			if (type ==  CESA_TDMA_RESULT)
> +				break;
> +		}
> +
> +		BUG_ON(!tdma);

Let's try to use BUG_ON() only when we can't recover from a failure.
This is not the case here: just print an error message, or even better,
move this check into ->process(), where you can return an error code
(note that this requires slightly changing the way errors are handled
in mv_cesa_tdma_process()).
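
Something like this, if the check were to stay in the completion
handler (untested sketch, the lookup loop is the one from your patch):

	for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
		if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT)
			break;
	}

	/*
	 * A missing result descriptor is a driver bug, but not one
	 * worth killing the machine for: warn and leave the result
	 * buffer untouched instead of BUG_ON().
	 */
	if (WARN_ON_ONCE(!tdma))
		return;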

> +
>  		/*
> -		 * Hardware's MD5 digest is in little endian format, but
> -		 * SHA in big endian format
> +		 * Result is already in the correct endianness when the SA is
> +		 * used
>  		 */
> -		if (creq->algo_le) {
> -			__le32 *result = (void *)ahashreq->result;
> +		__le32 *data = tdma->data + 0x40;
> +		for (i = 0; i < digsize / 4; i++)
> +			creq->state[i] = cpu_to_le32(data[i]);
>  
> -			for (i = 0; i < digsize / 4; i++)
> -				result[i] = cpu_to_le32(creq->state[i]);
> -		} else {
> -			__be32 *result = (void *)ahashreq->result;
> +		memcpy(ahashreq->result, data, digsize);

Is it safe to do that when you're running big endian (that's not a
rhetorical question, I always have a hard time figuring out when
endianness conversions should be done)?

> +	} else {
> +		for (i = 0; i < digsize / 4; i++)
> +			creq->state[i] = readl_relaxed(engine->regs +
> +						       CESA_IVDIG(i));
> +		if (creq->last_req) {
> +			/*
> +			* Hardware's MD5 digest is in little endian format, but
> +			* SHA in big endian format
> +			*/
> +			if (creq->algo_le) {
> +				__le32 *result = (void *)ahashreq->result;
> +
> +				for (i = 0; i < digsize / 4; i++)
> +					result[i] = cpu_to_le32(creq->state[i]);
> +			} else {
> +				__be32 *result = (void *)ahashreq->result;
>  
> -			for (i = 0; i < digsize / 4; i++)
> -				result[i] = cpu_to_be32(creq->state[i]);
> +				for (i = 0; i < digsize / 4; i++)
> +					result[i] = cpu_to_be32(creq->state[i]);
> +			}
>  		}
>  	}
>  
> @@ -504,6 +528,11 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
>  						CESA_SA_DESC_CFG_LAST_FRAG,
>  				      CESA_SA_DESC_CFG_FRAG_MSK);
>  
> +		ret = mv_cesa_dma_add_result_op(chain,
> +						CESA_SA_MAC_IIV_SRAM_OFFSET, 96,
> +						CESA_TDMA_SRC_IN_SRAM, flags);
> +		if (ret)
> +			return ERR_PTR(-ENOMEM);
>  		return op;
>  	}
>  
> @@ -564,6 +593,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
>  	struct mv_cesa_op_ctx *op = NULL;
>  	unsigned int frag_len;
>  	int ret;
> +	u32 type;

How about naming this variable break_chain? This would be clearer IMO.

	bool break_chain = false;

>  
>  	basereq->chain.first = NULL;
>  	basereq->chain.last = NULL;
> @@ -635,6 +665,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
>  		goto err_free_tdma;
>  	}
>  
> +	type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
> +

	if ((basereq->chain.last->flags & CESA_TDMA_TYPE_MSK) !=
	    CESA_TDMA_RESULT)
		break_chain = true;

>  	if (op) {
>  		/* Add dummy desc to wait for crypto operation end */
>  		ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
> @@ -648,8 +680,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
>  	else
>  		creq->cache_ptr = 0;
>  
> -	basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
> -				       CESA_TDMA_BREAK_CHAIN);
> +	basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
> +	/*
> +	 * If results are copied via DMA, this means that this
> +	 * request can be directly processed by the engine,
> +	 * without partial updates. So we can chain it at the
> +	 * DMA level with other requests.
> +	 */

Move the comment where you're assigning break_chain to true.

> +	if (type != CESA_TDMA_RESULT)

	if (break_chain)

> +		basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
>  
>  	return 0;
>  


