From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stephan Mueller Subject: Re: [PATCH v2 2/2] crypto: algif - change algif_skcipher to be asynchronous Date: Tue, 10 Mar 2015 08:41:30 +0100 Message-ID: <26409073.mC6eRvfGxi@tauon> References: <20150309205514.9648.701.stgit@tstruk-mobl1> <20150309205525.9648.96093.stgit@tstruk-mobl1> Mime-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: herbert@gondor.apana.org.au, davem@davemloft.net, linux-crypto@vger.kernel.org, qat-linux@intel.com To: Tadeusz Struk Return-path: Received: from mail.eperm.de ([89.247.134.16]:46467 "EHLO mail.eperm.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751807AbbCJHld convert rfc822-to-8bit (ORCPT ); Tue, 10 Mar 2015 03:41:33 -0400 In-Reply-To: <20150309205525.9648.96093.stgit@tstruk-mobl1> Sender: linux-crypto-owner@vger.kernel.org List-ID: Am Montag, 9. M=E4rz 2015, 13:55:25 schrieb Tadeusz Struk: Hi Tadeusz, >The way the algif_skcipher works currently is that on sendmsg/sendpage >it builds an sgl for the input data and then on read/recvmsg it sends >the job for encryption putting the user to sleep till the data is >processed. This way it can only handle one job at a given time. >This patch changes it to be asynchronous by adding AIO support. 
> >Signed-off-by: Tadeusz Struk >--- > crypto/algif_skcipher.c | 233 >++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 226 >insertions(+), 7 deletions(-) > >diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c >index 0c8a1e5..790ed35 100644 >--- a/crypto/algif_skcipher.c >+++ b/crypto/algif_skcipher.c >@@ -39,6 +39,7 @@ struct skcipher_ctx { > > struct af_alg_completion completion; > >+ atomic_t inflight; > unsigned used; > > unsigned int len; >@@ -49,9 +50,65 @@ struct skcipher_ctx { > struct ablkcipher_request req; > }; > >+struct skcipher_async_rsgl { >+ struct af_alg_sgl sgl; >+ struct list_head list; >+}; >+ >+struct skcipher_async_req { >+ struct kiocb *iocb; >+ struct skcipher_async_rsgl first_sgl; >+ struct list_head list; >+ struct scatterlist *tsg; >+ char iv[]; >+}; >+ >+#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)are= q >+ \ + crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))) >+ >+#define GET_REQ_SIZE(ctx) \ >+ crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)) >+ >+#define GET_IV_SIZE(ctx) \ >+ crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req)) Wouldn't an inline function be better? 
>+ > #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \ > sizeof(struct scatterlist) - 1) > >+static void skcipher_free_async_sgls(struct skcipher_async_req *sreq) >+{ >+ struct skcipher_async_rsgl *rsgl, *tmp; >+ struct scatterlist *sgl; >+ struct scatterlist *sg; >+ int i, n; >+ >+ list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) { >+ af_alg_free_sg(&rsgl->sgl); >+ if (rsgl !=3D &sreq->first_sgl) >+ kfree(rsgl); >+ } >+ sgl =3D sreq->tsg; >+ n =3D sg_nents(sgl); >+ for_each_sg(sgl, sg, n, i) >+ put_page(sg_page(sg)); >+ >+ kfree(sreq->tsg); >+} >+ >+static void skcipher_async_cb(struct crypto_async_request *req, int >err) +{ >+ struct sock *sk =3D req->data; >+ struct alg_sock *ask =3D alg_sk(sk); >+ struct skcipher_ctx *ctx =3D ask->private; >+ struct skcipher_async_req *sreq =3D GET_SREQ(req, ctx); >+ struct kiocb *iocb =3D sreq->iocb; >+ >+ atomic_dec(&ctx->inflight); >+ skcipher_free_async_sgls(sreq); >+ kfree(req); >+ aio_complete(iocb, err, err); >+} >+ > static inline int skcipher_sndbuf(struct sock *sk) > { > struct alg_sock *ask =3D alg_sk(sk); >@@ -96,7 +153,7 @@ static int skcipher_alloc_sgl(struct sock *sk) > return 0; > } > >-static void skcipher_pull_sgl(struct sock *sk, int used) >+static void skcipher_pull_sgl(struct sock *sk, int used, int put) > { > struct alg_sock *ask =3D alg_sk(sk); > struct skcipher_ctx *ctx =3D ask->private; >@@ -123,8 +180,8 @@ static void skcipher_pull_sgl(struct sock *sk, int >used) > > if (sg[i].length) > return; >- >- put_page(sg_page(sg + i)); >+ if (put) >+ put_page(sg_page(sg + i)); > sg_assign_page(sg + i, NULL); > } > >@@ -143,7 +200,7 @@ static void skcipher_free_sgl(struct sock *sk) > struct alg_sock *ask =3D alg_sk(sk); > struct skcipher_ctx *ctx =3D ask->private; > >- skcipher_pull_sgl(sk, ctx->used); >+ skcipher_pull_sgl(sk, ctx->used, 1); > } > > static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) >@@ -424,8 +481,149 @@ unlock: > return err ?: size; > } > >-static int 
skcipher_recvmsg(struct kiocb *unused, struct socket *sock= , >- struct msghdr *msg, size_t ignored, int=20 flags) >+static int skcipher_all_sg_nents(struct skcipher_ctx *ctx) >+{ >+ struct skcipher_sg_list *sgl; >+ struct scatterlist *sg; >+ int nents =3D 0; >+ >+ list_for_each_entry(sgl, &ctx->tsgl, list) { >+ sg =3D sgl->sg; >+ >+ while (!sg->length) >+ sg++; >+ >+ nents +=3D sg_nents(sg); >+ } >+ return nents; >+} >+ >+static int skcipher_recvmsg_async(struct kiocb *iocb, struct socket >*sock, + struct msghdr *msg, int flags) >+{ >+ struct sock *sk =3D sock->sk; >+ struct alg_sock *ask =3D alg_sk(sk); >+ struct skcipher_ctx *ctx =3D ask->private; >+ struct skcipher_sg_list *sgl; >+ struct scatterlist *sg; >+ struct skcipher_async_req *sreq; >+ struct ablkcipher_request *req; >+ struct skcipher_async_rsgl *last_rsgl =3D NULL; >+ unsigned int len =3D 0, tx_nents =3D skcipher_all_sg_nents(ctx); >+ unsigned int reqlen =3D sizeof(struct skcipher_async_req) + >+ GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx); >+ int i =3D 0; >+ int err =3D -ENOMEM; >+ >+ lock_sock(sk); >+ req =3D kmalloc(reqlen, GFP_KERNEL); >+ if (unlikely(!req)) >+ goto unlock; >+ >+ sreq =3D GET_SREQ(req, ctx); >+ sreq->iocb =3D iocb; >+ memset(&sreq->first_sgl, '\0', sizeof(struct=20 skcipher_async_rsgl)); >+ INIT_LIST_HEAD(&sreq->list); >+ sreq->tsg =3D kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL); Why do you use kcalloc here and not kmalloc (why is there a memset=20 needed considering that sg_init_table is called?) 
>+ if (unlikely(!sreq->tsg)) { >+ kfree(req); >+ goto unlock; >+ } >+ sg_init_table(sreq->tsg, tx_nents); >+ memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx)); >+ ablkcipher_request_set_tfm(req, crypto_ablkcipher_reqtfm(&ctx- >req)); >+ ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, >+ skcipher_async_cb, sk); >+ >+ while (iov_iter_count(&msg->msg_iter)) { >+ struct skcipher_async_rsgl *rsgl; >+ unsigned long used; >+ >+ if (!ctx->used) { >+ err =3D skcipher_wait_for_data(sk, flags); >+ if (err) >+ goto free; >+ } >+ sgl =3D list_first_entry(&ctx->tsgl, >+ struct skcipher_sg_list, list); >+ sg =3D sgl->sg; >+ >+ while (!sg->length) >+ sg++; >+ >+ used =3D min_t(unsigned long, ctx->used, >+ iov_iter_count(&msg->msg_iter)); >+ used =3D min_t(unsigned long, used, sg->length); >+ >+ if (i =3D=3D tx_nents) { >+ struct scatterlist *tmp; >+ int x; >+ /* Ran out of tx slots in async request >+ * need to expand */ >+ tmp =3D kcalloc(tx_nents * 2, sizeof(*tmp), >+ GFP_KERNEL); Why is the memory increased by a factor of two? Could you please point me to the logic that limits the allocation to=20 prevent user space eating up kernel memory? Same as above: Why do you use kcalloc here and not kmalloc (why is ther= e=20 a memset needed considering that sg_init_table is called?) >+ if (!tmp) >+ goto free; >+ >+ sg_init_table(tmp, tx_nents * 2); >+ for (x =3D 0; x < tx_nents; x++) >+ sg_set_page(&tmp[x], sg_page(&sreq- >tsg[x]), >+ sreq->tsg[x].length, >+ sreq->tsg[x].offset); >+ kfree(sreq->tsg); It seems I am missing a point here, but the code is freeing the existing sreq= - >tsg scatterlist just to replace it with the scatterlist that is twice=20 as big. The kernel had to fill the scatterlist that is twice as big wit= h=20 seemingly the same data we already filled our list with (according to=20 the sg_set_page call below). If I understand that right, isn't that unnecessary work? 
Why not just=20 allocate one chunk of memory for a scatterlist holding the new=20 data, fill it and chain it with the existing scatterlist? >+ sreq->tsg =3D tmp; >+ tx_nents *=3D 2; >+ } >+ /* Need to take over the tx sgl from ctx >+ * to the asynch req - these sgls will be freed later */ >+ sg_set_page(sreq->tsg + i++, sg_page(sg), sg->length, >+ sg->offset); >+ >+ if (list_empty(&sreq->list)) { >+ rsgl =3D &sreq->first_sgl; >+ list_add_tail(&rsgl->list, &sreq->list); >+ } else { >+ rsgl =3D kzalloc(sizeof(*rsgl), GFP_KERNEL); Why is kzalloc called? Shouldn't kmalloc suffice? >+ if (!rsgl) { >+ err =3D -ENOMEM; >+ goto free; >+ } >+ list_add_tail(&rsgl->list, &sreq->list); >+ } >+ >+ used =3D af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used); >+ err =3D used; >+ if (used < 0) >+ goto free; >+ if (last_rsgl) >+ af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl); >+ >+ last_rsgl =3D rsgl; >+ len +=3D used; >+ skcipher_pull_sgl(sk, used, 0); >+ iov_iter_advance(&msg->msg_iter, used); >+ } >+ >+ ablkcipher_request_set_crypt(req, sreq->tsg, sreq- >first_sgl.sgl.sg, >+ len, sreq->iv); >+ err =3D ctx->enc ? crypto_ablkcipher_encrypt(req) : >+ crypto_ablkcipher_decrypt(req); >+ if (err =3D=3D -EINPROGRESS) { >+ atomic_inc(&ctx->inflight); >+ err =3D -EIOCBQUEUED; >+ goto unlock; >+ } To avoid races, shouldn't the atomic_inc of inflight happen before the=20 encryption / decryption call? 
>+free: >+ skcipher_free_async_sgls(sreq); >+ kfree(req); >+unlock: >+ skcipher_wmem_wakeup(sk); >+ release_sock(sk); >+ return err; >+} >+ >+static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr >*msg, + int flags) > { > struct sock *sk =3D sock->sk; > struct alg_sock *ask =3D alg_sk(sk); >@@ -484,7 +682,7 @@ free: > goto unlock; > > copied +=3D used; >- skcipher_pull_sgl(sk, used); >+ skcipher_pull_sgl(sk, used, 1); > iov_iter_advance(&msg->msg_iter, used); > } > >@@ -497,6 +695,13 @@ unlock: > return copied ?: err; > } > >+static int skcipher_recvmsg(struct kiocb *iocb, struct socket *sock, >+ struct msghdr *msg, size_t ignored, int=20 flags) >+{ >+ return is_sync_kiocb(iocb) ? >+ skcipher_recvmsg_sync(sock, msg, flags) : >+ skcipher_recvmsg_async(iocb, sock, msg, flags); >+} > > static unsigned int skcipher_poll(struct file *file, struct socket >*sock, poll_table *wait) >@@ -555,12 +760,25 @@ static int skcipher_setkey(void *private, const >u8 *key, unsigned int keylen) return crypto_ablkcipher_setkey(private, >key, keylen); > } > >+static void skcipher_wait(struct sock *sk) >+{ >+ struct alg_sock *ask =3D alg_sk(sk); >+ struct skcipher_ctx *ctx =3D ask->private; >+ int ctr =3D 0; >+ >+ while (atomic_read(&ctx->inflight) && ctr++ < 100) >+ msleep(100); What is the reason for taking 100? Why not 50 or 200? 
>+} >+ > static void skcipher_sock_destruct(struct sock *sk) > { > struct alg_sock *ask =3D alg_sk(sk); > struct skcipher_ctx *ctx =3D ask->private; > struct crypto_ablkcipher *tfm =3D crypto_ablkcipher_reqtfm(&ctx- >req); > >+ if (atomic_read(&ctx->inflight)) >+ skcipher_wait(sk); >+ > skcipher_free_sgl(sk); > sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm)); > sock_kfree_s(sk, ctx, ctx->len); >@@ -592,6 +810,7 @@ static int skcipher_accept_parent(void *private, >struct sock *sk) ctx->more =3D 0; > ctx->merge =3D 0; > ctx->enc =3D 0; >+ atomic_set(&ctx->inflight, 0); > af_alg_init_completion(&ctx->completion); > > ask->private =3D ctx; > >-- >To unsubscribe from this list: send the line "unsubscribe linux-crypto= " >in the body of a message to majordomo@vger.kernel.org >More majordomo info at http://vger.kernel.org/majordomo-info.html Ciao Stephan