From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
borisp@nvidia.com, john.fastabend@gmail.com, maximmi@nvidia.com,
tariqt@nvidia.com, vfedorenko@novek.ru,
Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net-next v2 11/11] tls: rx: decrypt into a fresh skb
Date: Thu, 14 Jul 2022 22:22:35 -0700 [thread overview]
Message-ID: <20220715052235.1452170-12-kuba@kernel.org> (raw)
In-Reply-To: <20220715052235.1452170-1-kuba@kernel.org>
We currently CoW Rx skbs whenever we can't decrypt to a user
space buffer. The skbs can be enormous (64kB) and CoW does
a linear alloc which has a strong chance of failing under
memory pressure. Or even without, skb_cow_data() assumes
GFP_ATOMIC.
Allocate a new frag'd skb and decrypt into it. We finally
take advantage of the decrypted skb getting returned via
darg.
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
net/tls/tls.h | 3 ++
net/tls/tls_sw.c | 106 ++++++++++++++++++++++++++++++-----------------
2 files changed, 72 insertions(+), 37 deletions(-)
diff --git a/net/tls/tls.h b/net/tls/tls.h
index c818dc68955d..3740740504e3 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -39,6 +39,9 @@
#include <linux/skmsg.h>
#include <net/tls.h>
+#define TLS_PAGE_ORDER (min_t(unsigned int, PAGE_ALLOC_COSTLY_ORDER, \
+ TLS_MAX_PAYLOAD_SIZE >> PAGE_SHIFT))
+
#define __TLS_INC_STATS(net, field) \
__SNMP_INC_STATS((net)->mib.tls_statistics, field)
#define TLS_INC_STATS(net, field) \
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1c9a0705ee63..859ea02022c0 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1383,6 +1383,29 @@ static int tls_setup_from_iter(struct iov_iter *from,
return rc;
}
+static struct sk_buff *
+tls_alloc_clrtxt_skb(struct sock *sk, struct sk_buff *skb,
+ unsigned int full_len)
+{
+ struct strp_msg *clr_rxm;
+ struct sk_buff *clr_skb;
+ int err;
+
+ clr_skb = alloc_skb_with_frags(0, full_len, TLS_PAGE_ORDER,
+ &err, sk->sk_allocation);
+ if (!clr_skb)
+ return NULL;
+
+ skb_copy_header(clr_skb, skb);
+ clr_skb->len = full_len;
+ clr_skb->data_len = full_len;
+
+ clr_rxm = strp_msg(clr_skb);
+ clr_rxm->offset = 0;
+
+ return clr_skb;
+}
+
/* Decrypt handlers
*
* tls_decrypt_sg() and tls_decrypt_device() are decrypt handlers.
@@ -1410,34 +1433,40 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
struct tls_prot_info *prot = &tls_ctx->prot_info;
int n_sgin, n_sgout, aead_size, err, pages = 0;
struct sk_buff *skb = tls_strp_msg(ctx);
- struct strp_msg *rxm = strp_msg(skb);
- struct tls_msg *tlm = tls_msg(skb);
+ const struct strp_msg *rxm = strp_msg(skb);
+ const struct tls_msg *tlm = tls_msg(skb);
struct aead_request *aead_req;
- struct sk_buff *unused;
struct scatterlist *sgin = NULL;
struct scatterlist *sgout = NULL;
const int data_len = rxm->full_len - prot->overhead_size;
int tail_pages = !!prot->tail_size;
struct tls_decrypt_ctx *dctx;
+ struct sk_buff *clear_skb;
int iv_offset = 0;
u8 *mem;
+ n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
+ rxm->full_len - prot->prepend_size);
+ if (n_sgin < 1)
+ return n_sgin ?: -EBADMSG;
+
if (darg->zc && (out_iov || out_sg)) {
+ clear_skb = NULL;
+
if (out_iov)
n_sgout = 1 + tail_pages +
iov_iter_npages_cap(out_iov, INT_MAX, data_len);
else
n_sgout = sg_nents(out_sg);
- n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
- rxm->full_len - prot->prepend_size);
} else {
- n_sgout = 0;
darg->zc = false;
- n_sgin = skb_cow_data(skb, 0, &unused);
- }
- if (n_sgin < 1)
- return -EBADMSG;
+ clear_skb = tls_alloc_clrtxt_skb(sk, skb, rxm->full_len);
+ if (!clear_skb)
+ return -ENOMEM;
+
+ n_sgout = 1 + skb_shinfo(clear_skb)->nr_frags;
+ }
/* Increment to accommodate AAD */
n_sgin = n_sgin + 1;
@@ -1449,8 +1478,10 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
sk->sk_allocation);
- if (!mem)
- return -ENOMEM;
+ if (!mem) {
+ err = -ENOMEM;
+ goto exit_free_skb;
+ }
/* Segment the allocated memory */
aead_req = (struct aead_request *)mem;
@@ -1499,33 +1530,31 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
if (err < 0)
goto exit_free;
- if (n_sgout) {
- if (out_iov) {
- sg_init_table(sgout, n_sgout);
- sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);
+ if (clear_skb) {
+ sg_init_table(sgout, n_sgout);
+ sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);
- err = tls_setup_from_iter(out_iov, data_len,
- &pages, &sgout[1],
- (n_sgout - 1 - tail_pages));
- if (err < 0)
- goto fallback_to_reg_recv;
+ err = skb_to_sgvec(clear_skb, &sgout[1], prot->prepend_size,
+ data_len + prot->tail_size);
+ if (err < 0)
+ goto exit_free;
+ } else if (out_iov) {
+ sg_init_table(sgout, n_sgout);
+ sg_set_buf(&sgout[0], dctx->aad, prot->aad_size);
- if (prot->tail_size) {
- sg_unmark_end(&sgout[pages]);
- sg_set_buf(&sgout[pages + 1], &dctx->tail,
- prot->tail_size);
- sg_mark_end(&sgout[pages + 1]);
- }
- } else if (out_sg) {
- memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
- } else {
- goto fallback_to_reg_recv;
+ err = tls_setup_from_iter(out_iov, data_len, &pages, &sgout[1],
+ (n_sgout - 1 - tail_pages));
+ if (err < 0)
+ goto exit_free_pages;
+
+ if (prot->tail_size) {
+ sg_unmark_end(&sgout[pages]);
+ sg_set_buf(&sgout[pages + 1], &dctx->tail,
+ prot->tail_size);
+ sg_mark_end(&sgout[pages + 1]);
}
- } else {
-fallback_to_reg_recv:
- sgout = sgin;
- pages = 0;
- darg->zc = false;
+ } else if (out_sg) {
+ memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
}
/* Prepare and submit AEAD request */
@@ -1534,7 +1563,8 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
if (err)
goto exit_free_pages;
- darg->skb = tls_strp_msg(ctx);
+ darg->skb = clear_skb ?: tls_strp_msg(ctx);
+ clear_skb = NULL;
if (unlikely(darg->async)) {
err = tls_strp_msg_hold(sk, skb, &ctx->async_hold);
@@ -1552,6 +1582,8 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
put_page(sg_page(&sgout[pages]));
exit_free:
kfree(mem);
+exit_free_skb:
+ consume_skb(clear_skb);
return err;
}
--
2.36.1
next prev parent reply other threads:[~2022-07-15 5:23 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-15 5:22 [PATCH net-next v2 00/11] tls: rx: avoid skb_cow_data() Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 01/11] tls: rx: allow only one reader at a time Jakub Kicinski
2022-07-20 8:37 ` Eric Dumazet
2022-07-20 16:59 ` Jakub Kicinski
2022-07-20 17:09 ` Eric Dumazet
2022-07-20 17:19 ` Eric Dumazet
2022-07-15 5:22 ` [PATCH net-next v2 02/11] tls: rx: don't try to keep the skbs always on the list Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 03/11] tls: rx: don't keep decrypted skbs on ctx->recv_pkt Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 04/11] tls: rx: remove the message decrypted tracking Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 05/11] tls: rx: factor out device darg update Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 06/11] tls: rx: read the input skb from ctx->recv_pkt Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 07/11] tls: rx: return the decrypted skb via darg Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 08/11] tls: rx: async: adjust record geometry immediately Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 09/11] tls: rx: async: hold onto the input skb Jakub Kicinski
2022-07-15 5:22 ` [PATCH net-next v2 10/11] tls: rx: async: don't put async zc on the list Jakub Kicinski
2022-07-15 5:22 ` Jakub Kicinski [this message]
2022-07-18 10:40 ` [PATCH net-next v2 00/11] tls: rx: avoid skb_cow_data() patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220715052235.1452170-12-kuba@kernel.org \
--to=kuba@kernel.org \
--cc=borisp@nvidia.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=john.fastabend@gmail.com \
--cc=maximmi@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=tariqt@nvidia.com \
--cc=vfedorenko@novek.ru \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).