* [PATCH RFC ipsec-next] IPsec offload, part one
@ 2017-01-04 8:23 Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 1/5] esp4: Avoid skb_cow_data whenever possible Steffen Klassert
` (5 more replies)
0 siblings, 6 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 8:23 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Steffen Klassert, Sowmini Varadhan, Ilan Tayari
This is the first part of the IPsec offload work we
talked about at the IPsec workshop at the last netdev
conference. I plan to apply this to ipsec-next
after this round of review.
Patches 1 and 2 try to avoid skb linearization in
the ESP layer.
Patch 3 introduces a helper to set up the ESP trailer.
Patch 4 prepares the generic network code for
IPsec GRO. The main reason we need this is
that we have to reinject the decrypted inner
packet back into the GRO layer.
Patch 5 introduces GRO handlers for ESP. GRO
can be enabled with an IPsec offload config option.
This config option will also be used for the
upcoming hardware offload.
David, patch 3 touches generic networking code.
Is it ok to integrate such a generic preparation
patch into an IPsec pull request, or do you
prefer to get it as a separate patch?
* [PATCH RFC ipsec-next 1/5] esp4: Avoid skb_cow_data whenever possible
2017-01-04 8:23 [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
@ 2017-01-04 8:23 ` Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 2/5] esp6: " Steffen Klassert
` (4 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 8:23 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Steffen Klassert, Sowmini Varadhan, Ilan Tayari
This patch tries to avoid skb_cow_data on esp4.
On the encrypt side we add the IPsec tailbits
to the linear part of the buffer if there is
space for them. If there is no space in the linear
part, we add a page fragment with the tailbits to
the buffer and use separate src and dst scatterlists.
On the decrypt side, we leave the buffer as it is
if it is not cloned.
With this, we can avoid linearizing the buffer
in most cases.
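In short, the new output path is a three-way decision. A condensed
sketch of that decision, not the patch itself (locking, error handling
and the trailer fill are omitted, and the helper name is invented for
illustration):

#include <linux/skbuff.h>
#include <net/xfrm.h>

static int esp_output_pick_path(struct xfrm_state *x, struct sk_buff *skb,
				int tailen)
{
	if (!skb_cloned(skb)) {
		/* 1) The tailbits fit into the linear part:
		 *    use the skb as it is, no copy needed.
		 */
		if (tailen <= skb_availroom(skb))
			return 1;

		/* 2) No linear room, but a frag slot is free: put the
		 *    trailer on a page fragment from x->xfrag and
		 *    encrypt with separate src/dst scatterlists.
		 */
		if (skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS &&
		    !skb_has_frag_list(skb))
			return 2;
	}

	/* 3) Fall back to skb_cow_data(), which may copy/linearize. */
	return 3;
}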
Joint work with:
Sowmini Varadhan <sowmini.varadhan@oracle.com>
Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
include/net/xfrm.h | 2 +
net/ipv4/esp4.c | 338 +++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 266 insertions(+), 74 deletions(-)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 31947b9..ce53cfe 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -213,6 +213,8 @@ struct xfrm_state {
/* Last used time */
unsigned long lastused;
+ struct page_frag xfrag;
+
/* Reference to data common to all the instances of this
* transformer. */
const struct xfrm_type *type;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 20fb25e..9e8d971 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -18,6 +18,8 @@
#include <net/protocol.h>
#include <net/udp.h>
+#include <linux/highmem.h>
+
struct esp_skb_cb {
struct xfrm_skb_cb xfrm;
void *tmp;
@@ -92,11 +94,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+{
+ struct esp_output_extra *extra = esp_tmp_extra(tmp);
+ struct crypto_aead *aead = x->data;
+ int extralen = 0;
+ u8 *iv;
+ struct aead_request *req;
+ struct scatterlist *sg;
+
+ if (x->props.flags & XFRM_STATE_ESN)
+ extralen += sizeof(*extra);
+
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+
+ /* Unref skb_frag_pages in the src scatterlist if necessary.
+ * Skip the first sg which comes from skb->data.
+ */
+ if (req->src != req->dst)
+ for (sg = sg_next(req->src); sg; sg = sg_next(sg))
+ put_page(sg_page(sg));
+}
+
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ void *tmp;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
- kfree(ESP_SKB_CB(skb)->tmp);
+ tmp = ESP_SKB_CB(skb)->tmp;
+ esp_ssg_unref(x, tmp);
+ kfree(tmp);
xfrm_output_resume(skb, err);
}
@@ -120,6 +151,29 @@ static void esp_output_restore_header(struct sk_buff *skb)
sizeof(__be32));
}
+static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
+ struct ip_esp_hdr *esph,
+ struct esp_output_extra *extra)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ extra->esphoff = (unsigned char *)esph -
+ skb_transport_header(skb);
+ esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+ extra->seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ }
+
+ esph->spi = x->id.spi;
+
+ return esph;
+}
+
static void esp_output_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -130,16 +184,18 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
- int err;
struct esp_output_extra *extra;
+ int err = -ENOMEM;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct aead_request *req;
- struct scatterlist *sg;
+ struct scatterlist *sg, *dsg;
struct sk_buff *trailer;
+ struct page *page;
void *tmp;
u8 *iv;
u8 *tail;
+ u8 *vaddr;
int blksize;
int clen;
int alen;
@@ -149,7 +205,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
int nfrags;
int assoclen;
int extralen;
+ int tailen;
__be64 seqno;
+ __u8 proto = *skb_mac_header(skb);
/* skb is pure payload to encrypt */
@@ -169,12 +227,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
plen = clen - skb->len - tfclen;
-
- err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
-
+ tailen = tfclen + plen + alen;
assoclen = sizeof(*esph);
extralen = 0;
@@ -183,35 +236,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
assoclen += sizeof(__be32);
}
- tmp = esp_alloc_tmp(aead, nfrags, extralen);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
- }
-
- extra = esp_tmp_extra(tmp);
- iv = esp_tmp_iv(aead, tmp, extralen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
-
- /* Fill padding... */
- tail = skb_tail_pointer(trailer);
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = *skb_mac_header(skb);
- pskb_put(skb, trailer, clen - skb->len + alen);
-
- skb_push(skb, -skb_network_offset(skb));
- esph = ip_esp_hdr(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
+ esph = ip_esp_hdr(skb);
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
@@ -230,7 +256,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
uh = (struct udphdr *)esph;
uh->source = sport;
uh->dest = dport;
- uh->len = htons(skb->len - skb_transport_offset(skb));
+ uh->len = htons(skb->len + tailen
+ - skb_transport_offset(skb));
uh->check = 0;
switch (encap_type) {
@@ -248,31 +275,170 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
*skb_mac_header(skb) = IPPROTO_UDP;
}
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ if (!skb_cloned(skb)) {
+ if (tailen <= skb_availroom(skb)) {
+ nfrags = 1;
+ trailer = skb;
+ tail = skb_tail_pointer(trailer);
- aead_request_set_callback(req, 0, esp_output_done, skb);
+ goto skip_cow;
+ } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)
+ && !skb_has_frag_list(skb)) {
+ int allocsize;
+ struct sock *sk = skb->sk;
+ struct page_frag *pfrag = &x->xfrag;
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * encryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- extra->esphoff = (unsigned char *)esph -
- skb_transport_header(skb);
- esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
- extra->seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ allocsize = ALIGN(tailen, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ goto cow;
+ }
+
+ page = pfrag->page;
+ get_page(page);
+
+ vaddr = kmap_atomic(page);
+
+ tail = vaddr + pfrag->offset;
+
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+
+ kunmap_atomic(vaddr);
+
+ nfrags = skb_shinfo(skb)->nr_frags;
+
+ __skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
+ tailen);
+ skb_shinfo(skb)->nr_frags = ++nfrags;
+
+ pfrag->offset = pfrag->offset + allocsize;
+ nfrags++;
+
+ skb->len += tailen;
+ skb->data_len += tailen;
+ skb->truesize += tailen;
+ if (sk)
+ atomic_add(tailen, &sk->sk_wmem_alloc);
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esph->spi = x->id.spi;
+
+ tmp = esp_alloc_tmp(aead, nfrags + 2, extralen);
+ if (!tmp) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = &sg[nfrags];
+
+ esph = esp_output_set_extra(skb, esph, extra);
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ spin_unlock_bh(&x->lock);
+
+ goto skip_cow2;
+ }
}
+cow:
+ err = skb_cow_data(skb, tailen, &trailer);
+ if (err < 0)
+ goto error;
+ nfrags = err;
+ tail = skb_tail_pointer(trailer);
+ esph = ip_esp_hdr(skb);
+
+skip_cow:
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+ pskb_put(skb, trailer, clen - skb->len + alen);
+
+ skb_push(skb, -skb_network_offset(skb));
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
esph->spi = x->id.spi;
+ tmp = esp_alloc_tmp(aead, nfrags, extralen);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = sg;
+
+ esph = esp_output_set_extra(skb, esph, extra);
+
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + clen + alen);
- aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+skip_cow2:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ else
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
aead_request_set_ad(req, assoclen);
seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
@@ -298,6 +464,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esp_output_restore_header(skb);
}
+ if (sg != dsg)
+ esp_ssg_unref(x, tmp);
kfree(tmp);
error:
@@ -401,6 +569,23 @@ static void esp_input_restore_header(struct sk_buff *skb)
__skb_pull(skb, 4);
}
+static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ }
+}
+
static void esp_input_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -437,12 +622,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
if (elen <= 0)
goto out;
- err = skb_cow_data(skb, 0, &trailer);
- if (err < 0)
- goto out;
-
- nfrags = err;
-
assoclen = sizeof(*esph);
seqhilen = 0;
@@ -451,6 +630,26 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
assoclen += seqhilen;
}
+ if (!skb_cloned(skb)) {
+ if (!skb_is_nonlinear(skb)) {
+ nfrags = 1;
+
+ goto skip_cow;
+ } else if (!skb_has_frag_list(skb)) {
+ nfrags = skb_shinfo(skb)->nr_frags;
+ nfrags++;
+
+ goto skip_cow;
+ }
+ }
+
+ err = skb_cow_data(skb, 0, &trailer);
+ if (err < 0)
+ goto out;
+
+ nfrags = err;
+
+skip_cow:
err = -ENOMEM;
tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
@@ -462,26 +661,17 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- skb->ip_summed = CHECKSUM_NONE;
+ esp_input_set_header(skb, seqhi);
- esph = (struct ip_esp_hdr *)skb->data;
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
- aead_request_set_callback(req, 0, esp_input_done, skb);
+ skb->ip_summed = CHECKSUM_NONE;
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * decryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)skb_push(skb, 4);
- *seqhi = esph->spi;
- esph->spi = esph->seq_no;
- esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
- }
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ else
+ aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
aead_request_set_ad(req, assoclen);
--
1.9.1
* [PATCH RFC ipsec-next 2/5] esp6: Avoid skb_cow_data whenever possible
2017-01-04 8:23 [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 1/5] esp4: Avoid skb_cow_data whenever possible Steffen Klassert
@ 2017-01-04 8:23 ` Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 3/5] esp: Introduce a helper to setup the trailer Steffen Klassert
` (3 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 8:23 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Steffen Klassert, Sowmini Varadhan, Ilan Tayari
This patch tries to avoid skb_cow_data on esp6.
On the encrypt side we add the IPsec tailbits
to the linear part of the buffer if there is
space for them. If there is no space in the linear
part, we add a page fragment with the tailbits to
the buffer and use separate src and dst scatterlists.
On the decrypt side, we leave the buffer as it is
if it is not cloned.
With this, we can avoid linearizing the buffer
in most cases.
Joint work with:
Sowmini Varadhan <sowmini.varadhan@oracle.com>
Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv6/esp6.c | 302 +++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 246 insertions(+), 56 deletions(-)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index cbcdd5d..a428ac6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -44,6 +44,8 @@
#include <net/protocol.h>
#include <linux/icmpv6.h>
+#include <linux/highmem.h>
+
struct esp_skb_cb {
struct xfrm_skb_cb xfrm;
void *tmp;
@@ -114,11 +116,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+{
+ __be32 *seqhi;
+ struct crypto_aead *aead = x->data;
+ int seqhilen = 0;
+ u8 *iv;
+ struct aead_request *req;
+ struct scatterlist *sg;
+
+ if (x->props.flags & XFRM_STATE_ESN)
+ seqhilen += sizeof(__be32);
+
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+
+ /* Unref skb_frag_pages in the src scatterlist if necessary.
+ * Skip the first sg which comes from skb->data.
+ */
+ if (req->src != req->dst)
+ for (sg = sg_next(req->src); sg; sg = sg_next(sg))
+ put_page(sg_page(sg));
+}
+
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ void *tmp;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
- kfree(ESP_SKB_CB(skb)->tmp);
+ tmp = ESP_SKB_CB(skb)->tmp;
+ esp_ssg_unref(x, tmp);
+ kfree(tmp);
xfrm_output_resume(skb, err);
}
@@ -138,6 +169,27 @@ static void esp_output_restore_header(struct sk_buff *skb)
esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
}
+static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
+ struct ip_esp_hdr *esph,
+ __be32 *seqhi)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+ *seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ }
+
+ esph->spi = x->id.spi;
+
+ return esph;
+}
+
static void esp_output_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -152,8 +204,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct aead_request *req;
- struct scatterlist *sg;
+ struct scatterlist *sg, *dsg;
struct sk_buff *trailer;
+ struct page *page;
void *tmp;
int blksize;
int clen;
@@ -164,10 +217,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
int nfrags;
int assoclen;
int seqhilen;
+ int tailen;
u8 *iv;
u8 *tail;
+ u8 *vaddr;
__be32 *seqhi;
__be64 seqno;
+ __u8 proto = *skb_mac_header(skb);
/* skb is pure payload to encrypt */
aead = x->data;
@@ -186,11 +242,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
plen = clen - skb->len - tfclen;
-
- err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
+ tailen = tfclen + plen + alen;
assoclen = sizeof(*esph);
seqhilen = 0;
@@ -200,19 +252,130 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
+ *skb_mac_header(skb) = IPPROTO_ESP;
+ esph = ip_esp_hdr(skb);
+
+ if (!skb_cloned(skb)) {
+ if (tailen <= skb_availroom(skb)) {
+ nfrags = 1;
+ trailer = skb;
+ tail = skb_tail_pointer(trailer);
+
+ goto skip_cow;
+ } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)
+ && !skb_has_frag_list(skb)) {
+ int allocsize;
+ struct sock *sk = skb->sk;
+ struct page_frag *pfrag = &x->xfrag;
+
+ allocsize = ALIGN(tailen, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ goto cow;
+ }
+
+ page = pfrag->page;
+ get_page(page);
+
+ vaddr = kmap_atomic(page);
+
+ tail = vaddr + pfrag->offset;
+
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+
+ kunmap_atomic(vaddr);
+
+ nfrags = skb_shinfo(skb)->nr_frags;
+
+ __skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
+ tailen);
+ skb_shinfo(skb)->nr_frags = ++nfrags;
+
+ pfrag->offset = pfrag->offset + allocsize;
+ nfrags++;
+
+ skb->len += tailen;
+ skb->data_len += tailen;
+ skb->truesize += tailen;
+ if (sk)
+ atomic_add(tailen, &sk->sk_wmem_alloc);
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esph->spi = x->id.spi;
+
+ tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen);
+ if (!tmp) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = &sg[nfrags];
+
+ esph = esp_output_set_esn(skb, esph, seqhi);
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ spin_unlock_bh(&x->lock);
+
+ goto skip_cow2;
+ }
}
- seqhi = esp_tmp_seqhi(tmp);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
+cow:
+ err = skb_cow_data(skb, tailen, &trailer);
+ if (err < 0)
+ goto error;
+ nfrags = err;
- /* Fill padding... */
tail = skb_tail_pointer(trailer);
+ esph = ip_esp_hdr(skb);
+
+skip_cow:
+ /* Fill padding... */
if (tfclen) {
memset(tail, 0, tfclen);
tail += tfclen;
@@ -223,36 +386,40 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
tail[i] = i + 1;
} while (0);
tail[plen - 2] = plen - 2;
- tail[plen - 1] = *skb_mac_header(skb);
+ tail[plen - 1] = proto;
pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
- esph = ip_esp_hdr(skb);
- *skb_mac_header(skb) = IPPROTO_ESP;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esph->spi = x->id.spi;
- aead_request_set_callback(req, 0, esp_output_done, skb);
-
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * encryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
- *seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto error;
}
- esph->spi = x->id.spi;
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = sg;
+
+ esph = esp_output_set_esn(skb, esph, seqhi);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + clen + alen);
- aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+skip_cow2:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ else
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
aead_request_set_ad(req, assoclen);
seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
@@ -278,6 +445,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esp_output_restore_header(skb);
}
+ if (sg != dsg)
+ esp_ssg_unref(x, tmp);
kfree(tmp);
error:
@@ -343,6 +512,23 @@ static void esp_input_restore_header(struct sk_buff *skb)
__skb_pull(skb, 4);
}
+static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ }
+}
+
static void esp_input_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -378,14 +564,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
}
- nfrags = skb_cow_data(skb, 0, &trailer);
- if (nfrags < 0) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = -ENOMEM;
-
assoclen = sizeof(*esph);
seqhilen = 0;
@@ -394,6 +572,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
assoclen += seqhilen;
}
+ if (!skb_cloned(skb)) {
+ if (!skb_is_nonlinear(skb)) {
+ nfrags = 1;
+
+ goto skip_cow;
+ } else if (!skb_has_frag_list(skb)) {
+ nfrags = skb_shinfo(skb)->nr_frags;
+ nfrags++;
+
+ goto skip_cow;
+ }
+ }
+
+ nfrags = skb_cow_data(skb, 0, &trailer);
+ if (nfrags < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+skip_cow:
+ ret = -ENOMEM;
tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
goto out;
@@ -404,26 +603,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- skb->ip_summed = CHECKSUM_NONE;
+ esp_input_set_header(skb, seqhi);
- esph = (struct ip_esp_hdr *)skb->data;
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
- aead_request_set_callback(req, 0, esp_input_done, skb);
+ skb->ip_summed = CHECKSUM_NONE;
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * decryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)skb_push(skb, 4);
- *seqhi = esph->spi;
- esph->spi = esph->seq_no;
- esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
- }
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ else
+ aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
aead_request_set_ad(req, assoclen);
--
1.9.1
* [PATCH RFC ipsec-next 3/5] esp: Introduce a helper to setup the trailer
2017-01-04 8:23 [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 1/5] esp4: Avoid skb_cow_data whenever possible Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 2/5] esp6: " Steffen Klassert
@ 2017-01-04 8:23 ` Steffen Klassert
2017-01-04 8:23 ` [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO Steffen Klassert
` (2 subsequent siblings)
5 siblings, 0 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 8:23 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Steffen Klassert, Sowmini Varadhan, Ilan Tayari
We need to set up the trailer in two different cases,
so add a helper to avoid code duplication.
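The layout the helper produces follows RFC 4303: optional TFC padding
(zero bytes), self-describing padding bytes 1, 2, 3, ..., the Pad
Length byte and finally the Next Header byte. As a hypothetical
userspace illustration (not part of the patch; the kernel helper is
restructured slightly so it compiles standalone):

#include <stdio.h>
#include <string.h>

typedef unsigned char u8;

static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, u8 proto)
{
	int i;

	if (tfclen) {
		memset(tail, 0, tfclen);	/* TFC padding */
		tail += tfclen;
	}
	for (i = 0; i < plen - 2; i++)		/* 1, 2, 3, ... */
		tail[i] = i + 1;
	tail[plen - 2] = plen - 2;		/* Pad Length */
	tail[plen - 1] = proto;			/* Next Header */
}

int main(void)
{
	u8 tail[4];
	int i;

	esp_output_fill_trailer(tail, 0, sizeof(tail), 6 /* TCP */);
	for (i = 0; i < 4; i++)
		printf("%02x ", tail[i]);	/* prints: 01 02 02 06 */
	printf("\n");
	return 0;
}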
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/esp4.c | 44 +++++++++++++++++++-------------------------
net/ipv6/esp6.c | 44 +++++++++++++++++++-------------------------
2 files changed, 38 insertions(+), 50 deletions(-)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 9e8d971..b1e2444 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -182,6 +182,22 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
esp_output_done(base, err);
}
+static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+}
+
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct esp_output_extra *extra;
@@ -304,18 +320,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
tail = vaddr + pfrag->offset;
- /* Fill padding... */
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = proto;
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
kunmap_atomic(vaddr);
@@ -395,20 +400,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esph = ip_esp_hdr(skb);
skip_cow:
- /* Fill padding... */
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = proto;
- pskb_put(skb, trailer, clen - skb->len + alen);
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
+ pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
esph->spi = x->id.spi;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a428ac6..ff54faa 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -198,6 +198,22 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
esp_output_done(base, err);
}
+static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+}
+
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
@@ -284,18 +300,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
tail = vaddr + pfrag->offset;
- /* Fill padding... */
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = proto;
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
kunmap_atomic(vaddr);
@@ -375,20 +380,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esph = ip_esp_hdr(skb);
skip_cow:
- /* Fill padding... */
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = proto;
- pskb_put(skb, trailer, clen - skb->len + alen);
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
+ pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
--
1.9.1
* [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO
2017-01-04 8:23 [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
` (2 preceding siblings ...)
2017-01-04 8:23 ` [PATCH RFC ipsec-next 3/5] esp: Introduce a helper to setup the trailer Steffen Klassert
@ 2017-01-04 8:23 ` Steffen Klassert
2017-01-04 12:34 ` Eric Dumazet
2017-01-04 8:23 ` [PATCH RFC ipsec-next 5/5] esp: Add a software GRO codepath Steffen Klassert
2017-01-18 7:59 ` [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
5 siblings, 1 reply; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 8:23 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Steffen Klassert, Sowmini Varadhan, Ilan Tayari
This patch prepares the generic codepath for IPsec GRO.
We introduce a new GRO_CONSUMED return code to reflect that
IPsec can complete asynchronously. On IPsec GRO we grab the
packet and reinject it back to layer 2 after IPsec
processing. We also use one xfrm_gro bit on the sk_buff
that IPsec sets to signal GRO. If this
bit is set, we call napi_gro_receive for the backlog device
instead of __netif_receive_skb.
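With this, a gro_receive handler that takes ownership of a packet
returns ERR_PTR(-EINPROGRESS), which dev_gro_receive() maps to
GRO_CONSUMED. A minimal sketch of such a handler (the predicate and
the handoff are hypothetical placeholders):

static struct sk_buff **example_gro_receive(struct sk_buff **head,
					    struct sk_buff *skb)
{
	/* Hypothetical check; a real handler matches its own protocol. */
	if (!example_can_consume(skb))
		return NULL;	/* normal GRO processing continues */

	/* Hypothetical asynchronous handoff, e.g. into the xfrm
	 * input path; the skb is reinjected later.
	 */
	example_consume_async(skb);

	return ERR_PTR(-EINPROGRESS);	/* dev_gro_receive(): GRO_CONSUMED */
}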
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
include/linux/netdevice.h | 1 +
include/linux/skbuff.h | 14 +++++++++++++-
net/core/dev.c | 17 ++++++++++++++++-
net/xfrm/Kconfig | 4 ++++
4 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ecd78b3..89bad76 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -352,6 +352,7 @@ enum gro_result {
GRO_HELD,
GRO_NORMAL,
GRO_DROP,
+ GRO_CONSUMED,
};
typedef enum gro_result gro_result_t;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b53c0cf..a78cd90 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -749,7 +749,10 @@ struct sk_buff {
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
#endif
- /* 2, 4 or 5 bit hole */
+#ifdef CONFIG_XFRM_OFFLOAD
+ __u8 xfrm_gro:1;
+#endif
+ /* 1 to 5 bits hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -3698,6 +3701,15 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
#endif
}
+static inline bool skb_xfrm_gro(struct sk_buff *skb)
+{
+#ifdef CONFIG_XFRM_OFFLOAD
+ return skb->xfrm_gro;
+#else
+ return false;
+#endif
+}
+
/* Keeps track of mac header offset relative to skb->head.
* It is useful for TSO of Tunneling protocol. e.g. GRE.
* For non-tunnel skb it points to skb_mac_header() and for
diff --git a/net/core/dev.c b/net/core/dev.c
index 56818f7..ecbaaf7f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4525,6 +4525,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
}
rcu_read_unlock();
+ if (PTR_ERR(pp) == -EINPROGRESS) {
+ ret = GRO_CONSUMED;
+ goto ok;
+ }
+
if (&ptype->list == head)
goto normal;
@@ -4623,6 +4628,9 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
skb_dst_drop(skb);
+#ifdef CONFIG_XFRM_OFFLOAD
+ secpath_put(skb->sp);
+#endif
kmem_cache_free(skbuff_head_cache, skb);
} else {
__kfree_skb(skb);
@@ -4631,6 +4639,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
case GRO_HELD:
case GRO_MERGED:
+ case GRO_CONSUMED:
break;
}
@@ -4701,6 +4710,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_MERGED:
+ case GRO_CONSUMED:
break;
}
@@ -4843,7 +4853,12 @@ static int process_backlog(struct napi_struct *napi, int quota)
while ((skb = __skb_dequeue(&sd->process_queue))) {
rcu_read_lock();
- __netif_receive_skb(skb);
+
+ if (skb_xfrm_gro(skb))
+ napi_gro_receive(napi, skb);
+ else
+ __netif_receive_skb(skb);
+
rcu_read_unlock();
input_queue_head_incr(sd);
if (++work >= quota)
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index bda1a13..442ac61 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -5,6 +5,10 @@ config XFRM
bool
depends on NET
+config XFRM_OFFLOAD
+ bool
+ depends on XFRM
+
config XFRM_ALGO
tristate
select XFRM
--
1.9.1
* [PATCH RFC ipsec-next 5/5] esp: Add a software GRO codepath
2017-01-04 8:23 [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
` (3 preceding siblings ...)
2017-01-04 8:23 ` [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO Steffen Klassert
@ 2017-01-04 8:23 ` Steffen Klassert
2017-01-18 7:59 ` [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
5 siblings, 0 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 8:23 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Steffen Klassert, Sowmini Varadhan, Ilan Tayari
This patch adds GRO callbacks for ESP on IPv4 and IPv6.
When the GRO layer detects an ESP packet, the
esp{4,6}_gro_receive() function calls the xfrm input layer,
which decapsulates the packet and reinjects it into
layer 2 by calling netif_rx().
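For reference, the encap_type convention after this patch, summarized
from the xfrm_input() hunk below:

/*
 * encap_type >  0    UDP encapsulation type (unchanged)
 * encap_type == 0    plain ESP input (unchanged)
 * encap_type == -1   asynchronous crypto resumption
 * encap_type <  -1   call from a GRO handler, e.g.
 *
 *	xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, -2);
 */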
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/Kconfig | 6 +++
net/ipv4/Makefile | 1 +
net/ipv4/esp4.c | 5 +++
net/ipv4/esp4_offload.c | 94 +++++++++++++++++++++++++++++++++++++++
net/ipv4/ip_vti.c | 4 ++
net/ipv4/xfrm4_input.c | 6 +++
net/ipv4/xfrm4_mode_transport.c | 3 +-
net/ipv6/Kconfig | 6 +++
net/ipv6/Makefile | 1 +
net/ipv6/esp6.c | 5 +++
net/ipv6/esp6_offload.c | 98 +++++++++++++++++++++++++++++++++++++++++
net/ipv6/xfrm6_input.c | 5 +++
net/xfrm/xfrm_input.c | 27 +++++++++---
13 files changed, 253 insertions(+), 8 deletions(-)
create mode 100644 net/ipv4/esp4_offload.c
create mode 100644 net/ipv6/esp6_offload.c
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6e7baaf..27df99f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -360,6 +360,12 @@ config INET_ESP
If unsure, say Y.
+config INET_ESP_OFFLOAD
+ tristate "IP: ESP transformation offload"
+ depends on INET_ESP
+ select XFRM_OFFLOAD
+ default n
+
config INET_IPCOMP
tristate "IP: IPComp transformation"
select INET_XFRM_TUNNEL
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 48af58a..c6d4238 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IPVTI) += ip_vti.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_AH) += ah4.o
obj-$(CONFIG_INET_ESP) += esp4.o
+obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b1e2444..4563aeb 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -629,6 +629,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = 1;
goto skip_cow;
+ } else if (skb_xfrm_gro(skb)) {
+ nfrags = skb_shinfo(skb)->nr_frags;
+ nfrags++;
+
+ goto skip_cow;
} else if (!skb_has_frag_list(skb)) {
nfrags = skb_shinfo(skb)->nr_frags;
nfrags++;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
new file mode 100644
index 0000000..7277d15
--- /dev/null
+++ b/net/ipv4/esp4_offload.c
@@ -0,0 +1,94 @@
+/*
+ * IPV4 GSO/GRO offload support
+ * Linux INET implementation
+ *
+ * Copyright (C) 2016 secunet Security Networks AG
+ * Author: Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * ESP GRO support
+ */
+
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <linux/scatterlist.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/udp.h>
+
+static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ int err;
+ if (NAPI_GRO_CB(skb)->flush)
+ goto out;
+
+ skb_pull(skb, skb_gro_offset(skb));
+ skb->xfrm_gro = 1;
+
+ err = xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, -2);
+ if (err == -EOPNOTSUPP) {
+ skb_push(skb, skb_gro_offset(skb));
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+ skb->xfrm_gro = 0;
+ goto out;
+ }
+
+ return ERR_PTR(-EINPROGRESS);
+out:
+ return NULL;
+}
+
+static int esp4_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct crypto_aead *aead = x->data;
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + nhoff);
+ struct packet_offload *ptype;
+ int err = -ENOENT;
+ __be16 type = skb->protocol;
+
+ rcu_read_lock();
+ ptype = gro_find_complete_by_type(type);
+ if (ptype != NULL)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*esph) + crypto_aead_ivsize(aead));
+
+ rcu_read_unlock();
+
+ return err;
+}
+
+static const struct net_offload esp4_offload = {
+ .callbacks = {
+ .gro_receive = esp4_gro_receive,
+ .gro_complete = esp4_gro_complete,
+ },
+};
+
+static int __init esp4_offload_init(void)
+{
+ return inet_add_offload(&esp4_offload, IPPROTO_ESP);
+}
+
+static void __exit esp4_offload_exit(void)
+{
+ inet_del_offload(&esp4_offload, IPPROTO_ESP);
+}
+
+module_init(esp4_offload_init);
+module_exit(esp4_offload_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 8b14f14..f275feb 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -60,6 +60,10 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
if (tunnel) {
+ /* encap_type < -1 indicates a GRO call, we don't support this. */
+ if (encap_type < -1)
+ return -EOPNOTSUPP;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 62e1e72..6fe1a68 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -53,6 +53,12 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
iph->tot_len = htons(skb->len);
ip_send_check(iph);
+
+ if (skb_xfrm_gro(skb)) {
+ skb_mac_header_rebuild(skb);
+ return 0;
+ }
+
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
xfrm4_rcv_encap_finish);
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index fd840c7..7b447f3 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -50,7 +50,8 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
skb->network_header = skb->transport_header;
}
ip_hdr(skb)->tot_len = htons(skb->len + ihl);
- skb_reset_transport_header(skb);
+ if (!skb_xfrm_gro(skb))
+ skb_reset_transport_header(skb);
return 0;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ec1267e..8b0713b 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -75,6 +75,12 @@ config INET6_ESP
If unsure, say Y.
+config INET6_ESP_OFFLOAD
+ tristate "IPv6: ESP transformation offload"
+ depends on INET6_ESP
+ select XFRM_OFFLOAD
+ default n
+
config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
select INET6_XFRM_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index a9e9fec..217e9ff 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -30,6 +30,7 @@ ipv6-objs += $(ipv6-y)
obj-$(CONFIG_INET6_AH) += ah6.o
obj-$(CONFIG_INET6_ESP) += esp6.o
+obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o
obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o
obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ff54faa..1cca0f1 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -571,6 +571,11 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
nfrags = 1;
goto skip_cow;
+ } else if (skb_xfrm_gro(skb)) {
+ nfrags = skb_shinfo(skb)->nr_frags;
+ nfrags++;
+
+ goto skip_cow;
} else if (!skb_has_frag_list(skb)) {
nfrags = skb_shinfo(skb)->nr_frags;
nfrags++;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
new file mode 100644
index 0000000..e0006cf
--- /dev/null
+++ b/net/ipv6/esp6_offload.c
@@ -0,0 +1,98 @@
+/*
+ * IPV6 GSO/GRO offload support
+ * Linux INET implementation
+ *
+ * Copyright (C) 2016 secunet Security Networks AG
+ * Author: Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * ESP GRO support
+ */
+
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/protocol.h>
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <linux/scatterlist.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+#include <linux/icmpv6.h>
+
+static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
+ struct sk_buff *skb)
+{
+ int err;
+ if (NAPI_GRO_CB(skb)->flush)
+ goto out;
+
+ skb_pull(skb, skb_gro_offset(skb));
+ skb->xfrm_gro = 1;
+
+ XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ err = xfrm_input(skb, IPPROTO_ESP, 0, -2);
+ if (err == -EOPNOTSUPP) {
+ skb_push(skb, skb_gro_offset(skb));
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+ skb->xfrm_gro = 0;
+ goto out;
+ }
+
+ return ERR_PTR(-EINPROGRESS);
+out:
+ return NULL;
+}
+
+static int esp6_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct crypto_aead *aead = x->data;
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + nhoff);
+ struct packet_offload *ptype;
+ int err = -ENOENT;
+ __be16 type = skb->protocol;
+
+ rcu_read_lock();
+ ptype = gro_find_complete_by_type(type);
+ if (ptype != NULL)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*esph) + crypto_aead_ivsize(aead));
+
+ rcu_read_unlock();
+
+ return err;
+}
+
+static const struct net_offload esp6_offload = {
+ .callbacks = {
+ .gro_receive = esp6_gro_receive,
+ .gro_complete = esp6_gro_complete,
+ },
+};
+
+static int __init esp6_offload_init(void)
+{
+ return inet6_add_offload(&esp6_offload, IPPROTO_ESP);
+}
+
+static void __exit esp6_offload_exit(void)
+{
+ inet6_del_offload(&esp6_offload, IPPROTO_ESP);
+}
+
+module_init(esp6_offload_init);
+module_exit(esp6_offload_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index b578956..8ed16c8 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -44,6 +44,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
ipv6_hdr(skb)->payload_len = htons(skb->len);
__skb_push(skb, skb->data - skb_network_header(skb));
+ if (skb_xfrm_gro(skb)) {
+ skb_mac_header_rebuild(skb);
+ return -1;
+ }
+
NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_rcv_finish);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6e3f025..893263e 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -193,13 +193,17 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
int decaps = 0;
int async = 0;
- /* A negative encap_type indicates async resumption. */
if (encap_type < 0) {
- async = 1;
- x = xfrm_input_state(skb);
- seq = XFRM_SKB_CB(skb)->seq.input.low;
- family = x->outer_mode->afinfo->family;
- goto resume;
+ /* An encap_type of -1 indicates async resumption. */
+ if (encap_type == -1) {
+ async = 1;
+ x = xfrm_input_state(skb);
+ seq = XFRM_SKB_CB(skb)->seq.input.low;
+ family = x->outer_mode->afinfo->family;
+ goto resume;
+ }
+ /* encap_type < -1 indicates a GRO call. */
+ encap_type = 0;
}
daddr = (xfrm_address_t *)(skb_network_header(skb) +
@@ -374,7 +378,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
netif_rx(skb);
return 0;
} else {
- return x->inner_mode->afinfo->transport_finish(skb, async);
+ int xfrm_gro = skb_xfrm_gro(skb);
+
+ err = x->inner_mode->afinfo->transport_finish(skb, async);
+ if (xfrm_gro) {
+ skb_dst_drop(skb);
+ netif_rx(skb);
+ return 0;
+ }
+
+ return err;
}
drop_unlock:
--
1.9.1
* Re: [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO
2017-01-04 8:23 ` [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO Steffen Klassert
@ 2017-01-04 12:34 ` Eric Dumazet
2017-01-04 13:26 ` Steffen Klassert
0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2017-01-04 12:34 UTC (permalink / raw)
To: Steffen Klassert; +Cc: David Miller, netdev, Sowmini Varadhan, Ilan Tayari
On Wed, 2017-01-04 at 09:23 +0100, Steffen Klassert wrote:
> This patch prepares the generic codepath for IPsec GRO.
> We introduce a new GRO_CONSUMED return code to reflect that
> IPsec can complete asynchronously. On IPsec GRO we grab the
> packet and reinject it back to layer 2 after IPsec
> processing. We also use one xfrm_gro bit on the sk_buff
> that IPsec sets to signal GRO. If this
> bit is set, we call napi_gro_receive for the backlog device
> instead of __netif_receive_skb.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> ---
> include/linux/netdevice.h | 1 +
> include/linux/skbuff.h | 14 +++++++++++++-
> net/core/dev.c | 17 ++++++++++++++++-
> net/xfrm/Kconfig | 4 ++++
> 4 files changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index ecd78b3..89bad76 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -352,6 +352,7 @@ enum gro_result {
> GRO_HELD,
> GRO_NORMAL,
> GRO_DROP,
> + GRO_CONSUMED,
> };
> typedef enum gro_result gro_result_t;
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index b53c0cf..a78cd90 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -749,7 +749,10 @@ struct sk_buff {
> #ifdef CONFIG_NET_SWITCHDEV
> __u8 offload_fwd_mark:1;
> #endif
> - /* 2, 4 or 5 bit hole */
> +#ifdef CONFIG_XFRM_OFFLOAD
> + __u8 xfrm_gro:1;
Arg, yet another skb bit.
> +#endif
> + /* 1 to 5 bits hole */
>
> #ifdef CONFIG_NET_SCHED
> __u16 tc_index; /* traffic control index */
> @@ -3698,6 +3701,15 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
> #endif
> }
>
>
> @@ -4843,7 +4853,12 @@ static int process_backlog(struct napi_struct *napi, int quota)
>
> while ((skb = __skb_dequeue(&sd->process_queue))) {
> rcu_read_lock();
> - __netif_receive_skb(skb);
> +
> + if (skb_xfrm_gro(skb))
> + napi_gro_receive(napi, skb);
> + else
> + __netif_receive_skb(skb);
> +
But napi here is device independent. It is a fake NAPI, per cpu.
I am not sure of the various implications of using it at this point;
this looks quite dangerous/invasive to me, compared to the gro_cells
infra, which was designed to have no impact on core code paths.
To me, the caller should call napi_gro_receive() on the skb, instead of
letting the core networking stack add this extra skb bit and extra
conditional.
( Also I am not sure device dismantle will be properly managed by your
changes. It is really tricky to get it right. )
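For reference, the gro_cells usage pattern is roughly the following; a
sketch with a hypothetical tunnel device, assuming the
gro_cells_init()/gro_cells_receive() API from include/net/gro_cells.h:

#include <net/gro_cells.h>

struct my_tunnel {			/* hypothetical per-device state */
	struct gro_cells gro_cells;
};

static int my_tunnel_init(struct net_device *dev)
{
	struct my_tunnel *t = netdev_priv(dev);

	/* Sets up one NAPI instance per cpu, owned by the device. */
	return gro_cells_init(&t->gro_cells, dev);
}

static int my_tunnel_rx(struct net_device *dev, struct sk_buff *skb)
{
	struct my_tunnel *t = netdev_priv(dev);

	/* Queues the skb to the current cpu's cell; GRO then runs from
	 * that cell's own NAPI poll, not from process_backlog().
	 */
	return gro_cells_receive(&t->gro_cells, skb);
}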
Thanks.
* Re: [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO
2017-01-04 12:34 ` Eric Dumazet
@ 2017-01-04 13:26 ` Steffen Klassert
2017-01-09 9:59 ` Steffen Klassert
0 siblings, 1 reply; 10+ messages in thread
From: Steffen Klassert @ 2017-01-04 13:26 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev, Sowmini Varadhan, Ilan Tayari
On Wed, Jan 04, 2017 at 04:34:15AM -0800, Eric Dumazet wrote:
> On Wed, 2017-01-04 at 09:23 +0100, Steffen Klassert wrote:
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index b53c0cf..a78cd90 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -749,7 +749,10 @@ struct sk_buff {
> > #ifdef CONFIG_NET_SWITCHDEV
> > __u8 offload_fwd_mark:1;
> > #endif
> > - /* 2, 4 or 5 bit hole */
> > +#ifdef CONFIG_XFRM_OFFLOAD
> > + __u8 xfrm_gro:1;
>
>
>
> Arg, yet another skb bit.
I'm not particularly proud of this, but
I've seen no other option to keep this
information across the layers. This is
probably the reason for all these skb bits.
>
> > +#endif
> > + /* 1 to 5 bits hole */
> >
> > #ifdef CONFIG_NET_SCHED
> > __u16 tc_index; /* traffic control index */
> > @@ -3698,6 +3701,15 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
> > #endif
> > }
> >
>
> >
> > @@ -4843,7 +4853,12 @@ static int process_backlog(struct napi_struct *napi, int quota)
> >
> > while ((skb = __skb_dequeue(&sd->process_queue))) {
> > rcu_read_lock();
> > - __netif_receive_skb(skb);
> > +
> > + if (skb_xfrm_gro(skb))
> > + napi_gro_receive(napi, skb);
> > + else
> > + __netif_receive_skb(skb);
> > +
>
>
> But napi here is device independent. It is a fake NAPI, per cpu.
>
> I am not sure of the various implications of using it at this point,
> this looks quite dangerous/invasive to me, compared to the gro_cells
> infra which was designed to have no impact on core code paths.
>
> To me, the caller should call napi_gro_receive() skb, instead of letting
> core networking stack add this extra skb bit and extra conditional.
I had a quick look at gro_cells; it looks like I could avoid
at least the extra condition with this.
I'll see if I can get it to work with gro_cells, thanks for
pointing it out!
* Re: [PATCH RFC ipsec-next 4/5] net: Prepare for IPsec GRO
2017-01-04 13:26 ` Steffen Klassert
@ 2017-01-09 9:59 ` Steffen Klassert
0 siblings, 0 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-09 9:59 UTC (permalink / raw)
To: Eric Dumazet; +Cc: David Miller, netdev, Sowmini Varadhan, Ilan Tayari
On Wed, Jan 04, 2017 at 02:26:09PM +0100, Steffen Klassert wrote:
> On Wed, Jan 04, 2017 at 04:34:15AM -0800, Eric Dumazet wrote:
> > >
> > > @@ -4843,7 +4853,12 @@ static int process_backlog(struct napi_struct *napi, int quota)
> > >
> > > while ((skb = __skb_dequeue(&sd->process_queue))) {
> > > rcu_read_lock();
> > > - __netif_receive_skb(skb);
> > > +
> > > + if (skb_xfrm_gro(skb))
> > > + napi_gro_receive(napi, skb);
> > > + else
> > > + __netif_receive_skb(skb);
> > > +
> >
> >
> > But napi here is device independent. It is a fake NAPI, per cpu.
> >
> > I am not sure of the various implications of using it at this point,
> > this looks quite dangerous/invasive to me, compared to the gro_cells
> > infra which was designed to have no impact on core code paths.
> >
> > To me, the caller should call napi_gro_receive() skb, instead of letting
> > core networking stack add this extra skb bit and extra conditional.
>
> I had a quick look at gro_cells; it looks like I could avoid
> at least the extra condition with this.
On second look, it does not seem so promising. I don't
have a netdevice to hang this off of. It looks like I
need a fake per-cpu NAPI like the backlog one. I could
create my own; then I would not have to add this
condition to core networking.
* Re: [PATCH RFC ipsec-next] IPsec offload, part one
2017-01-04 8:23 [PATCH RFC ipsec-next] IPsec offload, part one Steffen Klassert
` (4 preceding siblings ...)
2017-01-04 8:23 ` [PATCH RFC ipsec-next 5/5] esp: Add a software GRO codepath Steffen Klassert
@ 2017-01-18 7:59 ` Steffen Klassert
5 siblings, 0 replies; 10+ messages in thread
From: Steffen Klassert @ 2017-01-18 7:59 UTC (permalink / raw)
To: David Miller, netdev; +Cc: Sowmini Varadhan, Ilan Tayari
On Wed, Jan 04, 2017 at 09:23:45AM +0100, Steffen Klassert wrote:
> This is the first part of the IPsec offload work we
> talked about at the IPsec workshop at the last netdev
> conference. I plan to apply this to ipsec-next
> after this round of review.
>
> Patches 1 and 2 try to avoid skb linearization in
> the ESP layer.
>
> Patch 3 introduces a helper to set up the ESP trailer.
I have not received any comments about patches 1-3,
so I've applied them to ipsec-next.
>
> Patch 4 prepares the generic network code for
> IPsec GRO. The main reason we need this is
> that we have to reinject the decrypted inner
> packet back into the GRO layer.
>
> Patch 5 introduces GRO handlers for ESP. GRO
> can be enabled with an IPsec offload config option.
> This config option will also be used for the
> upcoming hardware offload.
Patches 4 and 5 will be refactored according to
the comments from Eric.