From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mx0a-00082601.pphosted.com ([67.231.145.42]:42618 "EHLO mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751372AbeCTXV6 (ORCPT ); Tue, 20 Mar 2018 19:21:58 -0400 Received: from pps.filterd (m0044008.ppops.net [127.0.0.1]) by mx0a-00082601.pphosted.com (8.16.0.22/8.16.0.22) with SMTP id w2KN6t1H006576 for ; Tue, 20 Mar 2018 16:21:58 -0700 Received: from mail.thefacebook.com ([199.201.64.23]) by mx0a-00082601.pphosted.com with ESMTP id 2gu8qg0qdv-3 (version=TLSv1 cipher=ECDHE-RSA-AES256-SHA bits=256 verify=NOT) for ; Tue, 20 Mar 2018 16:21:58 -0700 From: Yonghong Song To: , , , , , CC: Subject: [PATCH net-next v3 1/2] net: permit skb_segment on head_frag frag_list skb Date: Tue, 20 Mar 2018 16:21:55 -0700 Message-ID: <20180320232156.3455738-2-yhs@fb.com> In-Reply-To: <20180320232156.3455738-1-yhs@fb.com> References: <20180320232156.3455738-1-yhs@fb.com> MIME-Version: 1.0 Content-Type: text/plain Sender: netdev-owner@vger.kernel.org List-ID: One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at function skb_segment(), line 3667. The bpf program attaches to clsact ingress, calls bpf_skb_change_proto to change protocol from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect to send the changed packet out. 3472 struct sk_buff *skb_segment(struct sk_buff *head_skb, 3473 netdev_features_t features) 3474 { 3475 struct sk_buff *segs = NULL; 3476 struct sk_buff *tail = NULL; ... 3665 while (pos < offset + len) { 3666 if (i >= nfrags) { 3667 BUG_ON(skb_headlen(list_skb)); 3668 3669 i = 0; 3670 nfrags = skb_shinfo(list_skb)->nr_frags; 3671 frag = skb_shinfo(list_skb)->frags; 3672 frag_skb = list_skb; ... call stack: ... #1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525 #2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc #3 [ffff883ffef03640] oops_end at ffffffff8101d7e7 #4 [ffff883ffef03668] die at ffffffff8101deb2 #5 [ffff883ffef03698] do_trap at ffffffff8101a700 #6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe #7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0 #8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab [exception RIP: skb_segment+3044] RIP: ffffffff817e4dd4 RSP: ffff883ffef03860 RFLAGS: 00010216 RAX: 0000000000002bf6 RBX: ffff883feb7aaa00 RCX: 0000000000000011 RDX: ffff883fb87910c0 RSI: 0000000000000011 RDI: ffff883feb7ab500 RBP: ffff883ffef03928 R8: 0000000000002ce2 R9: 00000000000027da R10: 000001ea00000000 R11: 0000000000002d82 R12: ffff883f90a1ee80 R13: ffff883fb8791120 R14: ffff883feb7abc00 R15: 0000000000002ce2 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7 --- --- ... The triggering input skb has the following properties: list_skb = skb->frag_list; skb->nfrags != NULL && skb_headlen(list_skb) != 0 and skb_segment() is not able to handle a frag_list skb if its headlen (list_skb->len - list_skb->data_len) is not 0. This patch addressed the issue by handling skb_headlen(list_skb) != 0 case properly if list_skb->head_frag is true, which is expected in most cases. The head frag is processed before list_skb->frags are processed. Reported-by: Diptanu Gon Choudhury Signed-off-by: Yonghong Song --- net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 715c134..59bbc06 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3475,7 +3475,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; - skb_frag_t *frag = skb_shinfo(head_skb)->frags; + skb_frag_t *frag = skb_shinfo(head_skb)->frags, *head_frag = NULL; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); struct sk_buff *frag_skb = head_skb; @@ -3664,19 +3664,39 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(list_skb)); - i = 0; + if (skb_headlen(list_skb)) { + struct page *page; + + BUG_ON(!list_skb->head_frag); + + page = virt_to_head_page(list_skb->head); + if (!head_frag) { + head_frag = kmalloc(sizeof(skb_frag_t), + GFP_KERNEL); + if (!head_frag) + goto err; + } + head_frag->page.p = page; + head_frag->page_offset = list_skb->data - + (unsigned char *)page_address(page); + head_frag->size = skb_headlen(list_skb); + /* set i = -1 so we will pick head_frag + * instead of skb_shinfo(list_skb)->frags + * when i == -1. + */ + i = -1; + } nfrags = skb_shinfo(list_skb)->nr_frags; - frag = skb_shinfo(list_skb)->frags; - frag_skb = list_skb; - - BUG_ON(!nfrags); - - if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || - skb_zerocopy_clone(nskb, frag_skb, - GFP_ATOMIC)) - goto err; + if (nfrags) { + frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; + + if (skb_orphan_frags(frag_skb, GFP_ATOMIC) || + skb_zerocopy_clone(nskb, frag_skb, + GFP_ATOMIC)) + goto err; + } list_skb = list_skb->next; } @@ -3689,7 +3709,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, goto err; } - *nskb_frag = *frag; + *nskb_frag = (i == -1) ? *head_frag : *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); @@ -3702,7 +3722,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (pos + size <= offset + len) { i++; - frag++; + if (i != 0) + frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); @@ -3774,10 +3795,12 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, swap(tail->destructor, head_skb->destructor); swap(tail->sk, head_skb->sk); } + kfree(head_frag); return segs; err: kfree_skb_list(segs); + kfree(head_frag); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(skb_segment); -- 2.9.5