Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next v3 1/2] net: permit skb_segment on head_frag frag_list skb
From: Yonghong Song @ 2018-03-20 23:21 UTC (permalink / raw)
  To: edumazet, ast, daniel, diptanu, netdev, alexander.duyck; +Cc: kernel-team
In-Reply-To: <20180320232156.3455738-1-yhs@fb.com>

One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.

3472 struct sk_buff *skb_segment(struct sk_buff *head_skb,
3473                             netdev_features_t features)
3474 {
3475         struct sk_buff *segs = NULL;
3476         struct sk_buff *tail = NULL;
...
3665                 while (pos < offset + len) {
3666                         if (i >= nfrags) {
3667                                 BUG_ON(skb_headlen(list_skb));
3668
3669                                 i = 0;
3670                                 nfrags = skb_shinfo(list_skb)->nr_frags;
3671                                 frag = skb_shinfo(list_skb)->frags;
3672                                 frag_skb = list_skb;
...

call stack:
...
 #1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525
 #2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc
 #3 [ffff883ffef03640] oops_end at ffffffff8101d7e7
 #4 [ffff883ffef03668] die at ffffffff8101deb2
 #5 [ffff883ffef03698] do_trap at ffffffff8101a700
 #6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe
 #7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0
 #8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab
    [exception RIP: skb_segment+3044]
    RIP: ffffffff817e4dd4  RSP: ffff883ffef03860  RFLAGS: 00010216
    RAX: 0000000000002bf6  RBX: ffff883feb7aaa00  RCX: 0000000000000011
    RDX: ffff883fb87910c0  RSI: 0000000000000011  RDI: ffff883feb7ab500
    RBP: ffff883ffef03928   R8: 0000000000002ce2   R9: 00000000000027da
    R10: 000001ea00000000  R11: 0000000000002d82  R12: ffff883f90a1ee80
    R13: ffff883fb8791120  R14: ffff883feb7abc00  R15: 0000000000002ce2
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7
--- <IRQ stack> ---
...

The triggering input skb has the following properties:
    list_skb = skb->frag_list;
    skb->nfrags != 0 && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.

This patch addresses the issue by properly handling the
skb_headlen(list_skb) != 0 case when list_skb->head_frag is true,
which is expected in most cases. The head frag is processed before
list_skb->frags are processed.

Reported-by: Diptanu Gon Choudhury <diptanu@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
---
 net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 715c134..59bbc06 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3475,7 +3475,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
 	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
-	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+	skb_frag_t *frag = skb_shinfo(head_skb)->frags, *head_frag = NULL;
 	unsigned int mss = skb_shinfo(head_skb)->gso_size;
 	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
 	struct sk_buff *frag_skb = head_skb;
@@ -3664,19 +3664,39 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
-				BUG_ON(skb_headlen(list_skb));
-
 				i = 0;
+				if (skb_headlen(list_skb)) {
+					struct page *page;
+
+					BUG_ON(!list_skb->head_frag);
+
+					page = virt_to_head_page(list_skb->head);
+					if (!head_frag) {
+						head_frag = kmalloc(sizeof(skb_frag_t),
+								    GFP_KERNEL);
+						if (!head_frag)
+							goto err;
+					}
+					head_frag->page.p = page;
+					head_frag->page_offset = list_skb->data -
+						(unsigned char *)page_address(page);
+					head_frag->size = skb_headlen(list_skb);
+					/* set i = -1 so we will pick head_frag
+					 * instead of skb_shinfo(list_skb)->frags
+					 * when i == -1.
+					 */
+					i = -1;
+				}
 				nfrags = skb_shinfo(list_skb)->nr_frags;
-				frag = skb_shinfo(list_skb)->frags;
-				frag_skb = list_skb;
-
-				BUG_ON(!nfrags);
-
-				if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-				    skb_zerocopy_clone(nskb, frag_skb,
-						       GFP_ATOMIC))
-					goto err;
+				if (nfrags) {
+					frag = skb_shinfo(list_skb)->frags;
+					frag_skb = list_skb;
+
+					if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
+					    skb_zerocopy_clone(nskb, frag_skb,
+							       GFP_ATOMIC))
+						goto err;
+				}
 
 				list_skb = list_skb->next;
 			}
@@ -3689,7 +3709,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 				goto err;
 			}
 
-			*nskb_frag = *frag;
+			*nskb_frag = (i == -1) ? *head_frag : *frag;
 			__skb_frag_ref(nskb_frag);
 			size = skb_frag_size(nskb_frag);
 
@@ -3702,7 +3722,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 
 			if (pos + size <= offset + len) {
 				i++;
-				frag++;
+				if (i != 0)
+					frag++;
 				pos += size;
 			} else {
 				skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
@@ -3774,10 +3795,12 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 		swap(tail->destructor, head_skb->destructor);
 		swap(tail->sk, head_skb->sk);
 	}
+	kfree(head_frag);
 	return segs;
 
 err:
 	kfree_skb_list(segs);
+	kfree(head_frag);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(skb_segment);
-- 
2.9.5

^ permalink raw reply related

* [PATCH net-next v3 0/2] net: permit skb_segment on head_frag frag_list skb
From: Yonghong Song @ 2018-03-20 23:21 UTC (permalink / raw)
  To: edumazet, ast, daniel, diptanu, netdev, alexander.duyck; +Cc: kernel-team

One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.
 ...
    3665                 while (pos < offset + len) {
    3666                         if (i >= nfrags) {
    3667                                 BUG_ON(skb_headlen(list_skb));
 ...

The triggering input skb has the following properties:
    list_skb = skb->frag_list;
    skb->nfrags != 0 && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.

Patch #1 provides a simple solution to avoid BUG_ON. If
list_skb->head_frag is true, its page-backed frag will
be processed before the list_skb->frags.
Patch #2 provides a test case in the test_bpf module which
constructs an skb and calls skb_segment() directly. The test
case is able to trigger the BUG_ON without Patch #1.

The patch has been tested in the following setup:
  ipv6_host <-> nat_server <-> ipv4_host
where nat_server has a bpf program doing ipv4<->ipv6
translation and forwarding through clsact hook
bpf_skb_change_proto.

Changelog:
v2 -> v3:
  . Use a starting frag index of -1 (instead of 0) to
    special-case head_frag so that it is processed before the
    other frags in the skb, as suggested by Alexander Duyck.
v1 -> v2:
  . Removed never-hit BUG_ON, spotted by Linyu Yuan.


Yonghong Song (2):
  net: permit skb_segment on head_frag frag_list skb
  net: bpf: add a test for skb_segment in test_bpf module

 lib/test_bpf.c    | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/core/skbuff.c | 51 ++++++++++++++++++++++++++++-----------
 2 files changed, 107 insertions(+), 15 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [PATCH net-next v3 2/2] net: bpf: add a test for skb_segment in test_bpf module
From: Yonghong Song @ 2018-03-20 23:21 UTC (permalink / raw)
  To: edumazet, ast, daniel, diptanu, netdev, alexander.duyck; +Cc: kernel-team
In-Reply-To: <20180320232156.3455738-1-yhs@fb.com>

Without the previous commit,
"modprobe test_bpf" will have the following errors:
...
[   98.149165] ------------[ cut here ]------------
[   98.159362] kernel BUG at net/core/skbuff.c:3667!
[   98.169756] invalid opcode: 0000 [#1] SMP PTI
[   98.179370] Modules linked in:
[   98.179371]  test_bpf(+)
...
which triggers the bug the previous commit intends to fix.

The skbs are constructed to mimic what mlx5 may generate.
The packet size/header may not mimic real cases in production. But
the processing flow is similar.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 lib/test_bpf.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 2efb213..045d7d3 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6574,6 +6574,72 @@ static bool exclude_test(int test_id)
 	return test_id < test_range[0] || test_id > test_range[1];
 }
 
+static struct sk_buff *build_test_skb(void *page)
+{
+	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+	struct sk_buff *skb[2];
+	int i, data_size = 8;
+
+	for (i = 0; i < 2; i++) {
+		/* this will set skb[i]->head_frag */
+		skb[i] = build_skb(page, headroom);
+		if (!skb[i])
+			return NULL;
+
+		skb_reserve(skb[i], headroom);
+		skb_put(skb[i], data_size);
+		skb[i]->protocol = htons(ETH_P_IP);
+		skb_reset_network_header(skb[i]);
+		skb_set_mac_header(skb[i], -ETH_HLEN);
+
+		skb_add_rx_frag(skb[i], skb_shinfo(skb[i])->nr_frags,
+				page, 0, 64, 64);
+		// skb: skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
+	}
+
+	/* setup shinfo */
+	skb_shinfo(skb[0])->gso_size = 1448;
+	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb[0])->gso_segs = 0;
+	skb_shinfo(skb[0])->frag_list = skb[1];
+
+	/* adjust skb[0]'s len */
+	skb[0]->len += skb[1]->len;
+	skb[0]->data_len += skb[1]->data_len;
+	skb[0]->truesize += skb[1]->truesize;
+
+	return skb[0];
+}
+
+static __init int test_skb_segment(void)
+{
+	netdev_features_t features;
+	struct sk_buff *skb;
+	void *page;
+	int ret = -1;
+
+	page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		pr_info("%s: failed to get_free_page!", __func__);
+		return ret;
+	}
+
+	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	features |= NETIF_F_RXCSUM;
+	skb = build_test_skb(page);
+	if (!skb) {
+		pr_info("%s: failed to build_test_skb", __func__);
+	} else if (skb_segment(skb, features)) {
+		ret = 0;
+		pr_info("%s: success in skb_segment!", __func__);
+	} else {
+		pr_info("%s: failed in skb_segment!", __func__);
+	}
+	free_page((unsigned long)page);
+	return ret;
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
@@ -6632,8 +6698,11 @@ static int __init test_bpf_init(void)
 		return ret;
 
 	ret = test_bpf();
-
 	destroy_bpf_tests();
+	if (ret)
+		return ret;
+
+	ret = test_skb_segment();
 	return ret;
 }
 
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH v5 0/2] Remove false-positive VLAs when using max()
From: Linus Torvalds @ 2018-03-20 23:23 UTC (permalink / raw)
  To: Kees Cook
  Cc: Al Viro, Florian Weimer, Andrew Morton, Josh Poimboeuf,
	Rasmus Villemoes, Randy Dunlap, Miguel Ojeda, Ingo Molnar,
	David Laight, Ian Abbott, linux-input, linux-btrfs,
	Network Development, Linux Kernel Mailing List, Kernel Hardening
In-Reply-To: <CAGXu5jJ=ZYpf=30H6hsWn-R-CEVYAgVMHxjmoLUC00QYq0r17g@mail.gmail.com>

On Sat, Mar 17, 2018 at 1:07 PM, Kees Cook <keescook@chromium.org> wrote:
>
> No luck! :( gcc 4.4 refuses to play along. And, hilariously, not only
> does it not change the complaint about __builtin_choose_expr(), it
> also thinks that's a VLA now.

Hmm. So thanks to the diseased mind of Martin Uecker, there's a better
test for "__is_constant()":

  /* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> */
  #define __is_constant(a) \
        (sizeof(int) == sizeof(*(1 ? ((void*)((a) * 0l)) : (int*)1)))

that is actually *specified* by the C standard to work, and doesn't
even depend on any gcc extensions.

The reason is some really subtle pointer conversion rules, where the
type of the ternary operator will depend on whether one of the
pointers is NULL or not.

And the definition of NULL, in turn, very much depends on "integer
constant expression that has the value 0".

Are you willing to do one final try on a generic min/max? Same as my
last patch, but using the above __is_constant() test instead of
__builtin_constant_p?

               Linus

^ permalink raw reply

* Re: [PATCH v5 0/2] Remove false-positive VLAs when using max()
From: Linus Torvalds @ 2018-03-20 23:26 UTC (permalink / raw)
  To: Kees Cook
  Cc: Al Viro, Florian Weimer, Andrew Morton, Josh Poimboeuf,
	Rasmus Villemoes, Randy Dunlap, Miguel Ojeda, Ingo Molnar,
	David Laight, Ian Abbott, linux-input, linux-btrfs,
	Network Development, Linux Kernel Mailing List, Kernel Hardening
In-Reply-To: <CA+55aFwxk=tUECYQkd4cog08qW4ZT=r2K7FQXzGnc-zuMc7JQA@mail.gmail.com>

On Tue, Mar 20, 2018 at 4:23 PM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> Hmm. So thanks to the diseased mind of Martin Uecker, there's a better
> test for "__is_constant()":
>
>   /* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> */
>   #define __is_constant(a) \
>         (sizeof(int) == sizeof(*(1 ? ((void*)((a) * 0l)) : (int*)1)))
>
> that is actually *specified* by the C standard to work, and doesn't
> even depend on any gcc extensions.

Well, it does depend on 'sizeof(*(void *)X)' being 1 and the compiler
not complaining about it, and that sizeof(int) is not 1.

But since we depend on those things in the kernel anyway, that's fine.

                Linus

^ permalink raw reply

* [PATCH] Bluetooth: Remove VLA usage in aes_cmac
From: Gustavo A. R. Silva @ 2018-03-20 23:34 UTC (permalink / raw)
  To: Marcel Holtmann, Johan Hedberg, David S. Miller
  Cc: linux-bluetooth, netdev, linux-kernel, Gustavo A. R. Silva

In preparation for enabling -Wvla, remove the VLA and replace it
with dynamic memory allocation instead.

The use of stack Variable Length Arrays needs to be avoided, as they
can be a vector for stack exhaustion, which can be either a runtime bug
or a security flaw. Also, in general, as code evolves it is easy to
lose track of how big a VLA can get. Thus, we can end up having runtime
failures that are hard to debug.

This change is part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
---
 net/bluetooth/smp.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a2ddae2..23c694d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -173,7 +173,7 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
 		    size_t len, u8 mac[16])
 {
 	uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
-	SHASH_DESC_ON_STACK(desc, tfm);
+	struct shash_desc *shash;
 	int err;

 	if (len > CMAC_MSG_MAX)
@@ -184,8 +184,13 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
 		return -EINVAL;
 	}

-	desc->tfm = tfm;
-	desc->flags = 0;
+	shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
+			GFP_KERNEL);
+	if (!shash)
+		return -ENOMEM;
+
+	shash->tfm = tfm;
+	shash->flags = 0;

 	/* Swap key and message from LSB to MSB */
 	swap_buf(k, tmp, 16);
@@ -200,8 +205,9 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
 		return err;
 	}

-	err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
-	shash_desc_zero(desc);
+	err = crypto_shash_digest(shash, msg_msb, len, mac_msb);
+	shash_desc_zero(shash);
+	kfree(shash);
 	if (err) {
 		BT_ERR("Hash computation error %d", err);
 		return err;
-- 
2.7.4

^ permalink raw reply related

* Re: [net-next] intel: add SPDX identifiers to all the Intel drivers
From: Philippe Ombredanne @ 2018-03-20 23:46 UTC (permalink / raw)
  To: Allan, Bruce W
  Cc: Kirsher, Jeffrey T, Joe Perches, davem@davemloft.net,
	netdev@vger.kernel.org, nhorman@redhat.com, sassmann@redhat.com,
	jogreene@redhat.com, Kate Stewart, Greg Kroah-Hartman,
	Thomas Gleixner
In-Reply-To: <804857E1F29AAC47BF68C404FC60A184ED5CCD20@ORSMSX105.amr.corp.intel.com>

Allan,

On Tue, Mar 20, 2018 at 1:48 PM, Allan, Bruce W <bruce.w.allan@intel.com> wrote:
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>> On Behalf Of Jeff Kirsher
>> Sent: Tuesday, March 20, 2018 10:52 AM
>> To: Joe Perches <joe@perches.com>; davem@davemloft.net; Philippe
>> Ombredanne <pombredanne@nexb.com>
>> Cc: netdev@vger.kernel.org; nhorman@redhat.com; sassmann@redhat.com;
>> jogreene@redhat.com
>> Subject: Re: [net-next] intel: add SPDX identifiers to all the Intel drivers
>>
>> On Tue, 2018-03-20 at 10:41 -0700, Joe Perches wrote:
>> > On Tue, 2018-03-20 at 10:13 -0700, Jeff Kirsher wrote:
>> > > Add the SPDX identifiers to all the Intel wired LAN driver files,
>> > > as
>> > > outlined in Documentation/process/license-rules.rst.
>> >
>> > So far the Documentation does not show using the -only variant.
>> >
>> > For a discussion, please see:
>> > https://lkml.org/lkml/2018/2/8/311
>
> But the Linux Foundation, the authority maintaining the valid SPDX identifiers, indicates at https://spdx.org/licenses/ that "GPL-2.0" is deprecated while "GPL-2.0-only" (and others) is appropriate.
> Was there any mention in the thread or other conversations if/when the kernel's documentation (and all existing uses of "GPL-2.0" in the kernel) will be updated to "GPL-2.0-only"?

The kernel (as documented by Thomas [1]) is, for now, using v2.6 of
the SPDX license list [2]. IMHO the reference should be the kernel
doc and nothing else, to ensure consistency and avoid confusion (which
obviously was not avoided entirely here ;) ).

What happened is that in late December a new version 3 was published
by SPDX, and v2.6 is no longer online. I will bring this up with the
SPDX group because we should be able to reference version 2.6
online (it is still in git though [2]).

When the kernel maintainers decide to switch to V3.0 of the SPDX list,
the doc will be updated and then Joe's script could be applied at once
to update the past.

What matters most here is consistency: having some v2.6 and some v3.0
SPDX ids at once is not a happy thing IMHO.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/license-rules.rst
[2] https://github.com/spdx/license-list-data/tree/v2.6


>> :-( I had it originally as GPL-2.0 and then it was pointed out that it
>> was being deprecated, so rather than creating future thrash over the
>> change, figured I would be ahead of the game.
>>
>> >
>> > > diff --git a/drivers/net/ethernet/intel/e100.c
>> > > b/drivers/net/ethernet/intel/e100.c
>> >
>> > []
>> > > @@ -1,3 +1,4 @@
>> > > +// SPDX-License-Identifier: GPL-2.0-only
>> >
>> > etc...



-- 
Cordially
Philippe Ombredanne

^ permalink raw reply

* Re: [PATCH net-next v3 1/2] net: permit skb_segment on head_frag frag_list skb
From: Alexander Duyck @ 2018-03-20 23:50 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Eric Dumazet, ast, Daniel Borkmann, diptanu, Netdev, Kernel Team
In-Reply-To: <20180320232156.3455738-2-yhs@fb.com>

On Tue, Mar 20, 2018 at 4:21 PM, Yonghong Song <yhs@fb.com> wrote:
> One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
> function skb_segment(), line 3667. The bpf program attaches to
> clsact ingress, calls bpf_skb_change_proto to change protocol
> from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
> to send the changed packet out.
>
> 3472 struct sk_buff *skb_segment(struct sk_buff *head_skb,
> 3473                             netdev_features_t features)
> 3474 {
> 3475         struct sk_buff *segs = NULL;
> 3476         struct sk_buff *tail = NULL;
> ...
> 3665                 while (pos < offset + len) {
> 3666                         if (i >= nfrags) {
> 3667                                 BUG_ON(skb_headlen(list_skb));
> 3668
> 3669                                 i = 0;
> 3670                                 nfrags = skb_shinfo(list_skb)->nr_frags;
> 3671                                 frag = skb_shinfo(list_skb)->frags;
> 3672                                 frag_skb = list_skb;
> ...
>
> call stack:
> ...
>  #1 [ffff883ffef03558] __crash_kexec at ffffffff8110c525
>  #2 [ffff883ffef03620] crash_kexec at ffffffff8110d5cc
>  #3 [ffff883ffef03640] oops_end at ffffffff8101d7e7
>  #4 [ffff883ffef03668] die at ffffffff8101deb2
>  #5 [ffff883ffef03698] do_trap at ffffffff8101a700
>  #6 [ffff883ffef036e8] do_error_trap at ffffffff8101abfe
>  #7 [ffff883ffef037a0] do_invalid_op at ffffffff8101acd0
>  #8 [ffff883ffef037b0] invalid_op at ffffffff81a00bab
>     [exception RIP: skb_segment+3044]
>     RIP: ffffffff817e4dd4  RSP: ffff883ffef03860  RFLAGS: 00010216
>     RAX: 0000000000002bf6  RBX: ffff883feb7aaa00  RCX: 0000000000000011
>     RDX: ffff883fb87910c0  RSI: 0000000000000011  RDI: ffff883feb7ab500
>     RBP: ffff883ffef03928   R8: 0000000000002ce2   R9: 00000000000027da
>     R10: 000001ea00000000  R11: 0000000000002d82  R12: ffff883f90a1ee80
>     R13: ffff883fb8791120  R14: ffff883feb7abc00  R15: 0000000000002ce2
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>  #9 [ffff883ffef03930] tcp_gso_segment at ffffffff818713e7
> --- <IRQ stack> ---
> ...
>
> The triggering input skb has the following properties:
>     list_skb = skb->frag_list;
>     skb->nfrags != NULL && skb_headlen(list_skb) != 0
> and skb_segment() is not able to handle a frag_list skb
> if its headlen (list_skb->len - list_skb->data_len) is not 0.
>
> This patch addressed the issue by handling skb_headlen(list_skb) != 0
> case properly if list_skb->head_frag is true, which is expected in
> most cases. The head frag is processed before list_skb->frags
> are processed.
>
> Reported-by: Diptanu Gon Choudhury <diptanu@fb.com>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  net/core/skbuff.c | 51 +++++++++++++++++++++++++++++++++++++--------------
>  1 file changed, 37 insertions(+), 14 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 715c134..59bbc06 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3475,7 +3475,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>         struct sk_buff *segs = NULL;
>         struct sk_buff *tail = NULL;
>         struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
> -       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
> +       skb_frag_t *frag = skb_shinfo(head_skb)->frags, *head_frag = NULL;

I think you misunderstood me. I wasn't saying you should allocate
head_frag. I was saying you could move the declaration down.

>         unsigned int mss = skb_shinfo(head_skb)->gso_size;
>         unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
>         struct sk_buff *frag_skb = head_skb;
> @@ -3664,19 +3664,39 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>
>                 while (pos < offset + len) {

So right here in the loop you could add a "skb_frag_t head_frag;" just
so we declare it here and save ourselves the stack space.

>                         if (i >= nfrags) {
> -                               BUG_ON(skb_headlen(list_skb));
> -
>                                 i = 0;
> +                               if (skb_headlen(list_skb)) {
> +                                       struct page *page;
> +
> +                                       BUG_ON(!list_skb->head_frag);
> +
> +                                       page = virt_to_head_page(list_skb->head);
> +                                       if (!head_frag) {
> +                                               head_frag = kmalloc(sizeof(skb_frag_t),
> +                                                                   GFP_KERNEL);
> +                                               if (!head_frag)
> +                                                       goto err;
> +                                       }

Please no memory allocation. I just meant you could allocate it on the
stack later.

> +                                       head_frag->page.p = page;
> +                                       head_frag->page_offset = list_skb->data -
> +                                               (unsigned char *)page_address(page);
> +                                       head_frag->size = skb_headlen(list_skb);
> +                                       /* set i = -1 so we will pick head_frag
> +                                        * instead of skb_shinfo(list_skb)->frags
> +                                        * when i == -1.
> +                                        */
> +                                       i = -1;
> +                               }

So it took me a bit to pick up on the fact that the line below wasn't
removed. So we are basically trying to do this all in one pass now. Do
I have that right?

One thing you could look at doing to save yourself the extra "if"
later would be to pull the frag pointer before you go through the
skb_headlen check above. Then if you are going to use a head_frag you
could just do an "i--; frag--;" combination to rewind and make room for
the increment to come later. That way you don't have an invalid frag
pointer floating around, and you only have to do this once
instead of having to do a conditional check per fragment.

>                                 nfrags = skb_shinfo(list_skb)->nr_frags;
> -                               frag = skb_shinfo(list_skb)->frags;

This patch might be more readable if you were to just insert the
skb_headlen() bits down here and leave the i=0 through frag = .. in one
piece.

> -                               frag_skb = list_skb;
> -
> -                               BUG_ON(!nfrags);
> -
> -                               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> -                                   skb_zerocopy_clone(nskb, frag_skb,
> -                                                      GFP_ATOMIC))
> -                                       goto err;
> +                               if (nfrags) {
> +                                       frag = skb_shinfo(list_skb)->frags;
> +                                       frag_skb = list_skb;
> +
> +                                       if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> +                                           skb_zerocopy_clone(nskb, frag_skb,
> +                                                              GFP_ATOMIC))
> +                                               goto err;
> +                               }
>
>                                 list_skb = list_skb->next;
>                         }
> @@ -3689,7 +3709,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>                                 goto err;
>                         }
>
> -                       *nskb_frag = *frag;
> +                       *nskb_frag = (i == -1) ? *head_frag : *frag;

So this would be better as "*nskb_frag = (i < 0) ? head_frag : *frag;".

>                         __skb_frag_ref(nskb_frag);
>                         size = skb_frag_size(nskb_frag);
>
> @@ -3702,7 +3722,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>
>                         if (pos + size <= offset + len) {
>                                 i++;
> -                               frag++;
> +                               if (i != 0)
> +                                       frag++;
>                                 pos += size;
>                         } else {
>                                 skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
> @@ -3774,10 +3795,12 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>                 swap(tail->destructor, head_skb->destructor);
>                 swap(tail->sk, head_skb->sk);
>         }
> +       kfree(head_frag);
>         return segs;
>
>  err:
>         kfree_skb_list(segs);
> +       kfree(head_frag);
>         return ERR_PTR(err);
>  }
>  EXPORT_SYMBOL_GPL(skb_segment);
> --
> 2.9.5
>

^ permalink raw reply

* Re: [PATCH v5 0/2] Remove false-positive VLAs when using max()
From: Al Viro @ 2018-03-21  0:05 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Kees Cook, Florian Weimer, Andrew Morton, Josh Poimboeuf,
	Rasmus Villemoes, Randy Dunlap, Miguel Ojeda, Ingo Molnar,
	David Laight, Ian Abbott, linux-input, linux-btrfs,
	Network Development, Linux Kernel Mailing List, Kernel Hardening
In-Reply-To: <CA+55aFwO8KZD_tZwqwL05FbzpKW4Ucd88C0tcR7LJ1utuy3WGg@mail.gmail.com>

On Tue, Mar 20, 2018 at 04:26:52PM -0700, Linus Torvalds wrote:
> On Tue, Mar 20, 2018 at 4:23 PM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > Hmm. So thanks to the diseased mind of Martin Uecker, there's a better
> > test for "__is_constant()":
> >
> >   /* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> */
> >   #define __is_constant(a) \
> >         (sizeof(int) == sizeof(*(1 ? ((void*)((a) * 0l)) : (int*)1)))
> >
> > that is actually *specified* by the C standard to work, and doesn't
> > even depend on any gcc extensions.
> 
> Well, it does depend on 'sizeof(*(void *)X)' being 1 and the compiler
> not complaining about it, and that sizeof(int) is not 1.
> 
> But since we depend on those things in the kernel anyway, that's fine.

It also depends upon "ICE for null pointer constant purposes" having the
same extensions as "ICE for enum purposes", etc., which is not obvious.

Back in 2007 or so we had a long thread regarding null pointer constants
in sparse; I probably still have notes from back then, but that'll take
some serious digging to find ;-/

What's more, gcc definitely has odd extensions.  Example I remember from
back then:
extern unsigned n;
struct {
	int x : 1 + n - n;
} y;

is accepted.  Used to be quietly accepted with -Wpedantic -std=c99, even,
but that got fixed - with -Wpedantic it does, at least, warn.  What is
and what is not recognized is fairly random - 1 + n - n + 1 + n - n
is recognized as "constant", 1 + n + n + 1 - n - n is not.  Of course,
neither is an ICE.

^ permalink raw reply

* [PATCH] kbuild: disable clang's default use of -fmerge-all-constants
From: Daniel Borkmann @ 2018-03-21  0:18 UTC (permalink / raw)
  To: torvalds
  Cc: ast, psodagud, netdev, Daniel Borkmann, Chenbo Feng,
	Richard Smith, Chandler Carruth, linux-kernel

Prasad reported that he has seen crashes in BPF subsystem with netd
on Android with arm64 in the form of (note, the taint is unrelated):

  [ 4134.721483] Unable to handle kernel paging request at virtual address 800000001
  [ 4134.820925] Mem abort info:
  [ 4134.901283]   Exception class = DABT (current EL), IL = 32 bits
  [ 4135.016736]   SET = 0, FnV = 0
  [ 4135.119820]   EA = 0, S1PTW = 0
  [ 4135.201431] Data abort info:
  [ 4135.301388]   ISV = 0, ISS = 0x00000021
  [ 4135.359599]   CM = 0, WnR = 0
  [ 4135.470873] user pgtable: 4k pages, 39-bit VAs, pgd = ffffffe39b946000
  [ 4135.499757] [0000000800000001] *pgd=0000000000000000, *pud=0000000000000000
  [ 4135.660725] Internal error: Oops: 96000021 [#1] PREEMPT SMP
  [ 4135.674610] Modules linked in:
  [ 4135.682883] CPU: 5 PID: 1260 Comm: netd Tainted: G S      W       4.14.19+ #1
  [ 4135.716188] task: ffffffe39f4aa380 task.stack: ffffff801d4e0000
  [ 4135.731599] PC is at bpf_prog_add+0x20/0x68
  [ 4135.741746] LR is at bpf_prog_inc+0x20/0x2c
  [ 4135.751788] pc : [<ffffff94ab7ad584>] lr : [<ffffff94ab7ad638>] pstate: 60400145
  [ 4135.769062] sp : ffffff801d4e3ce0
  [...]
  [ 4136.258315] Process netd (pid: 1260, stack limit = 0xffffff801d4e0000)
  [ 4136.273746] Call trace:
  [...]
  [ 4136.442494] 3ca0: ffffff94ab7ad584 0000000060400145 ffffffe3a01bf8f8 0000000000000006
  [ 4136.460936] 3cc0: 0000008000000000 ffffff94ab844204 ffffff801d4e3cf0 ffffff94ab7ad584
  [ 4136.479241] [<ffffff94ab7ad584>] bpf_prog_add+0x20/0x68
  [ 4136.491767] [<ffffff94ab7ad638>] bpf_prog_inc+0x20/0x2c
  [ 4136.504536] [<ffffff94ab7b5d08>] bpf_obj_get_user+0x204/0x22c
  [ 4136.518746] [<ffffff94ab7ade68>] SyS_bpf+0x5a8/0x1a88

Android's netd was basically pinning the uid cookie BPF map in BPF
fs (/sys/fs/bpf/traffic_cookie_uid_map) and later on retrieving it
again resulting in above panic. Issue is that the map was wrongly
identified as a prog! Above kernel was compiled with clang 4.0,
and it turns out that clang decided to merge the bpf_prog_iops and
bpf_map_iops into a single memory location, such that the two i_ops
could then not be distinguished anymore.

Reason for this miscompilation is that clang has the more aggressive
-fmerge-all-constants enabled by default. In fact, clang source code
has a comment about it in lib/AST/ExprConstant.cpp on why it is okay
to do so:

  Pointers with different bases cannot represent the same object.
  (Note that clang defaults to -fmerge-all-constants, which can
  lead to inconsistent results for comparisons involving the address
  of a constant; this generally doesn't matter in practice.)

The issue never appeared with gcc however, since gcc does not enable
-fmerge-all-constants by default and even *explicitly* states in
its option description that using this flag results in non-conforming
behavior, quote from man gcc:

  Languages like C or C++ require each variable, including multiple
  instances of the same variable in recursive calls, to have distinct
  locations, so using this option results in non-conforming behavior.

There are also various clang bug reports open on that matter [1],
where clang developers acknowledge the non-conforming behavior,
and refer to disabling it with -fno-merge-all-constants. But even
if this gets fixed in clang today, there are already users out there
that triggered this. Thus, fix this issue by explicitly adding
-fno-merge-all-constants to the kernel's Makefile to generically
disable this optimization, since potentially other places in the
kernel could subtly break as well.

Note, there is also a flag called -fmerge-constants (not supported
by clang), which is more conservative and only applies to strings
and it's enabled in gcc's -O/-O2/-O3/-Os optimization levels. In
gcc's code, the two flags -fmerge-{all-,}constants share the same
variable internally, so when disabling it via -fno-merge-all-constants,
then we really don't merge any const data (e.g. strings), and text
size increases with gcc (14,927,214 -> 14,942,646 for vmlinux.o).

  $ gcc -fverbose-asm -O2 foo.c -S -o foo.S
    -> foo.S lists -fmerge-constants under options enabled
  $ gcc -fverbose-asm -O2 -fno-merge-all-constants foo.c -S -o foo.S
    -> foo.S doesn't list -fmerge-constants under options enabled
  $ gcc -fverbose-asm -O2 -fno-merge-all-constants -fmerge-constants foo.c -S -o foo.S
    -> foo.S lists -fmerge-constants under options enabled

Thus, as a workaround we need to set both -fno-merge-all-constants
*and* -fmerge-constants in the Makefile in order for text size to
stay as is.

  [1] https://bugs.llvm.org/show_bug.cgi?id=18538

Reported-by: Prasad Sodagudi <psodagud@codeaurora.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Chenbo Feng <fengc@google.com>
Cc: Richard Smith <richard-llvm@metafoo.co.uk>
Cc: Chandler Carruth <chandlerc@gmail.com>
Cc: linux-kernel@vger.kernel.org
Tested-by: Prasad Sodagudi <psodagud@codeaurora.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 [ Hi Linus, feel free to take this fix directly if you want.
   Alternatively, we could route it via bpf tree. Thanks a
   lot for your feedback! ]

 Makefile | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Makefile b/Makefile
index c4322de..af07bf5 100644
--- a/Makefile
+++ b/Makefile
@@ -826,6 +826,15 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
 
+# clang sets -fmerge-all-constants by default as optimization, but this
+# is non-conforming behavior for C and in fact breaks the kernel, so we
+# need to disable it here generally.
+KBUILD_CFLAGS	+= $(call cc-option,-fno-merge-all-constants)
+
+# for gcc -fno-merge-all-constants disables everything, but it is fine
+# to have actual conforming behavior enabled.
+KBUILD_CFLAGS	+= $(call cc-option,-fmerge-constants)
+
 # Make sure -fstack-check isn't enabled (like gentoo apparently did)
 KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
 
-- 
2.9.5

^ permalink raw reply related

* [PATCH net] net: dsa: Fix functional dsa-loop dependency on FIXED_PHY
From: Florian Fainelli @ 2018-03-21  0:31 UTC (permalink / raw)
  To: netdev; +Cc: Florian Fainelli, Andrew Lunn, Vivien Didelot, open list

We have a functional dependency on the FIXED_PHY MDIO bus because we register
fixed PHY devices "the old way" which only works if the code that does this has
had a chance to run before the fixed MDIO bus is probed. Make sure we account
for that and have dsa_loop_bdinfo.o be either built-in or modular depending on
whether CONFIG_FIXED_PHY reflects that too.

Fixes: 98cd1552ea27 ("net: dsa: Mock-up driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index d040aeb45172..15c2a831edf1 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_NET_DSA_BCM_SF2)	+= bcm-sf2.o
 bcm-sf2-objs			:= bcm_sf2.o bcm_sf2_cfp.o
-obj-$(CONFIG_NET_DSA_LOOP)	+= dsa_loop.o dsa_loop_bdinfo.o
+obj-$(CONFIG_NET_DSA_LOOP)	+= dsa_loop.o
+ifdef CONFIG_NET_DSA_LOOP
+obj-$(CONFIG_FIXED_PHY)		+= dsa_loop_bdinfo.o
+endif
 obj-$(CONFIG_NET_DSA_MT7530)	+= mt7530.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_QCA8K)	+= qca8k.o
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH] kbuild: disable clang's default use of -fmerge-all-constants
From: Linus Torvalds @ 2018-03-21  0:36 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Alexei Starovoitov, psodagud, Network Development, Chenbo Feng,
	Richard Smith, Chandler Carruth, Linux Kernel Mailing List
In-Reply-To: <20180321001824.8395-1-daniel@iogearbox.net>

On Tue, Mar 20, 2018 at 5:18 PM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> Prasad reported that he has seen crashes in BPF subsystem with netd
> on Android with arm64 in the form of (note, the taint is unrelated):

Ack. This looks good to me. And thanks for noticing the behavior wrt
the correct gcc merging.

>  [ Hi Linus, feel free to take this fix directly if you want.
>    Alternatively, we could route it via bpf tree. Thanks a
>    lot for your feedback! ]

So since it's your patch and the only known issue comes from the bpf
side, I think it should just go through the bpf tree, and I expect it
to get to me through all the usual channels.

Thanks,

                Linus

^ permalink raw reply

* Re: [PATCH] kbuild: disable clang's default use of -fmerge-all-constants
From: Daniel Borkmann @ 2018-03-21  0:38 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Alexei Starovoitov, psodagud, Network Development, Chenbo Feng,
	Richard Smith, Chandler Carruth, Linux Kernel Mailing List
In-Reply-To: <CA+55aFw94wSeOvs1JO2iPHXgpwWQ-EF6v0XYwjFrQN1wr5tY-A@mail.gmail.com>

On 03/21/2018 01:36 AM, Linus Torvalds wrote:
> On Tue, Mar 20, 2018 at 5:18 PM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>> Prasad reported that he has seen crashes in BPF subsystem with netd
>> on Android with arm64 in the form of (note, the taint is unrelated):
> 
> Ack. This looks good to me. And thanks for noticing the behavior wrt
> the correct gcc merging.
> 
>>  [ Hi Linus, feel free to take this fix directly if you want.
>>    Alternatively, we could route it via bpf tree. Thanks a
>>    lot for your feedback! ]
> 
> So since it's your patch and the only known issue comes from the bpf
> side, I think it should just go through the bpf tree, and I expect it
> to get to me through all the usual channels.

Yeah, that's fine, thanks for letting us know!

Best,
Daniel

^ permalink raw reply

* Re: [RFC PATCH 0/3] kernel: add support for 256-bit IO access
From: Andy Lutomirski @ 2018-03-21  0:39 UTC (permalink / raw)
  To: David Laight
  Cc: Andy Lutomirski, Ingo Molnar, Thomas Gleixner, Rahul Lakkireddy,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org, mingo@redhat.com, hpa@zytor.com,
	davem@davemloft.net, akpm@linux-foundation.org,
	torvalds@linux-foundation.org, ganeshgr@chelsio.com,
	nirranjan@chelsio.com, indranil@chelsio.com, Peter Zijlstra,
	Fenghua Yu, Eric 
In-Reply-To: <c1eac43cd8e143d09477a34ed6de6302@AcuMS.aculab.com>

On Tue, Mar 20, 2018 at 3:10 PM, David Laight <David.Laight@aculab.com> wrote:
> From: Andy Lutomirski
>> Sent: 20 March 2018 14:57
> ...
>> I'd rather see us finally finish the work that Rik started to rework
>> this differently.  I'd like kernel_fpu_begin() to look like:
>>
>> if (test_thread_flag(TIF_NEED_FPU_RESTORE)) {
>>   return; // we're already okay.  maybe we need to check
>> in_interrupt() or something, though?
>> } else {
>>   XSAVES/XSAVEOPT/XSAVE;
>>   set_thread_flag(TIF_NEED_FPU_RESTORE):
>> }
>>
>> and kernel_fpu_end() does nothing at all.
>
> I guess it might need to set (clear?) the CFLAGS bit for a process
> that isn't using the fpu at all - which seems a sensible feature.

What do you mean "CFLAGS"?

But we no longer have any concept of "isn't using the fpu at all" --
we got rid of that.

>
>> We take the full performance hit for a *single* kernel_fpu_begin() on
>> an otherwise short syscall or interrupt, but there's no additional
>> cost for more of them or for long-enough-running things that we
>> schedule in the middle.
>
> It might be worth adding a parameter to kernel_fpu_begin() to indicate
> which registers are needed, and a return value to say what has been
> granted.
> Then a driver could request AVX2 (for example) and use a fallback path
> if the register set isn't available (for any reason).
> A call from an ISR could always fail.

Last time I benchmarked it, XSAVEC on none of the state wasn't a whole
lot faster than XSAVEC for all of it.

>
>> As I remember, the main hangup was that this interacts a bit oddly
>> with PKRU, but that's manageable.
>
> WTF PKRU ??

PKRU is uniquely demented.  All the rest of the XSAVEC state only
affects code that explicitly references that state.  PKRU affects
every single access to user pages, so we need PKRU to match the
current task at all times in the kernel.  This means that, if we start
deferring XRSTORS until prepare_exit_to_usermode(), we need to start
restoring PKRU using WRPKRU in __switch_to().  Of course, *that*
interacts a bit oddly with XINUSE, but maybe we don't care.

Phooey on you, Intel, for putting PKRU into xstate and not giving a
fast instruction to control XINUSE.

^ permalink raw reply

* Re: [PATCH net-next v3 2/2] net: bpf: add a test for skb_segment in test_bpf module
From: Eric Dumazet @ 2018-03-21  0:44 UTC (permalink / raw)
  To: Yonghong Song, edumazet, ast, daniel, diptanu, netdev,
	alexander.duyck
  Cc: kernel-team
In-Reply-To: <20180320232156.3455738-3-yhs@fb.com>



On 03/20/2018 04:21 PM, Yonghong Song wrote:
> Without the previous commit,
> "modprobe test_bpf" will have the following errors:
> ...
> [   98.149165] ------------[ cut here ]------------
> [   98.159362] kernel BUG at net/core/skbuff.c:3667!
> [   98.169756] invalid opcode: 0000 [#1] SMP PTI
> [   98.179370] Modules linked in:
> [   98.179371]  test_bpf(+)
> ...
> which triggers the bug the previous commit intends to fix.
> 
> The skbs are constructed to mimic what mlx5 may generate.
> The packet size/header may not mimic real cases in production. But
> the processing flow is similar.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  lib/test_bpf.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 70 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/test_bpf.c b/lib/test_bpf.c
> index 2efb213..045d7d3 100644
> --- a/lib/test_bpf.c
> +++ b/lib/test_bpf.c
> @@ -6574,6 +6574,72 @@ static bool exclude_test(int test_id)
>  	return test_id < test_range[0] || test_id > test_range[1];
>  }
>  
> +static struct sk_buff *build_test_skb(void *page)
> +{
> +	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
> +	struct sk_buff *skb[2];
> +	int i, data_size = 8;
> +
> +	for (i = 0; i < 2; i++) {
> +		/* this will set skb[i]->head_frag */
> +		skb[i] = build_skb(page, headroom);
> +		if (!skb[i])
> +			return NULL;

You are using the same virtual address (page) for both skb ?

So we have 2 skbs having skb->head pointing to the same location ?

This is illegal.

Please use instead : skb = dev_alloc_skb(headroom + data_size)

> +
> +		skb_reserve(skb[i], headroom);
> +		skb_put(skb[i], data_size);
> +		skb[i]->protocol = htons(ETH_P_IP);
> +		skb_reset_network_header(skb[i]);
> +		skb_set_mac_header(skb[i], -ETH_HLEN);
> +
> +		skb_add_rx_frag(skb[i], 

skb_shinfo(skb[i])->nr_frags,

0 ?

> +				page, 0, 64, 64);

get_page(page) ?

> +		// skb: skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
> +	}
> +
> +	/* setup shinfo */
> +	skb_shinfo(skb[0])->gso_size = 1448;
> +	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
> +	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
> +	skb_shinfo(skb[0])->gso_segs = 0;
> +	skb_shinfo(skb[0])->frag_list = skb[1];
> +
> +	/* adjust skb[0]'s len */
> +	skb[0]->len += skb[1]->len;
> +	skb[0]->data_len += skb[1]->data_len;
> +	skb[0]->truesize += skb[1]->truesize;
> +
> +	return skb[0];
> +}
> +
> +static __init int test_skb_segment(void)
> +{
> +	netdev_features_t features;
> +	struct sk_buff *skb;
> +	void *page;
> +	int ret = -1;
> +
> +	page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!page) {
> +		pr_info("%s: failed to get_free_page!", __func__);
> +		return ret;
> +	}
> +
> +	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
> +	features |= NETIF_F_RXCSUM;
> +	skb = build_test_skb(page);
> +	if (!skb) {
> +		pr_info("%s: failed to build_test_skb", __func__);
> +	} else if (skb_segment(skb, features)) {
> +		ret = 0;
> +		pr_info("%s: success in skb_segment!", __func__);
> +	} else {
> +		pr_info("%s: failed in skb_segment!", __func__);
> +	}
> +	free_page((unsigned long)page);


Where are the skbs freed ?


> +	return ret;
> +}
> +
>  static __init int test_bpf(void)
>  {
>  	int i, err_cnt = 0, pass_cnt = 0;
> @@ -6632,8 +6698,11 @@ static int __init test_bpf_init(void)
>  		return ret;
>  
>  	ret = test_bpf();
> -
>  	destroy_bpf_tests();
> +	if (ret)
> +		return ret;
> +
> +	ret = test_skb_segment();
>  	return ret;
>  }
>  
> 

^ permalink raw reply

* Re: [net-next] intel: add SPDX identifiers to all the Intel drivers
From: Stephen Hemminger @ 2018-03-21  0:47 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: davem, netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180320171320.10826-1-jeffrey.t.kirsher@intel.com>

On Tue, 20 Mar 2018 10:13:20 -0700
Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:

> diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
> index 29486478836e..a750a5fe373a 100644
> --- a/drivers/net/ethernet/intel/e100.c
> +++ b/drivers/net/ethernet/intel/e100.c
> @@ -1,3 +1,4 @@
> +// SPDX-License-Identifier: GPL-2.0-only
>  /*******************************************************************************
>  
>    Intel PRO/100 Linux driver

SPDX makes things easier but I thought the advantage of SPDX
was getting rid of the boilerplate! Can you convince the lawyers
to drop the GPL boilerplate comment please.

It has already been done on other projects.

^ permalink raw reply

* Re: [PATCH] Bluetooth: Remove VLA usage in aes_cmac
From: Gustavo A. R. Silva @ 2018-03-21  0:57 UTC (permalink / raw)
  To: Gustavo A. R. Silva, Marcel Holtmann, Johan Hedberg,
	David S. Miller
  Cc: linux-bluetooth, netdev, linux-kernel
In-Reply-To: <20180320233444.GA14446@embeddedor.com>

Hi,

I've just discovered an issue in this patch. Please, drop it. I'll send 
v2 shortly.

Thanks
--
Gustavo

On 03/20/2018 06:34 PM, Gustavo A. R. Silva wrote:
> In preparation to enabling -Wvla, remove VLA and replace it
> with dynamic memory allocation instead.
> 
> The use of stack Variable Length Arrays needs to be avoided, as they
> can be a vector for stack exhaustion, which can be both a runtime bug
> or a security flaw. Also, in general, as code evolves it is easy to
> lose track of how big a VLA can get. Thus, we can end up having runtime
> failures that are hard to debug.
> 
> Also, fixed as part of the directive to remove all VLAs from
> the kernel: https://lkml.org/lkml/2018/3/7/621
> 
> Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
> ---
>   net/bluetooth/smp.c | 16 +++++++++++-----
>   1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
> index a2ddae2..23c694d 100644
> --- a/net/bluetooth/smp.c
> +++ b/net/bluetooth/smp.c
> @@ -173,7 +173,7 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
>   		    size_t len, u8 mac[16])
>   {
>   	uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
> -	SHASH_DESC_ON_STACK(desc, tfm);
> +	struct shash_desc *shash;
>   	int err;
>   
>   	if (len > CMAC_MSG_MAX)
> @@ -184,8 +184,13 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
>   		return -EINVAL;
>   	}
>   
> -	desc->tfm = tfm;
> -	desc->flags = 0;
> +	shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
> +			GFP_KERNEL);
> +	if (!shash)
> +		return -ENOMEM;
> +
> +	shash->tfm = tfm;
> +	shash->flags = 0;
>   
>   	/* Swap key and message from LSB to MSB */
>   	swap_buf(k, tmp, 16);
> @@ -200,8 +205,9 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
>   		return err;
>   	}
>   
> -	err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
> -	shash_desc_zero(desc);
> +	err = crypto_shash_digest(shash, msg_msb, len, mac_msb);
> +	shash_desc_zero(shash);
> +	kfree(shash);
>   	if (err) {
>   		BT_ERR("Hash computation error %d", err);
>   		return err;
> 

^ permalink raw reply

* Re: [PATCH] kbuild: disable clang's default use of -fmerge-all-constants
From: Alexei Starovoitov @ 2018-03-21  0:58 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: Linus Torvalds, Alexei Starovoitov, psodagud, Network Development,
	Chenbo Feng, Richard Smith, Chandler Carruth,
	Linux Kernel Mailing List
In-Reply-To: <bd7b0d23-519e-ca8b-a768-1440a4bc90d3@iogearbox.net>

On Wed, Mar 21, 2018 at 01:38:45AM +0100, Daniel Borkmann wrote:
> On 03/21/2018 01:36 AM, Linus Torvalds wrote:
> > On Tue, Mar 20, 2018 at 5:18 PM, Daniel Borkmann <daniel@iogearbox.net> wrote:
> >> Prasad reported that he has seen crashes in BPF subsystem with netd
> >> on Android with arm64 in the form of (note, the taint is unrelated):
> > 
> > Ack. This looks good to me. And thanks for noticing the behavior wrt
> > the correct gcc merging.
> > 
> >>  [ Hi Linus, feel free to take this fix directly if you want.
> >>    Alternatively, we could route it via bpf tree. Thanks a
> >>    lot for your feedback! ]
> > 
> > So since it's your patch and the only known issue comes from the bpf
> > side, I think it should just go through the bpf tree, and I expect it
> > to get to me through all the usual channels.
> 
> Yeah, that's fine, thanks for letting us know!

Applied to bpf tree, thanks everyone.

^ permalink raw reply

* Re: [net-next] intel: add SPDX identifiers to all the Intel drivers
From: Jeff Kirsher @ 2018-03-21  0:59 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180320174718.130cc253@xeon-e3>

[-- Attachment #1: Type: text/plain, Size: 1001 bytes --]

On Tue, 2018-03-20 at 17:47 -0700, Stephen Hemminger wrote:
> On Tue, 20 Mar 2018 10:13:20 -0700
> Jeff Kirsher <jeffrey.t.kirsher@intel.com> wrote:
> 
> > diff --git a/drivers/net/ethernet/intel/e100.c
> > b/drivers/net/ethernet/intel/e100.c
> > index 29486478836e..a750a5fe373a 100644
> > --- a/drivers/net/ethernet/intel/e100.c
> > +++ b/drivers/net/ethernet/intel/e100.c
> > @@ -1,3 +1,4 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> >  
> > /**********************************************************************
> > *********
> >   
> >     Intel PRO/100 Linux driver
> 
> SPDX makes things easier but I thought the advantage of SPDX
> was getting rid of the boilerplate! Can you convince the lawyers
> to drop the GPL boilerplate comment please.
> 
> It has already been done on other projects.

It's in the works.  Just waiting to hear back from the lawyers that it is ok
to remove the boilerplate headers, before I generate a patch to clean up
the Intel drivers.

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* [PATCH v2] Bluetooth: Remove VLA usage in aes_cmac
From: Gustavo A. R. Silva @ 2018-03-21  1:05 UTC (permalink / raw)
  To: Marcel Holtmann, Johan Hedberg, David S. Miller
  Cc: linux-bluetooth, netdev, linux-kernel, Gustavo A. R. Silva

In preparation for enabling -Wvla, remove VLA and replace it
with dynamic memory allocation instead.

The use of stack Variable Length Arrays needs to be avoided, as they
can be a vector for stack exhaustion, which can be both a runtime bug
or a security flaw. Also, in general, as code evolves it is easy to
lose track of how big a VLA can get. Thus, we can end up having runtime
failures that are hard to debug.

Also, fixed as part of the directive to remove all VLAs from
the kernel: https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
---
Changes in v2:
 - Fix memory leak in previous patch.

 net/bluetooth/smp.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a2ddae2..0fa7035 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -173,7 +173,7 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
 		    size_t len, u8 mac[16])
 {
 	uint8_t tmp[16], mac_msb[16], msg_msb[CMAC_MSG_MAX];
-	SHASH_DESC_ON_STACK(desc, tfm);
+	struct shash_desc *shash;
 	int err;
 
 	if (len > CMAC_MSG_MAX)
@@ -184,8 +184,13 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
 		return -EINVAL;
 	}
 
-	desc->tfm = tfm;
-	desc->flags = 0;
+	shash = kzalloc(sizeof(*shash) + crypto_shash_descsize(tfm),
+			GFP_KERNEL);
+	if (!shash)
+		return -ENOMEM;
+
+	shash->tfm = tfm;
+	shash->flags = 0;
 
 	/* Swap key and message from LSB to MSB */
 	swap_buf(k, tmp, 16);
@@ -197,11 +202,13 @@ static int aes_cmac(struct crypto_shash *tfm, const u8 k[16], const u8 *m,
 	err = crypto_shash_setkey(tfm, tmp, 16);
 	if (err) {
 		BT_ERR("cipher setkey failed: %d", err);
+		kfree(shash);
 		return err;
 	}
 
-	err = crypto_shash_digest(desc, msg_msb, len, mac_msb);
-	shash_desc_zero(desc);
+	err = crypto_shash_digest(shash, msg_msb, len, mac_msb);
+	shash_desc_zero(shash);
+	kfree(shash);
 	if (err) {
 		BT_ERR("Hash computation error %d", err);
 		return err;
-- 
2.7.4

^ permalink raw reply related

* Re: [net-next] intel: add SPDX identifiers to all the Intel drivers
From: Jeff Kirsher @ 2018-03-21  1:09 UTC (permalink / raw)
  To: Philippe Ombredanne, Allan, Bruce W
  Cc: Joe Perches, davem@davemloft.net, netdev@vger.kernel.org,
	nhorman@redhat.com, sassmann@redhat.com, jogreene@redhat.com,
	Kate Stewart, Greg Kroah-Hartman, Thomas Gleixner
In-Reply-To: <CAOFm3uGQU1cG0kmi6krA9ui_nHq8ptU0w0nVNUOow7UGErgnag@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 2868 bytes --]

On Tue, 2018-03-20 at 16:46 -0700, Philippe Ombredanne wrote:
> Allan,
> 
> On Tue, Mar 20, 2018 at 1:48 PM, Allan, Bruce W <bruce.w.allan@intel.com>
> wrote:
> > > -----Original Message-----
> > > From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.o
> > > rg]
> > > On Behalf Of Jeff Kirsher
> > > Sent: Tuesday, March 20, 2018 10:52 AM
> > > To: Joe Perches <joe@perches.com>; davem@davemloft.net; Philippe
> > > Ombredanne <pombredanne@nexb.com>
> > > Cc: netdev@vger.kernel.org; nhorman@redhat.com; sassmann@redhat.com;
> > > jogreene@redhat.com
> > > Subject: Re: [net-next] intel: add SPDX identifiers to all the Intel
> > > drivers
> > > 
> > > On Tue, 2018-03-20 at 10:41 -0700, Joe Perches wrote:
> > > > On Tue, 2018-03-20 at 10:13 -0700, Jeff Kirsher wrote:
> > > > > Add the SPDX identifiers to all the Intel wired LAN driver files,
> > > > > as
> > > > > outlined in Documentation/process/license-rules.rst.
> > > > 
> > > > So far the Documentation does not show using the -only variant.
> > > > 
> > > > For a discussion, please see:
> > > > https://lkml.org/lkml/2018/2/8/311
> > 
> > But the Linux Foundation, the authority maintaining the valid SPDX
> > identifiers, indicates at https://spdx.org/licenses/ that "GPL-2.0" is
> > deprecated while "GPL-2.0-only" (and others) is appropriate.
> > Was there any mention in the thread or other conversations if/when the
> > kernel's documentation (and all existing uses of "GPL-2.0" in the
> > kernel) will be updated to "GPL-2.0-only"?
> 
> The kernel (as documented by Thomas [1]) is using for now the V2.6 of
> the SPDX licenses list. [2] IMHO the reference should be the kernel
> doc and nothing else to ensure consistency and avoid confusion (which
> obviously was not avoided entirely here ;) ).
> 
> What happened is in late December a new version 3 was published by
> SPDX and the v2.6 is no longer online. I will bring this up to the
> SPDX group because we should be able to reference the version 2.6
> online (it is still in git though [2]).
> 
> When the kernel maintainers decide to switch to V3.0 of the SPDX list,
> the doc will be updated and then Joe's script could be applied at once
> to update the past.

I am fine with changing my patch back to v2.6 SPDX ids, as long as Joe's
script in the future won't touch the Intel wired LAN drivers, since we need
to retain copyright on several files throughout our drivers.

> 
> What matters most here is consistency: having some v2.6 and some v3.0
> SPDX ids at once is not a happy thing IMHO.

I understand that having a mix of v2.6 and v3.0 SPDX at once is not a happy
thing.

> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/D
> ocumentation/process/license-rules.rst
> [2] https://github.com/spdx/license-list-data/tree/v2.6

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: linux-next on x60: network manager often complains "network is disabled" after resume
From: Woody Suwalski @ 2018-03-21  1:11 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Rafael J. Wysocki, kernel list, Linux-pm mailing list,
	Netdev list
In-Reply-To: <6e43123a-5227-96c4-a1f5-4416bdb5b0db@gmail.com>

Woody Suwalski wrote:
> Pavel Machek wrote:
>> On Mon 2018-03-19 05:17:45, Woody Suwalski wrote:
>>> Pavel Machek wrote:
>>>> Hi!
>>>>
>>>> With recent linux-next, after resume networkmanager often claims that
>>>> "network is disabled". Sometimes suspend/resume clears that.
>>>>
>>>> Any ideas? Does it work for you?
>>>>                                     Pavel
>>> Tried the 4.16-rc6 with nm 1.4.4 - I do not see the issue.
>> Thanks for testing... but yes, 4.16 should be ok. If not fixed,
>> problem will appear in 4.17-rc1.
>>
> Works here OK. Tried ~10 suspends, all restarted OK.
> kernel next-20180320
> nmcli shows that Wifi always connects OK
>
> Woody
>
On the contrary, it just happened to me on a 64-bit build 4.16-rc5 on T440.
I think that Dan's suspicion is correct - it is a snafu in the PM: 
trying to hibernate results in a message:
Failed to hibernate system via logind: There's already a shutdown or 
sleep operation in progress.

And ps shows "Ds /lib/systemd/systemd-sleep suspend"...

Woody

^ permalink raw reply

* Re: [net-next] intel: add SPDX identifiers to all the Intel drivers
From: Joe Perches @ 2018-03-21  1:14 UTC (permalink / raw)
  To: Jeff Kirsher, Philippe Ombredanne, Allan, Bruce W
  Cc: davem@davemloft.net, netdev@vger.kernel.org, nhorman@redhat.com,
	sassmann@redhat.com, jogreene@redhat.com, Kate Stewart,
	Greg Kroah-Hartman, Thomas Gleixner
In-Reply-To: <1521594560.15055.10.camel@intel.com>

On Tue, 2018-03-20 at 18:09 -0700, Jeff Kirsher wrote:
> On Tue, 2018-03-20 at 16:46 -0700, Philippe Ombredanne wrote:
> > When the kernel maintainers decide to switch to V3.0 of the SPDX list,
> > the doc will be updated and then Joe's script could be applied at once
> > to update the past.
> 
> I am fine with changing my patch back to v2.6 SPDX ids, as long as Joe's
> script in the future won't touch the Intel wired LAN drivers, since we need
> to retain copyright on several files throughout our drivers.

Why would exempting intel wired drivers be
necessary or useful?

I think it would be better if the kernel
source files used a consistent tag format.

The script I wrote is basically a sed that
simply updates the SPDX license text.

That is not particularly different from Thomas's
original script that added the SPDX tags.

I have no intention of claiming anything like
a copyright on the output of a trivial script.

^ permalink raw reply

* pull-request: bpf-next 2018-03-21
From: Daniel Borkmann @ 2018-03-21  1:29 UTC (permalink / raw)
  To: davem; +Cc: daniel, ast, netdev

Hi David,

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add a BPF hook for sendmsg and sendfile by reusing the ULP infrastructure
   and sockmap. Three helpers are added along with this, bpf_msg_apply_bytes(),
   bpf_msg_cork_bytes(), and bpf_msg_pull_data(). The first is used to tell
   how many bytes the verdict should be applied to, the second to tell
   that x bytes need to be queued first to retrigger the BPF program for a
   verdict, and the third helper is mainly for the sendfile case to pull in
   data for making it private for reading and/or writing, from John.

2) Improve address to symbol resolution of user stack traces in BPF stackmap.
   Currently, the latter stores the address for each entry in the call trace,
   however to map these addresses to user space files, it is necessary to
   maintain the mapping from these virtual addresses to symbols in the binary
   which is not practical for system-wide profiling. Instead, this option for
   the stackmap rather stores the ELF build id and offset for the call trace
   entries, from Song.

3) Add support that allows BPF programs attached to perf events to read the
   address values recorded with the perf events. They are requested through
   PERF_SAMPLE_ADDR via perf_event_open(). Main motivation behind it is to
   support building memory or lock access profiling and tracing tools with
   the help of BPF, from Teng.

4) Several improvements to the tools/bpf/ Makefiles. The 'make bpf' in the
   tools directory does not provide the standard quiet output except for
   bpftool and it also does not respect specifying a build output directory.
   'make bpf_install' command neither respects specified destination nor
   prefix, all from Jiri. In addition, Jakub fixes several other minor issues
   in the Makefiles on top of that, e.g. fixing dependency paths, phony
   targets and more.

5) Various doc updates e.g. add a comment for BPF fs about reserved names
   to make the dentry lookup from there a bit more obvious, and a comment
   to the bpf_devel_QA file in order to explain the diff between native
   and bpf target clang usage with regards to pointer size, from Quentin
   and Daniel.

Please consider pulling these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git

Thanks a lot!

----------------------------------------------------------------

The following changes since commit a366e300ae9fc466d333e6d8f2bc5d58ed248041:

  ip6mr: remove synchronize_rcu() in favor of SOCK_RCU_FREE (2018-03-07 18:13:41 -0500)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git 

for you to fetch changes up to 78262f4575c29f185947fe58952cd1beabc74f82:

  bpf, doc: add description wrt native/bpf clang target and pointer size (2018-03-20 15:47:45 -0700)

----------------------------------------------------------------
Daniel Borkmann (6):
      Merge branch 'bpf-perf-sample-addr'
      Merge branch 'bpf-tools-makefile-improvements'
      Merge branch 'bpf-stackmap-build-id'
      Merge branch 'bpf-tools-build-improvements'
      Merge branch 'bpf-sockmap-ulp'
      bpf, doc: add description wrt native/bpf clang target and pointer size

Jakub Kicinski (4):
      tools: bpftool: fix dependency file path
      tools: bpftool: fix potential format truncation
      tools: bpf: cleanup PHONY target
      tools: bpf: remove feature detection output

Jiri Benc (7):
      tools: bpftool: silence 'missing initializer' warnings
      tools: bpf: respect output directory during build
      tools: bpf: consistent make bpf_install
      tools: bpf: make install should build first
      tools: bpf: call descend in Makefile
      tools: bpf: respect quiet/verbose build
      tools: bpf: silence make by not deleting intermediate file

John Fastabend (18):
      sock: make static tls function alloc_sg generic sock helper
      sockmap: convert refcnt to an atomic refcnt
      net: do_tcp_sendpages flag to avoid SKBTX_SHARED_FRAG
      net: generalize sk_alloc_sg to work with scatterlist rings
      bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data
      bpf: sockmap, add bpf_msg_apply_bytes() helper
      bpf: sockmap, add msg_cork_bytes() helper
      bpf: sk_msg program helper bpf_sk_msg_pull_data
      bpf: add map tests for BPF_PROG_TYPE_SK_MSG
      bpf: add verifier tests for BPF_PROG_TYPE_SK_MSG
      bpf: sockmap sample, add option to attach SK_MSG program
      bpf: sockmap sample, add sendfile test
      bpf: sockmap sample, add data verification option
      bpf: sockmap, add sample option to test apply_bytes helper
      bpf: sockmap sample support for bpf_msg_cork_bytes()
      bpf: sockmap add SK_DROP tests
      bpf: sockmap sample test for bpf_msg_pull_data
      bpf: sockmap test script

Quentin Monnet (1):
      bpf: comment why dots in filenames under BPF virtual FS are not allowed

Song Liu (2):
      bpf: extend stackmap to save binary_build_id+offset instead of address
      bpf: add selftest for stackmap with BPF_F_STACK_BUILD_ID

Teng Qin (2):
      bpf: add support to read sample address in bpf program
      samples/bpf: add example to test reading address

 Documentation/bpf/bpf_devel_QA.txt                 |  12 +
 include/linux/bpf.h                                |   1 +
 include/linux/bpf_types.h                          |   1 +
 include/linux/filter.h                             |  17 +
 include/linux/socket.h                             |   1 +
 include/net/sock.h                                 |   4 +
 include/uapi/linux/bpf.h                           |  47 +-
 include/uapi/linux/bpf_perf_event.h                |   1 +
 kernel/bpf/inode.c                                 |   3 +
 kernel/bpf/sockmap.c                               | 733 ++++++++++++++++++++-
 kernel/bpf/stackmap.c                              | 257 +++++++-
 kernel/bpf/syscall.c                               |  14 +-
 kernel/bpf/verifier.c                              |   5 +-
 kernel/trace/bpf_trace.c                           |  20 +-
 net/core/filter.c                                  | 273 +++++++-
 net/core/sock.c                                    |  61 ++
 net/ipv4/tcp.c                                     |   4 +-
 net/tls/tls_sw.c                                   |  69 +-
 samples/bpf/bpf_load.c                             |   8 +-
 samples/bpf/trace_event_kern.c                     |   4 +
 samples/bpf/trace_event_user.c                     |  15 +
 samples/sockmap/sockmap_kern.c                     | 197 ++++++
 samples/sockmap/sockmap_test.sh                    | 450 +++++++++++++
 samples/sockmap/sockmap_user.c                     | 301 ++++++++-
 tools/bpf/Makefile                                 |  78 ++-
 tools/bpf/bpftool/Makefile                         |   6 +-
 tools/bpf/bpftool/xlated_dumper.h                  |   2 +-
 tools/include/uapi/linux/bpf.h                     |  47 +-
 tools/lib/bpf/libbpf.c                             |   1 +
 tools/testing/selftests/bpf/Makefile               |  13 +-
 tools/testing/selftests/bpf/bpf_helpers.h          |  10 +
 tools/testing/selftests/bpf/sockmap_parse_prog.c   |  15 +-
 tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c |  33 +
 tools/testing/selftests/bpf/sockmap_verdict_prog.c |   7 +
 tools/testing/selftests/bpf/test_maps.c            |  55 +-
 tools/testing/selftests/bpf/test_progs.c           | 164 ++++-
 .../selftests/bpf/test_stacktrace_build_id.c       |  60 ++
 tools/testing/selftests/bpf/test_verifier.c        |  54 ++
 tools/testing/selftests/bpf/urandom_read.c         |  22 +
 39 files changed, 2879 insertions(+), 186 deletions(-)
 create mode 100755 samples/sockmap/sockmap_test.sh
 create mode 100644 tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
 create mode 100644 tools/testing/selftests/bpf/test_stacktrace_build_id.c
 create mode 100644 tools/testing/selftests/bpf/urandom_read.c

^ permalink raw reply

* pull-request: bpf 2018-03-21
From: Daniel Borkmann @ 2018-03-21  1:50 UTC (permalink / raw)
  To: davem; +Cc: daniel, ast, netdev

Hi David,

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Follow-up fix to the fault injection framework to prevent jump
   optimization on the kprobe by installing a dummy post-handler,
   from Masami.

2) Drop bpf_perf_prog_read_value helper from tracepoint type programs
   which was mistakenly added there and would otherwise crash due to
   wrong input context, from Yonghong.

3) Fix a crash in BPF fs when compiled with clang. Code appears to
   be fine, just that clang tries to optimize overly aggressively in
   non-C-conforming ways, therefore fix the kernel's Makefile to
   generally prevent such issues, from Daniel.

4) Skip unnecessary capability checks in bpf syscall, which is otherwise
   triggering unnecessary security hooks on capability checking and
   causing false alarms on unprivileged processes trying to access
   CAP_SYS_ADMIN restricted infra, from Chenbo.

5) Fix the test_bpf.ko module when CONFIG_BPF_JIT_ALWAYS_ON is set
   with regards to a test case that is really just supposed to fail
   on x86_64 JIT but not others, from Thadeu.

Please consider pulling these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git

Thanks a lot!

----------------------------------------------------------------

The following changes since commit 9e5fb7207024e53700bdac23f53d1e44d530a7f6:

  Merge branch 'bnxt_en-Bug-fixes' (2018-03-12 10:58:28 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git 

for you to fetch changes up to 87e0d4f0f37fb0c8c4aeeac46fff5e957738df79:

  kbuild: disable clang's default use of -fmerge-all-constants (2018-03-20 17:43:15 -0700)

----------------------------------------------------------------
Chenbo Feng (1):
      bpf: skip unnecessary capability check

Daniel Borkmann (1):
      kbuild: disable clang's default use of -fmerge-all-constants

Masami Hiramatsu (1):
      error-injection: Fix to prohibit jump optimization

Thadeu Lima de Souza Cascardo (1):
      test_bpf: Fix testing with CONFIG_BPF_JIT_ALWAYS_ON=y on other arches

Yonghong Song (1):
      trace/bpf: remove helper bpf_perf_prog_read_value from tracepoint type programs

 Makefile                 |  9 +++++++
 kernel/bpf/syscall.c     |  2 +-
 kernel/fail_function.c   | 10 +++++++
 kernel/trace/bpf_trace.c | 68 ++++++++++++++++++++++++++++--------------------
 lib/test_bpf.c           |  2 +-
 5 files changed, 61 insertions(+), 30 deletions(-)

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox