Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: KMSAN: uninit-value in pppoe_rcv
From: Guillaume Nault @ 2018-09-13 16:19 UTC (permalink / raw)
  To: Alexander Potapenko
  Cc: Eric Dumazet, syzbot+f5f6080811c849739212, LKML, mostrows,
	Networking, syzkaller-bugs
In-Reply-To: <CAG_fn=WXPsQBWR1GDsprY3aL4oW0v6EGhNB5wrvPJCOMh_U7wQ@mail.gmail.com>

On Thu, Sep 13, 2018 at 04:12:38PM +0200, Alexander Potapenko wrote:
> On Thu, Sep 13, 2018 at 3:57 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> >
> >
> > On 09/12/2018 03:38 AM, Alexander Potapenko wrote:
> > > On Wed, Sep 12, 2018 at 12:24 PM syzbot
> > > <syzbot+f5f6080811c849739212@syzkaller.appspotmail.com> wrote:
> > >>
> > >> Hello,
> > >>
> > >> syzbot found the following crash on:
> > >>
> > >> HEAD commit:    d2d741e5d189 kmsan: add initialization for shmem pages
> > >> git tree:       https://github.com/google/kmsan.git/master
> > >> console output: https://syzkaller.appspot.com/x/log.txt?x=1465fc37800000
> > >> kernel config:  https://syzkaller.appspot.com/x/.config?x=48f9de3384bcd0f
> > >> dashboard link: https://syzkaller.appspot.com/bug?extid=f5f6080811c849739212
> > >> compiler:       clang version 7.0.0 (trunk 329391)
> > >> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=14d6e607800000
> > >> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10a15b5b800000
> > >>
> > >> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> > >> Reported-by: syzbot+f5f6080811c849739212@syzkaller.appspotmail.com
> > >>
> > >> IPVS: ftp: loaded support on port[0] = 21
> > >> ==================================================================
> > >> BUG: KMSAN: uninit-value in __get_item drivers/net/ppp/pppoe.c:172 [inline]
> > >> BUG: KMSAN: uninit-value in get_item drivers/net/ppp/pppoe.c:236 [inline]
> > >> BUG: KMSAN: uninit-value in pppoe_rcv+0xcef/0x10e0
> > >> drivers/net/ppp/pppoe.c:450
> > >> CPU: 0 PID: 4543 Comm: syz-executor355 Not tainted 4.16.0+ #87
> > >> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> > >> Google 01/01/2011
> > >> Call Trace:
> > >>   __dump_stack lib/dump_stack.c:17 [inline]
> > >>   dump_stack+0x185/0x1d0 lib/dump_stack.c:53
> > >>   kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
> > >>   __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683
> > >>   __get_item drivers/net/ppp/pppoe.c:172 [inline]
> > >>   get_item drivers/net/ppp/pppoe.c:236 [inline]
> > >>   pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450
> > >>   __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562
> > >>   __netif_receive_skb net/core/dev.c:4627 [inline]
> > >>   netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
> > >>   netif_receive_skb+0x230/0x240 net/core/dev.c:4725
> > >>   tun_rx_batched drivers/net/tun.c:1555 [inline]
> > >>   tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962
> > >>   tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
> > >>   call_write_iter include/linux/fs.h:1782 [inline]
> > >>   new_sync_write fs/read_write.c:469 [inline]
> > >>   __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
> > >>   vfs_write+0x463/0x8d0 fs/read_write.c:544
> > >>   SYSC_write+0x172/0x360 fs/read_write.c:589
> > >>   SyS_write+0x55/0x80 fs/read_write.c:581
> > >>   do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
> > >>   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> > >> RIP: 0033:0x4447c9
> > >> RSP: 002b:00007fff64c8fc28 EFLAGS: 00000297 ORIG_RAX: 0000000000000001
> > >> RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004447c9
> > >> RDX: 000000000000fd87 RSI: 0000000020000600 RDI: 0000000000000004
> > >> RBP: 00000000006cf018 R08: 00007fff64c8fda8 R09: 00007fff00006bda
> > >> R10: 0000000000005fe7 R11: 0000000000000297 R12: 00000000004020d0
> > >> R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000
> > >>
> > >> Uninit was created at:
> > >>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
> > >>   kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
> > >>   kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
> > >>   kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
> > >>   slab_post_alloc_hook mm/slab.h:445 [inline]
> > >>   slab_alloc_node mm/slub.c:2737 [inline]
> > >>   __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
> > >>   __kmalloc_reserve net/core/skbuff.c:138 [inline]
> > >>   __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
> > >>   alloc_skb include/linux/skbuff.h:984 [inline]
> > >>   alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
> > >>   sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
> > >>   tun_alloc_skb drivers/net/tun.c:1532 [inline]
> > >>   tun_get_user+0x2242/0x7c60 drivers/net/tun.c:1829
> > >>   tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
> > >>   call_write_iter include/linux/fs.h:1782 [inline]
> > >>   new_sync_write fs/read_write.c:469 [inline]
> > >>   __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
> > >>   vfs_write+0x463/0x8d0 fs/read_write.c:544
> > >>   SYSC_write+0x172/0x360 fs/read_write.c:589
> > >>   SyS_write+0x55/0x80 fs/read_write.c:581
> > >>   do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
> > >>   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> > >> ==================================================================
> > > I did a little digging before sending the bug upstream.
> > > If I add memset(obj, 0xfe, size) to __kmalloc_reserve(), these 0xfe
> > > bytes are visible in __get_item() at the place where KMSAN reports an
> > > error.
> > >
> > > The problem is somehow related to tun_get_user() creating a fragmented
> > > sk_buff - when I change the call to tun_alloc_skb() so that it
> > > allocates a single buffer the bug goes away.
> > >
> >
> > I guess the following patch would fix the issue
> >
> > (I will submit it more formally)
> No, as far as I can see it doesn't.
> Saving sid before __skb_pull() is still a good idea, but in this
> particular case |ph| doesn't change.

Yes, we probably need to save sid.

But I think the problem found by syzbot is related to
eth_hdr(skb)->h_source. PPPoE expects that Ethernet header has already
been parsed and is accessible at skb_mac_header(skb).
But here skb_mac_header(skb) == skb->data, and we may pull only 6 bytes
(sizeof(truct pppoe_hdr)). Therefore eth_hdr(skb)->h_source points past
skb's head length.

Not sure if something needs to be changed in tun.c for properly setting
skb_mac_header. But PPPoE has no reason to consider packets from
non-Ethernet devices anyway.

^ permalink raw reply

* Re: [PATCH net-next V2 00/11] vhost_net TX batching
From: David Miller @ 2018-09-13 16:28 UTC (permalink / raw)
  To: jasowang; +Cc: netdev, linux-kernel, kvm, virtualization, mst
In-Reply-To: <20180912031709.14112-1-jasowang@redhat.com>

From: Jason Wang <jasowang@redhat.com>
Date: Wed, 12 Sep 2018 11:16:58 +0800

> This series tries to batch submitting packets to underlayer socket
> through msg_control during sendmsg(). This is done by:
 ...

Series applied, thanks Jason.

^ permalink raw reply

* Re: [PATCH net-next v3 1/2] net: stmmac: Rework coalesce timer and fix multi-queue races
From: Neil Armstrong @ 2018-09-13 11:24 UTC (permalink / raw)
  To: Jose Abreu, netdev
  Cc: Florian Fainelli, Jerome Brunet, Martin Blumenstingl,
	David S. Miller, Joao Pinto, Giuseppe Cavallaro, Alexandre Torgue
In-Reply-To: <b7da85496a6f536decb54bc9cc8f9a77d3fef978.1536762575.git.joabreu@synopsys.com>

Hi Jose,

On 13/09/2018 10:02, Jose Abreu wrote:
> This follows David Miller advice and tries to fix coalesce timer in
> multi-queue scenarios.
> 
> We are now using per-queue coalesce values and per-queue TX timer.
> 
> Coalesce timer default values was changed to 1ms and the coalesce frames
> to 25.
> 
> Tested in B2B setup between XGMAC2 and GMAC5.

I will run a test today,

Thanks,
Neil

> 
> Signed-off-by: Jose Abreu <joabreu@synopsys.com>
> Cc: Florian Fainelli <f.fainelli@gmail.com>
> Cc: Neil Armstrong <narmstrong@baylibre.com>
> Cc: Jerome Brunet <jbrunet@baylibre.com>
> Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
> Cc: David S. Miller <davem@davemloft.net>
> Cc: Joao Pinto <jpinto@synopsys.com>
> Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
> Cc: Alexandre Torgue <alexandre.torgue@st.com>
> ---
>  drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
>  drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  14 +-
>  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 233 ++++++++++++----------
>  include/linux/stmmac.h                            |   1 +
>  4 files changed, 146 insertions(+), 106 deletions(-)

[...]

^ permalink raw reply

* Re: KMSAN: uninit-value in pppoe_rcv
From: Eric Dumazet @ 2018-09-13 16:35 UTC (permalink / raw)
  To: Alexander Potapenko, Eric Dumazet
  Cc: syzbot+f5f6080811c849739212, LKML, mostrows, Networking,
	syzkaller-bugs
In-Reply-To: <CAG_fn=WXPsQBWR1GDsprY3aL4oW0v6EGhNB5wrvPJCOMh_U7wQ@mail.gmail.com>



On 09/13/2018 07:12 AM, Alexander Potapenko wrote:
> On Thu, Sep 13, 2018 at 3:57 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>
>>
>>
>> On 09/12/2018 03:38 AM, Alexander Potapenko wrote:
>>> On Wed, Sep 12, 2018 at 12:24 PM syzbot
>>> <syzbot+f5f6080811c849739212@syzkaller.appspotmail.com> wrote:
>>>>
>>>> Hello,
>>>>
>>>> syzbot found the following crash on:
>>>>
>>>> HEAD commit:    d2d741e5d189 kmsan: add initialization for shmem pages
>>>> git tree:       https://github.com/google/kmsan.git/master
>>>> console output: https://syzkaller.appspot.com/x/log.txt?x=1465fc37800000
>>>> kernel config:  https://syzkaller.appspot.com/x/.config?x=48f9de3384bcd0f
>>>> dashboard link: https://syzkaller.appspot.com/bug?extid=f5f6080811c849739212
>>>> compiler:       clang version 7.0.0 (trunk 329391)
>>>> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=14d6e607800000
>>>> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10a15b5b800000
>>>>
>>>> IMPORTANT: if you fix the bug, please add the following tag to the commit:
>>>> Reported-by: syzbot+f5f6080811c849739212@syzkaller.appspotmail.com
>>>>
>>>> IPVS: ftp: loaded support on port[0] = 21
>>>> ==================================================================
>>>> BUG: KMSAN: uninit-value in __get_item drivers/net/ppp/pppoe.c:172 [inline]
>>>> BUG: KMSAN: uninit-value in get_item drivers/net/ppp/pppoe.c:236 [inline]
>>>> BUG: KMSAN: uninit-value in pppoe_rcv+0xcef/0x10e0
>>>> drivers/net/ppp/pppoe.c:450
>>>> CPU: 0 PID: 4543 Comm: syz-executor355 Not tainted 4.16.0+ #87
>>>> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
>>>> Google 01/01/2011
>>>> Call Trace:
>>>>   __dump_stack lib/dump_stack.c:17 [inline]
>>>>   dump_stack+0x185/0x1d0 lib/dump_stack.c:53
>>>>   kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
>>>>   __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683
>>>>   __get_item drivers/net/ppp/pppoe.c:172 [inline]
>>>>   get_item drivers/net/ppp/pppoe.c:236 [inline]
>>>>   pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450
>>>>   __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562
>>>>   __netif_receive_skb net/core/dev.c:4627 [inline]
>>>>   netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
>>>>   netif_receive_skb+0x230/0x240 net/core/dev.c:4725
>>>>   tun_rx_batched drivers/net/tun.c:1555 [inline]
>>>>   tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962
>>>>   tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
>>>>   call_write_iter include/linux/fs.h:1782 [inline]
>>>>   new_sync_write fs/read_write.c:469 [inline]
>>>>   __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
>>>>   vfs_write+0x463/0x8d0 fs/read_write.c:544
>>>>   SYSC_write+0x172/0x360 fs/read_write.c:589
>>>>   SyS_write+0x55/0x80 fs/read_write.c:581
>>>>   do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
>>>>   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
>>>> RIP: 0033:0x4447c9
>>>> RSP: 002b:00007fff64c8fc28 EFLAGS: 00000297 ORIG_RAX: 0000000000000001
>>>> RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004447c9
>>>> RDX: 000000000000fd87 RSI: 0000000020000600 RDI: 0000000000000004
>>>> RBP: 00000000006cf018 R08: 00007fff64c8fda8 R09: 00007fff00006bda
>>>> R10: 0000000000005fe7 R11: 0000000000000297 R12: 00000000004020d0
>>>> R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000
>>>>
>>>> Uninit was created at:
>>>>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
>>>>   kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
>>>>   kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
>>>>   kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
>>>>   slab_post_alloc_hook mm/slab.h:445 [inline]
>>>>   slab_alloc_node mm/slub.c:2737 [inline]
>>>>   __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
>>>>   __kmalloc_reserve net/core/skbuff.c:138 [inline]
>>>>   __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
>>>>   alloc_skb include/linux/skbuff.h:984 [inline]
>>>>   alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
>>>>   sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
>>>>   tun_alloc_skb drivers/net/tun.c:1532 [inline]
>>>>   tun_get_user+0x2242/0x7c60 drivers/net/tun.c:1829
>>>>   tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
>>>>   call_write_iter include/linux/fs.h:1782 [inline]
>>>>   new_sync_write fs/read_write.c:469 [inline]
>>>>   __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
>>>>   vfs_write+0x463/0x8d0 fs/read_write.c:544
>>>>   SYSC_write+0x172/0x360 fs/read_write.c:589
>>>>   SyS_write+0x55/0x80 fs/read_write.c:581
>>>>   do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
>>>>   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
>>>> ==================================================================
>>> I did a little digging before sending the bug upstream.
>>> If I add memset(obj, 0xfe, size) to __kmalloc_reserve(), these 0xfe
>>> bytes are visible in __get_item() at the place where KMSAN reports an
>>> error.
>>>
>>> The problem is somehow related to tun_get_user() creating a fragmented
>>> sk_buff - when I change the call to tun_alloc_skb() so that it
>>> allocates a single buffer the bug goes away.
>>>
>>
>> I guess the following patch would fix the issue
>>
>> (I will submit it more formally)
> No, as far as I can see it doesn't.
> Saving sid before __skb_pull() is still a good idea, but in this
> particular case |ph| doesn't change.
>> diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
>> index ce61231e96ea5fe27f512fbd0d80d4609997e508..333e967ed968ea3ff2dda25289f7f657263db2b9 100644
>> --- a/drivers/net/ppp/pppoe.c
>> +++ b/drivers/net/ppp/pppoe.c
>> @@ -423,6 +423,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
>>         struct pppoe_hdr *ph;
>>         struct pppox_sock *po;
>>         struct pppoe_net *pn;
>> +       __be16 sid;
>>         int len;
>>
>>         skb = skb_share_check(skb, GFP_ATOMIC);
>> @@ -434,6 +435,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
>>
>>         ph = pppoe_hdr(skb);
>>         len = ntohs(ph->length);

Then ph->length needs to be better validated.

>> +       sid = ph->sid;

I'll take a look, thanks.

^ permalink raw reply

* Re: Regression: kernel 4.14 an later very slow with many ipsec tunnels
From: Florian Westphal @ 2018-09-13 16:38 UTC (permalink / raw)
  To: Wolfgang Walter
  Cc: Florian Westphal, netdev, Steffen Klassert, David Miller,
	Linux Kernel Mailing List, Linus Torvalds
In-Reply-To: <3448099.9yk84El3Sa@stwm.de>

Wolfgang Walter <linux@stwm.de> wrote:
> I'll try that but this isn't that easy as the router image does not contain 
> perf. I also have to do that on our production router. I try to do that 
> tomorrow evening.

No need if its too difficult.

> What I can say is that it depends mainly on number of policy rules and SA.

Thats already a good hint, I guess we're hitting long hash chains in
xfrm_policy_lookup_bytype().

^ permalink raw reply

* Regression: kernel 4.14 an later very slow with many ipsec tunnels
From: Wolfgang Walter @ 2018-09-13 11:30 UTC (permalink / raw)
  To: netdev
  Cc: Florian Westphal, Steffen Klassert, David Miller,
	Linux Kernel Mailing List, Linus Torvalds

Hello,

thanks to the fix from Steffen Klassert I could now run 4.14.69 + his patch 
and 4.18.7 + his patch without oopsing immediately.

But I found that those kernels perform very bad. They perform so bad that they 
are unusable for our router with about 3000 ipsec tunnels (tunnel mode network 
<-> network).

With 4.9. (and all other kernels I used in the last 10 years with much less 
potent hardware) I never had an comparable performance issue with networking.

4.18.7 is better then 4.14.69 but still remains unusuable for us.

Even with very little traffic all 8 cores are working 100% in ksoftirqd. As 
soon as there is real traffic network gets rather unusuable.

Latency of packets goes from between 0.1ms to 1ms up to 100ms to 500ms (4.14) 
or between 15ms to 90ms (4.18).

Throughput also suffers a lot.

I have a simple test I run after every upgrade. This test basically copies 
with scp large files to 60 different remote locations (involving ipsec), 
limited to 1GBit/s combined, and in paralled I ping from different networks 
over this router to machines in other networks of this router (no ipsec-
tunnels involved).

With 4.9 and earlier copying needs about 2 minutes and the pings all remain 
under 2ms roundtrip. 

With 4.14 copying these files needs more than one our. The roundtrip time of 
the ping is > 1 second.

With 4.18 this is much better, copying needs around 6 minutes and ping 
roundtrip is between 30ms and 180ms. But even that is much worse then 4.9.

I think this dramatic loss in performance is due to the removal of the flow 
cache. I propose to revert that for 4.14. I also propose to revert it for the 
next longterm kernel if no other solution is found bringing back 4.9 
performance (at least about the same order of magnitude).

Maybe it should generally be reverted until a solution to the problem exists.

Regards,
-- 
Wolfgang Walter
Studentenwerk München
Anstalt des öffentlichen Rechts

^ permalink raw reply

* [PATCH net-next] net: dsa: b53: Do not fail when IRQ are not initialized
From: Florian Fainelli @ 2018-09-13 16:50 UTC (permalink / raw)
  To: netdev
  Cc: Florian Fainelli, Andrew Lunn, Vivien Didelot, David S. Miller,
	open list

When the Device Tree is not providing the per-port interrupts, do not fail
during b53_srab_irq_enable() but instead bail out gracefully. The SRAB driver
is used on the BCM5301X (Northstar) platforms which do not yet have the SRAB
interrupts wired up.

Fixes: 16994374a6fc ("net: dsa: b53: Make SRAB driver manage port interrupts")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/net/dsa/b53/b53_srab.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 321052732799..90f514252987 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -415,7 +415,13 @@ static int b53_srab_irq_enable(struct b53_device *dev, int port)
 {
 	struct b53_srab_priv *priv = dev->priv;
 	struct b53_srab_port_priv *p = &priv->port_intrs[port];
-	int ret;
+	int ret = 0;
+
+	/* Interrupt is optional and was not specified, do not make
+	 * this fatal
+	 */
+	if (p->irq == -ENXIO)
+		return ret;
 
 	ret = request_threaded_irq(p->irq, b53_srab_port_isr,
 				   b53_srab_port_thread, 0,
-- 
2.17.1

^ permalink raw reply related

* [PATCH] socket: fix struct ifreq size in compat ioctl
From: Johannes Berg @ 2018-09-13 11:49 UTC (permalink / raw)
  To: netdev; +Cc: Robert O'Callahan, Al Viro, Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

As reported by Reobert O'Callahan, since Viro's commit to kill
dev_ifsioc() we attempt to copy too much data in compat mode,
which may lead to EFAULT when the 32-bit version of struct ifreq
sits at/near the end of a page boundary, and the next page isn't
mapped.

Fix this by passing whether or not we're doing a compat call and
copying the appropriate size in/out, as we did before. This works
because only the embedded "struct ifmap" has different size, and
this is only used in SIOCGIFMAP/SIOCSIFMAP which has a different
handler. All other parts of the union are naturally compatible.

Fixes: bf4405737f9f ("kill dev_ifsioc()")
Reported-by: Robert O'Callahan <robert@ocallahan.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/socket.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index e6945e318f02..cef0725a2aaf 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -941,7 +941,8 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 static long sock_do_ioctl(struct net *net, struct socket *sock,
-				 unsigned int cmd, unsigned long arg)
+			  unsigned int cmd, unsigned long arg,
+			  bool compat)
 {
 	int err;
 	void __user *argp = (void __user *)arg;
@@ -967,11 +968,15 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
 	} else {
 		struct ifreq ifr;
 		bool need_copyout;
-		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+		if (copy_from_user(&ifr, argp,
+				   compat ? sizeof(struct compat_ifreq) :
+					    sizeof(struct ifreq)))
 			return -EFAULT;
 		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
 		if (!err && need_copyout)
-			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+			if (copy_to_user(argp, &ifr,
+					 compat ? sizeof(struct compat_ifreq) :
+						  sizeof(struct ifreq)))
 				return -EFAULT;
 	}
 	return err;
@@ -1070,7 +1075,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			err = open_related_ns(&net->ns, get_net_ns);
 			break;
 		default:
-			err = sock_do_ioctl(net, sock, cmd, arg);
+			err = sock_do_ioctl(net, sock, cmd, arg, false);
 			break;
 		}
 	return err;
@@ -2750,7 +2755,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
 	int err;
 
 	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, true);
 	set_fs(old_fs);
 	if (!err)
 		err = compat_put_timeval(&ktv, up);
@@ -2766,7 +2771,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
 	int err;
 
 	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, true);
 	set_fs(old_fs);
 	if (!err)
 		err = compat_put_timespec(&kts, up);
@@ -3072,7 +3077,7 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 	}
 
 	set_fs(KERNEL_DS);
-	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
+	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, true);
 	set_fs(old_fs);
 
 out:
@@ -3185,7 +3190,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCBONDSETHWADDR:
 	case SIOCBONDCHANGEACTIVE:
 	case SIOCGIFNAME:
-		return sock_do_ioctl(net, sock, cmd, arg);
+		return sock_do_ioctl(net, sock, cmd, arg, true);
 	}
 
 	return -ENOIOCTLCMD;
-- 
2.14.4

^ permalink raw reply related

* Re: [RFC PATCH iproute2-next] man: Add devlink health man page
From: Eran Ben Elisha @ 2018-09-13 11:58 UTC (permalink / raw)
  To: Tobin C. Harding
  Cc: netdev, Jiri Pirko, Andy Gospodarek, Michael Chan, Jakub Kicinski,
	Simon Horman, Alexander Duyck, Andrew Lunn, Florian Fainelli,
	Tal Alon, Ariel Almog
In-Reply-To: <20180913102732.GG11198@eros>



On 9/13/2018 1:27 PM, Tobin C. Harding wrote:
> On Thu, Sep 13, 2018 at 11:18:16AM +0300, Eran Ben Elisha wrote:
>> Add devlink-health man page. Devlink-health tool will control device
>> health attributes, sensors, actions and logging.
>>
>> Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
>>
>> -------------------------------------------------------
>> Copy paste man output to here for easier review process of the RFC.
>>
>> DEVLINK-HEALTH(8)                                                                                               Linux                                                                                              DEVLINK-HEALTH(8)
>>
>> NAME
>>         devlink-health - devlink health configuration
>>
>> SYNOPSIS
>>         devlink [ OPTIONS ] health  { COMMAND | help }
>>
>>         OPTIONS := { -V[ersion] | -n[no-nice-names] }
>>
>>         devlink health show [ DEV ] [ sensor NAME ]
>>
>>         devlink health sensor set DEV name NAME [ action NAME { active | inactive } ]"
>>
>>         devlink health action set DEV name NAME period PERIOD count COUNT fail { ignore | down }
>>
>>         devlink health action reinit DEV name NAME
>>
>>         devlink health help
>>
>> DESCRIPTION
>>         devlink-health tool allows user to configure the way driver treats unexpected status. The tool allows configuration of the sensors that can trigger health activity. Set for each sensor the follow up operations, such as,
>>         reset and dump of info. In addition, set the health activity termination action.
>>
>>     devlink health show - Display devlink health sensors and actions attributes
>>         DEV - Specifies the devlink device to show.  If this argument is omitted, all devices are listed.
>>
>>             Format is:
>>               BUS_NAME/BUS_ADDRESS
>>
>>         sensor NAME - Specifies the devlink sensor to show.
>>
> 
> Perhaps the commands should include the optional arguments so when
> reading the description one doesn't have to scroll to the top of the
> page all the time
> 
> e.g
>       devlink health show [ DEV ] [ sensor NAME ] - Display devlink health sensors and actions attributes
> 

I followed the scheme presented in all other devlink man pages.
see devlink-region, devlink-port, etc.

 From my perspective, I am fine with adding it to devlink-health, need 
ack from the devlink maintainer to see if he likes it...

>>     devlink health sensor set - sets devlink health sensor attributes
>>         DEV    Specifies the devlink device to show.
> 
> 	 		      	      	     	set
> 
>>         name NAME
>>                Name of the sensor to set.
>>
>>         action NAME { active | inactive }
>>                    Specify which actions to activate and which to deactivate once a sensor was triggered. actions can be dump, reset, etc.
>>
>>     devlink health action set - sets devlink action attributes
>>         DEV    Specifies the devlink device to set.
>>
>>         name NAME
>>                Specifies the devlink action to set.
> 
> This is a little unclear to me?

what is not clear? the term 'action' or the naming? can you elaborate?

> 
>>         period PERIOD
>>                The period on which we limit the amount of performed actions, measured in seconds.
>>
>>         count COUNT
>>                The maximum amount of actions performed in a limit time frame.
> 
> Perhaps		    	    	      	
>                  The maximum number of actions performed in a limited time frame.

ack

> 
>>         fail   { ignore | down }
>>                    Specify the behavior once count limit was reached.
>>
>>                    ignore - Ignore errors without execution of any action.
>>
>>                    down - Driver will remain in nonoperational state.
>>
>>     devlink health action reinit - reset devlink action attributes (period, count, fail, etc)
>>         DEV    Specifies the devlink device to set.
>>
>>         name NAME
>>                Specifies the devlink action to set.
> 
> Perhaps s/set/reinitialise/g for the above two descriptions.

ack

> 
> Hope this helps,
> Tobin.

thanks

^ permalink raw reply

* Re: [PATCH 2/2] netlink: add ethernet address policy types
From: Johannes Berg @ 2018-09-13 12:02 UTC (permalink / raw)
  To: Michal Kubecek; +Cc: linux-wireless, netdev
In-Reply-To: <20180913115849.GF29691@unicorn.suse.cz>

On Thu, 2018-09-13 at 13:58 +0200, Michal Kubecek wrote:

> The code looks correct to me but I have some doubts. Having a special
> policy for MAC addresses may lead to adding one for IPv4 address (maybe
> not, we can use NLA_U32 for them), IPv6 addresses and other data types
> with fixed length. Wouldn't it be more helpful to add a variant of
> NLA_BINARY (NLA_BINARY_EXACT?) which would fail/warn if attribute length
> isn't equal to .len?

Yeah, I guess we could do that, and then

#define NLA_ETH_ADDR .len = ETH_ALEN, .type = NLA_BINARY_EXACT
#define NLA_IP6_ADDR .len = 16, .type = NLA_BINARY_EXACT

or so?

johannes

^ permalink raw reply

* Re: [PATCH 1/2] netlink: add NLA_REJECT policy type
From: Michal Kubecek @ 2018-09-13 12:05 UTC (permalink / raw)
  To: Johannes Berg; +Cc: linux-wireless, netdev
In-Reply-To: <1536837915.4160.1.camel@sipsolutions.net>

On Thu, Sep 13, 2018 at 01:25:15PM +0200, Johannes Berg wrote:
> On Thu, 2018-09-13 at 12:49 +0200, Michal Kubecek wrote:
> 
> > >  		if (type > 0 && type <= maxtype) {
> > >  			if (policy) {
> > > -				err = validate_nla(nla, maxtype, policy);
> > > +				err = validate_nla(nla, maxtype, policy,
> > > +						   extack);
> > >  				if (err < 0) {
> > > -					NL_SET_ERR_MSG_ATTR(extack, nla,
> > > -							    "Attribute failed policy validation");
> > > +					NL_SET_BAD_ATTR(extack, nla);
> > > +					if (extack && !extack->_msg)
> > > +						NL_SET_ERR_MSG(extack,
> > > +							       "Attribute failed policy validation");
> > >  					goto errout;
> > >  				}
> > >  			}
> > > -- 
> > 
> > Technically, this would change the outcome when nla_parse() is called
> > with extack->_msg already set nad validate_nla() fails on something else
> > than NLA_REJECT; it will preserve the previous message in such case.
> > But I don't think this is a serious problem.
> 
> Yes, that's true. I looked at quite a few of the setters just now (there
> are ~500 already, wow!), and they all set & return, so this shouldn't be
> an issue.

In ethtool (work in progress) I sometimes use extack message for
non-fatal warnings but AFAICS never before parsing the userspace
request (actually always shortly before returning). So I don't have
a problem with it.

Michal Kubecek

^ permalink raw reply

* Re: [RFC PATCH iproute2-next] man: Add devlink health man page
From: Andrew Lunn @ 2018-09-13 12:08 UTC (permalink / raw)
  To: Eran Ben Elisha
  Cc: netdev, Jiri Pirko, Andy Gospodarek, Michael Chan, Jakub Kicinski,
	Simon Horman, Alexander Duyck, Florian Fainelli, Tal Alon,
	Ariel Almog
In-Reply-To: <1536826696-9413-2-git-send-email-eranbe@mellanox.com>

>        devlink health sensor set pci/0000:01:00.0 name TX_COMP_ERROR action reset off action dump on
>            Sets TX_COMP_ERROR sensor parameters for a specific device.

I hope the real sensors have more understandable names. If i remember
correctly, the same sort of comment was given for resource
management. It was pretty unclear what the resource names actually
mean. Is an average user going to have any idea how to actually use
these sensors and actions?

Can you give more examples of sensors. We should understand if there
are any overlaps with hwmon.

    Andrew

^ permalink raw reply

* Re: [PATCH net-next] net: dsa: b53: Do not fail when IRQ are not initialized
From: David Miller @ 2018-09-13 17:19 UTC (permalink / raw)
  To: f.fainelli; +Cc: netdev, andrew, vivien.didelot, linux-kernel
In-Reply-To: <20180913165045.3802-1-f.fainelli@gmail.com>

From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 13 Sep 2018 09:50:45 -0700

> When the Device Tree is not providing the per-port interrupts, do not fail
> during b53_srab_irq_enable() but instead bail out gracefully. The SRAB driver
> is used on the BCM5301X (Northstar) platforms which do not yet have the SRAB
> interrupts wired up.
> 
> Fixes: 16994374a6fc ("net: dsa: b53: Make SRAB driver manage port interrupts")
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>

Applied, thanks Florian.

^ permalink raw reply

* Re: [PATCH 2/2] netlink: add ethernet address policy types
From: Michal Kubecek @ 2018-09-13 12:12 UTC (permalink / raw)
  To: Johannes Berg; +Cc: linux-wireless, netdev
In-Reply-To: <1536840173.4160.4.camel@sipsolutions.net>

On Thu, Sep 13, 2018 at 02:02:53PM +0200, Johannes Berg wrote:
> On Thu, 2018-09-13 at 13:58 +0200, Michal Kubecek wrote:
> 
> > The code looks correct to me but I have some doubts. Having a special
> > policy for MAC addresses may lead to adding one for IPv4 address (maybe
> > not, we can use NLA_U32 for them), IPv6 addresses and other data types
> > with fixed length. Wouldn't it be more helpful to add a variant of
> > NLA_BINARY (NLA_BINARY_EXACT?) which would fail/warn if attribute length
> > isn't equal to .len?
> 
> Yeah, I guess we could do that, and then
> 
> #define NLA_ETH_ADDR .len = ETH_ALEN, .type = NLA_BINARY_EXACT
> #define NLA_IP6_ADDR .len = 16, .type = NLA_BINARY_EXACT
> 
> or so?

Maybe rather

  #define NLA_ETH_ADDR NLA_BINARY_EXACT, .len = ETH_ALEN
  #define NLA_IP6_ADDR NLA_BINARY_EXACT, .len = sizeof(struct in6_addr)

so that one could write

  { .type = NLA_ETH_ADDR }

as with other types.

Michal Kubecek

^ permalink raw reply

* Re: Regression: kernel 4.14 an later very slow with many ipsec tunnels
From: David Miller @ 2018-09-13 17:23 UTC (permalink / raw)
  To: fw; +Cc: linux, netdev, steffen.klassert, linux-kernel, torvalds
In-Reply-To: <20180913163848.ni5xc4gc4d6uusdn@breakpoint.cc>

From: Florian Westphal <fw@strlen.de>
Date: Thu, 13 Sep 2018 18:38:48 +0200

> Wolfgang Walter <linux@stwm.de> wrote:
>> What I can say is that it depends mainly on number of policy rules and SA.
> 
> Thats already a good hint, I guess we're hitting long hash chains in
> xfrm_policy_lookup_bytype().

I don't really see how recent changes can influence that.

And the bydst hashes have been dynamically sized for a very long time.

I might have missed something...

^ permalink raw reply

* Re: [PATCH] socket: fix struct ifreq size in compat ioctl
From: Al Viro @ 2018-09-13 12:13 UTC (permalink / raw)
  To: Johannes Berg; +Cc: netdev, Robert O'Callahan, Johannes Berg
In-Reply-To: <20180913114909.8331-1-johannes@sipsolutions.net>

On Thu, Sep 13, 2018 at 01:49:09PM +0200, Johannes Berg wrote:
> From: Johannes Berg <johannes.berg@intel.com>
> 
> As reported by Reobert O'Callahan, since Viro's commit to kill
> dev_ifsioc() we attempt to copy too much data in compat mode,
> which may lead to EFAULT when the 32-bit version of struct ifreq
> sits at/near the end of a page boundary, and the next page isn't
> mapped.
> 
> Fix this by passing whether or not we're doing a compat call and
> copying the appropriate size in/out, as we did before. This works
> because only the embedded "struct ifmap" has different size, and
> this is only used in SIOCGIFMAP/SIOCSIFMAP which has a different
> handler. All other parts of the union are naturally compatible.

Might be better to pass explicit size instead of this bool compat...

^ permalink raw reply

* Re: KMSAN: uninit-value in pppoe_rcv
From: Guillaume Nault @ 2018-09-13 17:23 UTC (permalink / raw)
  To: Alexander Potapenko
  Cc: Eric Dumazet, syzbot+f5f6080811c849739212, LKML, mostrows,
	Networking, syzkaller-bugs
In-Reply-To: <20180913161929.GA1507@alphalink.fr>

On Thu, Sep 13, 2018 at 06:19:29PM +0200, Guillaume Nault wrote:
> On Thu, Sep 13, 2018 at 04:12:38PM +0200, Alexander Potapenko wrote:
> > On Thu, Sep 13, 2018 at 3:57 PM Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > >
> > >
> > >
> > > On 09/12/2018 03:38 AM, Alexander Potapenko wrote:
> > > > On Wed, Sep 12, 2018 at 12:24 PM syzbot
> > > > <syzbot+f5f6080811c849739212@syzkaller.appspotmail.com> wrote:
> > > >>
> > > >> Hello,
> > > >>
> > > >> syzbot found the following crash on:
> > > >>
> > > >> HEAD commit:    d2d741e5d189 kmsan: add initialization for shmem pages
> > > >> git tree:       https://github.com/google/kmsan.git/master
> > > >> console output: https://syzkaller.appspot.com/x/log.txt?x=1465fc37800000
> > > >> kernel config:  https://syzkaller.appspot.com/x/.config?x=48f9de3384bcd0f
> > > >> dashboard link: https://syzkaller.appspot.com/bug?extid=f5f6080811c849739212
> > > >> compiler:       clang version 7.0.0 (trunk 329391)
> > > >> syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=14d6e607800000
> > > >> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=10a15b5b800000
> > > >>
> > > >> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> > > >> Reported-by: syzbot+f5f6080811c849739212@syzkaller.appspotmail.com
> > > >>
> > > >> IPVS: ftp: loaded support on port[0] = 21
> > > >> ==================================================================
> > > >> BUG: KMSAN: uninit-value in __get_item drivers/net/ppp/pppoe.c:172 [inline]
> > > >> BUG: KMSAN: uninit-value in get_item drivers/net/ppp/pppoe.c:236 [inline]
> > > >> BUG: KMSAN: uninit-value in pppoe_rcv+0xcef/0x10e0
> > > >> drivers/net/ppp/pppoe.c:450
> > > >> CPU: 0 PID: 4543 Comm: syz-executor355 Not tainted 4.16.0+ #87
> > > >> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> > > >> Google 01/01/2011
> > > >> Call Trace:
> > > >>   __dump_stack lib/dump_stack.c:17 [inline]
> > > >>   dump_stack+0x185/0x1d0 lib/dump_stack.c:53
> > > >>   kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
> > > >>   __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683
> > > >>   __get_item drivers/net/ppp/pppoe.c:172 [inline]
> > > >>   get_item drivers/net/ppp/pppoe.c:236 [inline]
> > > >>   pppoe_rcv+0xcef/0x10e0 drivers/net/ppp/pppoe.c:450
> > > >>   __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562
> > > >>   __netif_receive_skb net/core/dev.c:4627 [inline]
> > > >>   netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701
> > > >>   netif_receive_skb+0x230/0x240 net/core/dev.c:4725
> > > >>   tun_rx_batched drivers/net/tun.c:1555 [inline]
> > > >>   tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962
> > > >>   tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
> > > >>   call_write_iter include/linux/fs.h:1782 [inline]
> > > >>   new_sync_write fs/read_write.c:469 [inline]
> > > >>   __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
> > > >>   vfs_write+0x463/0x8d0 fs/read_write.c:544
> > > >>   SYSC_write+0x172/0x360 fs/read_write.c:589
> > > >>   SyS_write+0x55/0x80 fs/read_write.c:581
> > > >>   do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
> > > >>   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> > > >> RIP: 0033:0x4447c9
> > > >> RSP: 002b:00007fff64c8fc28 EFLAGS: 00000297 ORIG_RAX: 0000000000000001
> > > >> RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004447c9
> > > >> RDX: 000000000000fd87 RSI: 0000000020000600 RDI: 0000000000000004
> > > >> RBP: 00000000006cf018 R08: 00007fff64c8fda8 R09: 00007fff00006bda
> > > >> R10: 0000000000005fe7 R11: 0000000000000297 R12: 00000000004020d0
> > > >> R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000
> > > >>
> > > >> Uninit was created at:
> > > >>   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
> > > >>   kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
> > > >>   kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
> > > >>   kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
> > > >>   slab_post_alloc_hook mm/slab.h:445 [inline]
> > > >>   slab_alloc_node mm/slub.c:2737 [inline]
> > > >>   __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
> > > >>   __kmalloc_reserve net/core/skbuff.c:138 [inline]
> > > >>   __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
> > > >>   alloc_skb include/linux/skbuff.h:984 [inline]
> > > >>   alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
> > > >>   sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
> > > >>   tun_alloc_skb drivers/net/tun.c:1532 [inline]
> > > >>   tun_get_user+0x2242/0x7c60 drivers/net/tun.c:1829
> > > >>   tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990
> > > >>   call_write_iter include/linux/fs.h:1782 [inline]
> > > >>   new_sync_write fs/read_write.c:469 [inline]
> > > >>   __vfs_write+0x7fb/0x9f0 fs/read_write.c:482
> > > >>   vfs_write+0x463/0x8d0 fs/read_write.c:544
> > > >>   SYSC_write+0x172/0x360 fs/read_write.c:589
> > > >>   SyS_write+0x55/0x80 fs/read_write.c:581
> > > >>   do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
> > > >>   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> > > >> ==================================================================
> > > > I did a little digging before sending the bug upstream.
> > > > If I add memset(obj, 0xfe, size) to __kmalloc_reserve(), these 0xfe
> > > > bytes are visible in __get_item() at the place where KMSAN reports an
> > > > error.
> > > >
> > > > The problem is somehow related to tun_get_user() creating a fragmented
> > > > sk_buff - when I change the call to tun_alloc_skb() so that it
> > > > allocates a single buffer the bug goes away.
> > > >
> > >
> > > I guess the following patch would fix the issue
> > >
> > > (I will submit it more formally)
> > No, as far as I can see it doesn't.
> > Saving sid before __skb_pull() is still a good idea, but in this
> > particular case |ph| doesn't change.
> 
> Yes, we probably need to save sid.
> 
> But I think the problem found by syzbot is related to
> eth_hdr(skb)->h_source. PPPoE expects that Ethernet header has already
> been parsed and is accessible at skb_mac_header(skb).
> But here skb_mac_header(skb) == skb->data, and we may pull only 6 bytes
> (sizeof(truct pppoe_hdr)). Therefore eth_hdr(skb)->h_source points past
> skb's head length.
> 
> Not sure if something needs to be changed in tun.c for properly setting
> skb_mac_header. But PPPoE has no reason to consider packets from
> non-Ethernet devices anyway.

Nothing to change in tun.c. Just some more tests in pppoe.
Can you try this patch? It only addresses this particular report, not
the problems spotted by Eric.

-------- 8< --------
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 5aa59f41bf8c..77241b584dff 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -429,6 +429,9 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb)
 		goto out;
 
+	if (skb_mac_header_len(skb) < ETH_HLEN)
+		goto drop;
+
 	if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
 		goto drop;
 

^ permalink raw reply related

* Re: [PATCH 2/2] netlink: add ethernet address policy types
From: Johannes Berg @ 2018-09-13 12:16 UTC (permalink / raw)
  To: Michal Kubecek; +Cc: linux-wireless, netdev
In-Reply-To: <20180913121227.GH29691@unicorn.suse.cz>

On Thu, 2018-09-13 at 14:12 +0200, Michal Kubecek wrote:
> On Thu, Sep 13, 2018 at 02:02:53PM +0200, Johannes Berg wrote:
> > On Thu, 2018-09-13 at 13:58 +0200, Michal Kubecek wrote:
> > 
> > > The code looks correct to me but I have some doubts. Having a special
> > > policy for MAC addresses may lead to adding one for IPv4 address (maybe
> > > not, we can use NLA_U32 for them), IPv6 addresses and other data types
> > > with fixed length. Wouldn't it be more helpful to add a variant of
> > > NLA_BINARY (NLA_BINARY_EXACT?) which would fail/warn if attribute length
> > > isn't equal to .len?
> > 
> > Yeah, I guess we could do that, and then
> > 
> > #define NLA_ETH_ADDR .len = ETH_ALEN, .type = NLA_BINARY_EXACT
> > #define NLA_IP6_ADDR .len = 16, .type = NLA_BINARY_EXACT
> > 
> > or so?
> 
> Maybe rather
> 
>   #define NLA_ETH_ADDR NLA_BINARY_EXACT, .len = ETH_ALEN
>   #define NLA_IP6_ADDR NLA_BINARY_EXACT, .len = sizeof(struct in6_addr)
> 
> so that one could write
> 
>   { .type = NLA_ETH_ADDR }

Yeah, that's possible. I considered it for a second, but it was slightly
too magical for my taste :-)

Better pick a different "namespace", perhaps NLA_POLICY_ETH_ADDR or so?

johannes

^ permalink raw reply

* Re: [PATCH] socket: fix struct ifreq size in compat ioctl
From: Johannes Berg @ 2018-09-13 12:16 UTC (permalink / raw)
  To: Al Viro; +Cc: netdev, Robert O'Callahan
In-Reply-To: <20180913121353.GR19965@ZenIV.linux.org.uk>

On Thu, 2018-09-13 at 13:13 +0100, Al Viro wrote:
> On Thu, Sep 13, 2018 at 01:49:09PM +0200, Johannes Berg wrote:
> > From: Johannes Berg <johannes.berg@intel.com>
> > 
> > As reported by Reobert O'Callahan, since Viro's commit to kill
> > dev_ifsioc() we attempt to copy too much data in compat mode,
> > which may lead to EFAULT when the 32-bit version of struct ifreq
> > sits at/near the end of a page boundary, and the next page isn't
> > mapped.
> > 
> > Fix this by passing whether or not we're doing a compat call and
> > copying the appropriate size in/out, as we did before. This works
> > because only the embedded "struct ifmap" has different size, and
> > this is only used in SIOCGIFMAP/SIOCSIFMAP which has a different
> > handler. All other parts of the union are naturally compatible.
> 
> Might be better to pass explicit size instead of this bool compat...

Yeah, that's a bit better perhaps, I should resend to have the bugzilla
link anyway.

johannes

^ permalink raw reply

* Re: KMSAN: uninit-value in pppoe_rcv
From: Eric Dumazet @ 2018-09-13 17:31 UTC (permalink / raw)
  To: Guillaume Nault, Alexander Potapenko
  Cc: Eric Dumazet, syzbot+f5f6080811c849739212, LKML, mostrows,
	Networking, syzkaller-bugs
In-Reply-To: <20180913172344.GB1507@alphalink.fr>



On 09/13/2018 10:23 AM, Guillaume Nault wrote:

> Nothing to change in tun.c. Just some more tests in pppoe.
> Can you try this patch? It only addresses this particular report, not
> the problems spotted by Eric.
> 
> -------- 8< --------
> diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
> index 5aa59f41bf8c..77241b584dff 100644
> --- a/drivers/net/ppp/pppoe.c
> +++ b/drivers/net/ppp/pppoe.c
> @@ -429,6 +429,9 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
>  	if (!skb)
>  		goto out;
>  
> +	if (skb_mac_header_len(skb) < ETH_HLEN)
> +		goto drop;
> +
>  	if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
>  		goto drop;
>  
> 


Yeah this probably will help ;)

^ permalink raw reply

* Re: [PATCH 0/2] net: qcom/emac: add shared mdio bus support
From: Andrew Lunn @ 2018-09-13 12:24 UTC (permalink / raw)
  To: Wang Dongsheng; +Cc: timur, davem, yu.zheng, netdev, devicetree
In-Reply-To: <1536829493-10088-1-git-send-email-dongsheng.wang@hxt-semitech.com>

On Thu, Sep 13, 2018 at 05:04:51PM +0800, Wang Dongsheng wrote:
> Share the mii_bus for others MAC device because QDF2400 emac
> include MDIO, and the motherboard has more than one PHY connected
> to an MDIO bus.
> 
> Tested: QDF2400 (ACPI), buildin/insmod/rmmod
> 
> Wang Dongsheng (2):
>   dt-bindings: net: qcom: Add binding for shared mdio bus

Hi Wang

Patch #1 did not seem to make it to netdev.

I probably have some comments on the binding...

  Andrew

^ permalink raw reply

* Re: [PATCH net-next 1/3] net: rework SIOCGSTAMP ioctl handling
From: Arnd Bergmann @ 2018-09-13 12:28 UTC (permalink / raw)
  To: Networking, David Miller
  Cc: linux-arch, y2038 Mailman List, Eric Dumazet, Willem de Bruijn,
	Linux Kernel Mailing List, linux-hams, Bluez mailing list,
	linux-can, dccp, linux-wpan, linux-sctp, linux-x25
In-Reply-To: <20180829130308.3504560-1-arnd@arndb.de>

On Wed, Aug 29, 2018 at 3:03 PM Arnd Bergmann <arnd@arndb.de> wrote:

> diff --git a/net/core/sock.c b/net/core/sock.c
> index 3730eb855095..df17bbfaca27 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -2897,37 +2897,31 @@ bool lock_sock_fast(struct sock *sk)
>  }
>  EXPORT_SYMBOL(lock_sock_fast);
>
> -int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
> +int sock_gettstamp(struct socket *sock, void __user *userstamp,
> +                  bool timeval, bool time32)
>  {
> -       struct timeval tv;
> -
> -       sock_enable_timestamp(sk, SOCK_TIMESTAMP);
> -       tv = ktime_to_timeval(sk->sk_stamp);
> -       if (tv.tv_sec == -1)
> -               return -ENOENT;
> -       if (tv.tv_sec == 0) {
> -               sk->sk_stamp = ktime_get_real();
> -               tv = ktime_to_timeval(sk->sk_stamp);
> -       }
> -       return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
> -}
> -EXPORT_SYMBOL(sock_get_timestamp);

As  I just learned, sparc64 uses a 32-bit suseconds_t, so this
function always leaked 32 bits of kernel stack data by copying
the padding bytes of 'tv' into user space.

Linux-4.11 and higher could avoid that with
CONFIG_GCC_PLUGIN_STRUCTLEAK, but older kernels
have been affected since socket timestamps were first added.

The same thing is probably true of many other interfaces
that pass a timeval.

> -int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
> -{
> -       struct timespec ts;
> +       struct sock *sk = sock->sk;
> +       struct timespec64 ts;
>
>         sock_enable_timestamp(sk, SOCK_TIMESTAMP);
> -       ts = ktime_to_timespec(sk->sk_stamp);
> +       ts = ktime_to_timespec64(sk->sk_stamp);
>         if (ts.tv_sec == -1)
>                 return -ENOENT;
>         if (ts.tv_sec == 0) {
>                 sk->sk_stamp = ktime_get_real();
> -               ts = ktime_to_timespec(sk->sk_stamp);
> +               ts = ktime_to_timespec64(sk->sk_stamp);
>         }
> -       return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
> +
> +       if (timeval)
> +               ts.tv_nsec /= 1000;
> +#ifdef CONFIG_COMPAT_32BIT_TIME
> +       if (time32)
> +               return put_old_timespec32(&ts, userstamp);
> +#endif
> +
> +       return put_timespec64(&ts, userstamp);
>  }

My new implementation is worse here: it no longer leaks stack
data, but since we now write a big-endian 64-bit microseconds
value, the microseconds are in the wrong place and will
be interpreted as zero by user space...

I'll also have to revisit a few other similar patches I did for
y2038, to figure out what they should do on sparc64.

         Arnd

^ permalink raw reply

* Re: [PATCHv2] net: ipv4: Use BUG_ON directly instead of a if condition followed by BUG
From: David Miller @ 2018-09-13 17:42 UTC (permalink / raw)
  To: zhongjiang; +Cc: edumazet, kuznet, netdev, linux-kernel
In-Reply-To: <1536671593-14746-1-git-send-email-zhongjiang@huawei.com>

From: zhong jiang <zhongjiang@huawei.com>
Date: Tue, 11 Sep 2018 21:13:13 +0800

> The if condition can be removed if we use BUG_ON directly.
> The issule is detected with the help of Coccinelle.
> 
> Signed-off-by: zhong jiang <zhongjiang@huawei.com>

Taking Simon's feedback into consideration...

I very often see changes like this and have to check the implementation
of BUG_ON() et al. to make sure it evaluates it's arguments even when
debugging is disabled.

Even if it is always guaranteed to do so, like me people will be unsure
forever and have to check.

That makes auditing code and validating things more time consuming and
hard.

I also think the code as written now looks a lot nicer.

So I'm not applying this, sorry.

^ permalink raw reply

* Re: [PATCH 1/2] r8169: Align ASPM/CLKREQ setting function with vendor driver
From: David Miller @ 2018-09-13 17:48 UTC (permalink / raw)
  To: kai.heng.feng; +Cc: nic_swsd, hkallweit1, netdev, linux-kernel
In-Reply-To: <20180912065821.7198-1-kai.heng.feng@canonical.com>

From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Wed, 12 Sep 2018 14:58:20 +0800

> There's a small delay after setting ASPM in vendor drivers, r8101 and
> r8168.
> In addition, those drivers enable ASPM before ClkReq, also change that
> to align with vendor driver.
> 
> I haven't seen anything bad becasue of this, but I think it's better to
> keep in sync with vendor driver.
> 
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>

I want Heiner Kallweit to review these before I apply them.

Thanks.

^ permalink raw reply

* [PATCH v2] socket: fix struct ifreq size in compat ioctl
From: Johannes Berg @ 2018-09-13 12:40 UTC (permalink / raw)
  To: netdev; +Cc: Robert O'Callahan, Al Viro, Johannes Berg

From: Johannes Berg <johannes.berg@intel.com>

As reported by Reobert O'Callahan, since Viro's commit to kill
dev_ifsioc() we attempt to copy too much data in compat mode,
which may lead to EFAULT when the 32-bit version of struct ifreq
sits at/near the end of a page boundary, and the next page isn't
mapped.

Fix this by passing the approprate compat/non-compat size to copy
and using that, as before the dev_ifsioc() removal. This works
because only the embedded "struct ifmap" has different size, and
this is only used in SIOCGIFMAP/SIOCSIFMAP which has a different
handler. All other parts of the union are naturally compatible.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199469.

Fixes: bf4405737f9f ("kill dev_ifsioc()")
Reported-by: Robert O'Callahan <robert@ocallahan.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/socket.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index e6945e318f02..01f3f8f32d6f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -941,7 +941,8 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 static long sock_do_ioctl(struct net *net, struct socket *sock,
-				 unsigned int cmd, unsigned long arg)
+			  unsigned int cmd, unsigned long arg,
+			  unsigned int ifreq_size)
 {
 	int err;
 	void __user *argp = (void __user *)arg;
@@ -967,11 +968,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
 	} else {
 		struct ifreq ifr;
 		bool need_copyout;
-		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+		if (copy_from_user(&ifr, argp, ifreq_size))
 			return -EFAULT;
 		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
 		if (!err && need_copyout)
-			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+			if (copy_to_user(argp, &ifr, ifreq_size))
 				return -EFAULT;
 	}
 	return err;
@@ -1070,7 +1071,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 			err = open_related_ns(&net->ns, get_net_ns);
 			break;
 		default:
-			err = sock_do_ioctl(net, sock, cmd, arg);
+			err = sock_do_ioctl(net, sock, cmd, arg,
+					    sizeof(struct ifreq));
 			break;
 		}
 	return err;
@@ -2750,7 +2752,8 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
 	int err;
 
 	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv,
+			    sizeof(struct compat_ifreq));
 	set_fs(old_fs);
 	if (!err)
 		err = compat_put_timeval(&ktv, up);
@@ -2766,7 +2769,8 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
 	int err;
 
 	set_fs(KERNEL_DS);
-	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
+	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts,
+			    sizeof(struct compat_ifreq));
 	set_fs(old_fs);
 	if (!err)
 		err = compat_put_timespec(&kts, up);
@@ -3072,7 +3076,8 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 	}
 
 	set_fs(KERNEL_DS);
-	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
+	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r,
+			    sizeof(struct compat_ifreq));
 	set_fs(old_fs);
 
 out:
@@ -3185,7 +3190,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCBONDSETHWADDR:
 	case SIOCBONDCHANGEACTIVE:
 	case SIOCGIFNAME:
-		return sock_do_ioctl(net, sock, cmd, arg);
+		return sock_do_ioctl(net, sock, cmd, arg,
+				     sizeof(struct compat_ifreq));
 	}
 
 	return -ENOIOCTLCMD;
-- 
2.14.4

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox