Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net] ipv4/igmp: fix v1/v2 switchback timeout based on rfc3376, 8.12
From: Hangbin Liu @ 2018-10-26  3:30 UTC (permalink / raw)
  To: netdev
  Cc: Daniel Borkmann, David S. Miller, Hannes Frederic Sowa, Cong Wang,
	Hangbin Liu

Similiar with ipv6 mcast commit 89225d1ce6af3 ("net: ipv6: mld: fix v1/v2
switchback timeout to rfc3810, 9.12.")

i) RFC3376 8.12. Older Version Querier Present Timeout says:

   The Older Version Querier Interval is the time-out for transitioning
   a host back to IGMPv3 mode once an older version query is heard.
   When an older version query is received, hosts set their Older
   Version Querier Present Timer to Older Version Querier Interval.

   This value MUST be ((the Robustness Variable) times (the Query
   Interval in the last Query received)) plus (one Query Response
   Interval).

Currently we only use a hardcode value IGMP_V1/v2_ROUTER_PRESENT_TIMEOUT.
Fix it by adding two new items mr_qi(Query Interval) and mr_qri(Query Response
Interval) in struct in_device.

Now we can calculate the switchback time via (mr_qrv * mr_qi) + mr_qri.
We need update these values when receive IGMPv3 queries.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 include/linux/inetdevice.h |  4 +++-
 net/ipv4/igmp.c            | 53 +++++++++++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c759d1c..a64f21a 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -37,7 +37,9 @@ struct in_device {
 	unsigned long		mr_v1_seen;
 	unsigned long		mr_v2_seen;
 	unsigned long		mr_maxdelay;
-	unsigned char		mr_qrv;
+	unsigned long		mr_qi;		/* Query Interval */
+	unsigned long		mr_qri;		/* Query Response Interval */
+	unsigned char		mr_qrv;		/* Query Robustness Variable */
 	unsigned char		mr_gq_running;
 	unsigned char		mr_ifc_count;
 	struct timer_list	mr_gq_timer;	/* general query timer */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da3944..765b2b3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -111,13 +111,10 @@
 #ifdef CONFIG_IP_MULTICAST
 /* Parameter names and values are taken from igmp-v2-06 draft */
 
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT		(400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT		(400*HZ)
 #define IGMP_V2_UNSOLICITED_REPORT_INTERVAL	(10*HZ)
 #define IGMP_V3_UNSOLICITED_REPORT_INTERVAL	(1*HZ)
+#define IGMP_QUERY_INTERVAL			(125*HZ)
 #define IGMP_QUERY_RESPONSE_INTERVAL		(10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE		2
-
 
 #define IGMP_INITIAL_REPORT_DELAY		(1)
 
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 
 			max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
 			in_dev->mr_v1_seen = jiffies +
-				IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 			group = 0;
 		} else {
 			/* v2 router present */
 			max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
 			in_dev->mr_v2_seen = jiffies +
-				IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 		}
 		/* cancel the interface change timer */
 		in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (!max_delay)
 			max_delay = 1;	/* can't mod w/ 0 */
 		in_dev->mr_maxdelay = max_delay;
-		if (ih3->qrv)
-			in_dev->mr_qrv = ih3->qrv;
+
+		/* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+		 * received value was zero, use the default or statically
+		 * configured value.
+		 */
+		in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+		in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+		/* RFC3376, 8.3. Query Response Interval:
+		 * The number of seconds represented by the [Query Response
+		 * Interval] must be less than the [Query Interval].
+		 */
+		if (in_dev->mr_qri >= in_dev->mr_qi)
+			in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
 		if (!group) { /* general query */
 			if (ih3->nsrcs)
 				return true;	/* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
 	ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
 }
 
-void ip_mc_init_dev(struct in_device *in_dev)
-{
 #ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
 	struct net *net = dev_net(in_dev->dev);
+
+	in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+	in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
 #endif
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
 	ASSERT_RTNL();
 
 #ifdef CONFIG_IP_MULTICAST
 	timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
 	timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
 #endif
+	ip_mc_reset(in_dev);
 
 	spin_lock_init(&in_dev->mc_tomb_lock);
 }
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
 void ip_mc_up(struct in_device *in_dev)
 {
 	struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 
 	ASSERT_RTNL();
 
-#ifdef CONFIG_IP_MULTICAST
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+	ip_mc_reset(in_dev);
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for_each_pmc_rtnl(in_dev, pmc) {
-- 
2.5.5

^ permalink raw reply related

* bnx2: rx_fw_discards: BCM5716 sporadically drops packets when update to driver version 2.2.6
From: maowenan @ 2018-10-26  3:15 UTC (permalink / raw)
  To: rasesh.mody, netdev, f.fainelli, andrew, linux-kernel

Hi，

After I update version of bnx2 driver from 2.2.1 to 2.2.6, I find BCM5716 sporadically drops packets, which
shows in rx_fw_discards.
C36-141-5:~ #  ethtool -S NIC0

NIC statistics:
     rx_ucast_packets: 11902

     rx_mcast_packets: 217

     rx_bcast_packets: 4954320

     rx_filtered_packets: 328793

     rx_fw_discards: 2742

C36-141-5:~ #

5s later:

C36-141-5:~ #  ethtool -S NIC0

NIC statistics:
     rx_ucast_packets: 11910

     rx_mcast_packets: 217

     rx_bcast_packets: 4958117

     rx_filtered_packets: 328897

     rx_fw_discards: 2750

C36-141-5:~ #

so rx_fw_discards: 2742-----> rx_fw_discards: 2750, lost 8 packets.

the information of bnx2
C36-141-5:~ # modinfo bnx2
kernel/drivers/net/ethernet/broadcom/bnx2.ko

firmware:       bnx2/bnx2-rv2p-09ax-6.0.17.fw

firmware:       bnx2/bnx2-rv2p-09-6.0.17.fw

firmware:       bnx2/bnx2-mips-09-6.2.1b.fw

firmware:       bnx2/bnx2-rv2p-06-6.0.15.fw

firmware:       bnx2/bnx2-mips-06-6.2.3.fw
version:        2.2.6

1) Firstly, I check the patches from 2.2.1 to 2.2.6, below patch is interesting.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0021850d0417a4dc38ed871d929b651b87e2ead9
Do not enable filter SORT MODE in chip init routine. This patch addresses an
issue where BCM5716 sporadically drops packets when changing multicast list.

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 8957eb5f4478..8c9a8b7787d2 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -4984,8 +4984,6 @@ bnx2_init_chip(struct bnx2 *bp)

 	bp->idle_chk_status_idx = 0xffff;

-	bp->rx_mode = BNX2_EMAC_RX_MODE_SORT_MODE;
-
 	/* Set up how to generate a link change interrupt. */
 	BNX2_WR(bp, BNX2_EMAC_ATTENTION_ENA, BNX2_EMAC_ATTENTION_ENA_LINK);

2) Secondly, I revert this patch, after verify it, I find rx_fw_discards does not increasing.
so I think this patch can fix current issue. But I'm not sure the issue of this patch to fix
will be reproduced?
I'm not convinced that what factor will trigger rx_fw_discards increasing?
And how to fix this?

Thanks a lot.

^ permalink raw reply related

* Re: [PATCH v3 2/2] net: qcom/emac: add phy-handle support for ACPI
From: Wang, Dongsheng @ 2018-10-26  3:04 UTC (permalink / raw)
  To: Timur Tabi, Andrew Lunn
  Cc: Zheng, Joey, f.fainelli@gmail.com, netdev@vger.kernel.org,
	robert.moore@intel.com, rjw@rjwysocki.net,
	linux-acpi@vger.kernel.org
In-Reply-To: <ea868a30-13a1-f695-4e34-d873d975ebc9@kernel.org>

On 2018/10/26 10:37, Timur Tabi wrote:
> On 10/25/18 9:18 PM, Wang, Dongsheng wrote:
>> But when I was reading Documentation/acpi/DSD-properties-rules.txt, my
>> understanding is we should try to conform to DT bindings. So maybe ACPI
>> doesn't have such a document, just DT bindings.
> There was an attempt to document DSDs, but it was abandoned after a while.
>
> https://github.com/ahs3/dsd
>

Yes, here's a database concept, and I asked some Intel guys, the answer
I got was there is no such database or document. :(


Cheers,

Dongsheng

^ permalink raw reply

* Re: [PATCH v3 2/2] net: qcom/emac: add phy-handle support for ACPI
From: Timur Tabi @ 2018-10-26  2:37 UTC (permalink / raw)
  To: Wang, Dongsheng, Andrew Lunn
  Cc: Zheng, Joey, f.fainelli@gmail.com, netdev@vger.kernel.org,
	robert.moore@intel.com, rjw@rjwysocki.net,
	linux-acpi@vger.kernel.org
In-Reply-To: <0a55d42c839140dd9a5264ab4dae4c8e@HXTBJIDCEMVIW02.hxtcorp.net>

On 10/25/18 9:18 PM, Wang, Dongsheng wrote:
> But when I was reading Documentation/acpi/DSD-properties-rules.txt, my
> understanding is we should try to conform to DT bindings. So maybe ACPI
> doesn't have such a document, just DT bindings.

There was an attempt to document DSDs, but it was abandoned after a while.

https://github.com/ahs3/dsd

^ permalink raw reply

* [PATCH net] ipv6/mcast: update mc_qrv when join new group
From: Hangbin Liu @ 2018-10-26  2:30 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Hangbin Liu

Currently we only set mc_qrv to sysctl_mld_qrv when interface up. If we
change sysctl_mld_qrv after interface up, it will has no effect.

Fix it by assigning latest sysctl_mld_qrv to idev mc_qrv when join new group.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 net/ipv6/mcast.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index dbab62e..bed4890 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -680,6 +680,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
 		return;
 
+	mc->idev->mc_qrv = sysctl_mld_qrv;
 	if (mld_in_v1_mode(mc->idev)) {
 		igmp6_join_group(mc);
 		return;
-- 
2.5.5

^ permalink raw reply related

* [PATCH net] bridge: do not add port to router list when receives query with source 0.0.0.0
From: Hangbin Liu @ 2018-10-26  2:28 UTC (permalink / raw)
  To: netdev
  Cc: Nikolay Aleksandrov, Jiri Pirko, Linus Lüssing,
	David S. Miller, Hangbin Liu

Based on RFC 4541, 2.1.1.  IGMP Forwarding Rules

  The switch supporting IGMP snooping must maintain a list of
  multicast routers and the ports on which they are attached.  This
  list can be constructed in any combination of the following ways:

  a) This list should be built by the snooping switch sending
     Multicast Router Solicitation messages as described in IGMP
     Multicast Router Discovery [MRDISC].  It may also snoop
     Multicast Router Advertisement messages sent by and to other
     nodes.

  b) The arrival port for IGMP Queries (sent by multicast routers)
     where the source address is not 0.0.0.0.

We should not add the port to router list when receives query with source
0.0.0.0.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 net/bridge/br_multicast.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 024139b..41cdafb 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1422,7 +1422,15 @@ static void br_multicast_query_received(struct net_bridge *br,
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
-	br_multicast_mark_router(br, port);
+
+	/* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
+	 * the arrival port for IGMP Queries where the source address
+	 * is 0.0.0.0 should not be added to router port list.
+	 */
+	if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
+	    (saddr->proto == htons(ETH_P_IPV6) &&
+	     !ipv6_addr_any(&saddr->u.ip6)))
+		br_multicast_mark_router(br, port);
 }
 
 static void br_ip4_multicast_query(struct net_bridge *br,
-- 
2.5.5

^ permalink raw reply related

* Re: [PATCH v3 2/2] net: qcom/emac: add phy-handle support for ACPI
From: Wang, Dongsheng @ 2018-10-26  2:18 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: timur@kernel.org, Zheng, Joey, f.fainelli@gmail.com,
	netdev@vger.kernel.org, robert.moore@intel.com, rjw@rjwysocki.net,
	linux-acpi@vger.kernel.org
In-Reply-To: <20181025192430.GA16785@lunn.ch>

On 2018/10/26 3:24, Andrew Lunn wrote:
> On Thu, Oct 25, 2018 at 06:09:15PM +0800, Wang Dongsheng wrote:
>> Use "phy-handle" to porint an internal MDIO device port.
> Hi Dongsheng
>
> You are basically defining how all future ACPI based MAC drivers get
> access to their PHY. This needs to become part of the ACPI standard,
> etc.
>
> This code should not be hidden away in the emac driver. It needs to be
> placed somewhere public so other drivers can use it. And it needs good
> documentation, including an example of what needs to go into the ACPI
> tables, etc.
Hi Andrew

I saw AppliedMicro(apm) xgene has used "phy-handle" for ACPI method, so
I guess "phy-handle" has become part of the ACPI standard. But I cannot
make sure.
I tried to confirm the property is defined in the document(Like DT
binding). However, I did not find any documentation on the description
property definition on the UEFI/ACPICA website.
But when I was reading Documentation/acpi/DSD-properties-rules.txt, my
understanding is we should try to conform to DT bindings. So maybe ACPI
doesn't have such a document, just DT bindings.

Cheers,
Dongsheng

> Thanks
> 	Andrew
>

^ permalink raw reply

* RE: [PATCH 3/3] usb: renesas_usbhs: Remove dummy runtime PM callbacks
From: Yoshihiro Shimoda @ 2018-10-26  2:09 UTC (permalink / raw)
  To: Wolfram Sang, Jarkko Nikula
  Cc: linux-pm@vger.kernel.org, linux-i2c@vger.kernel.org, Wolfram Sang,
	netdev@vger.kernel.org, David S . Miller, Sergei Shtylyov,
	linux-renesas-soc@vger.kernel.org, linux-usb@vger.kernel.org
In-Reply-To: <20181025225742.GB20207@kunai>

Hi Wolfram-san,

> From: Wolfram Sang, Sent: Friday, October 26, 2018 7:58 AM
<snip>
> >  static const struct dev_pm_ops usbhsc_pm_ops = {
> >  	.suspend		= usbhsc_suspend,
> >  	.resume			= usbhsc_resume,
> 
> Unrelated to this patch, but I wonder right now: is there a reason not
> to use SET_SYSTEM_SLEEP_PM_OPS here? Shimoda-san?

I don't know why because this code is contributed from Morimoto-san.
I'm guessing this code seems to come from sh_eth.c or i2c-sh_mobile.c
and we don't have the SET_SYSTEM_SLEEP_PM_OPS macro at 2009.
Morimoto-san contributed this code at 2010, but it seems not to realize
we have such macro.

Anyway, I'll try to use SET_SYSTEM_SLEEP_PM_OPS on the renesas_usbhs driver.

Best regards,
Yoshihiro Shimoda

^ permalink raw reply

* [PATCH 0/1] Make ipmr queue length configurable
From: Brodie Greenfield @ 2018-10-26  2:01 UTC (permalink / raw)
  To: davem, kuznet, yoshfuji
  Cc: netdev, linux-kernel, chris.packham, luuk.paulussen,
	Brodie Greenfield

We want to have some more space in our queue for processing incoming
multicast packets, so we can process more of them without dropping
them prematurely. It is useful to be able to increase this limit on
higher-spec platforms that can handle more items.

For the particular use case here at Allied Telesis, we have linux
running on our switches and routers, with support for the number of
multicast groups being increased. Basically, this queue length affects
the time taken to fully learn all of the multicast streams. 

Brodie Greenfield (1):
  ipmr: Make cache queue length configurable

 Documentation/networking/ip-sysctl.txt | 7 +++++++
 include/net/netns/ipv4.h               | 1 +
 include/uapi/linux/sysctl.h            | 1 +
 kernel/sysctl_binary.c                 | 1 +
 net/ipv4/af_inet.c                     | 2 ++
 net/ipv4/ipmr.c                        | 4 +++-
 net/ipv4/sysctl_net_ipv4.c             | 7 +++++++
 7 files changed, 22 insertions(+), 1 deletion(-)

-- 
2.19.1

^ permalink raw reply

* Re: [PATCH bpf 0/7] Batch of direct packet access fixes for BPF
From: Alexei Starovoitov @ 2018-10-26  0:11 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: ast, netdev
In-Reply-To: <20181024200549.8516-1-daniel@iogearbox.net>

On Wed, Oct 24, 2018 at 10:05:42PM +0200, Daniel Borkmann wrote:
> Several fixes to get direct packet access in order from verifier
> side. Also test suite fix to run cg_skb as unpriv and an improvement
> to make direct packet write less error prone in future.

Applied, Thanks

^ permalink raw reply

* Re: [PATCH ghak90 (was ghak32) V4 03/10] audit: log container info of syscalls
From: Casey Schaufler @ 2018-10-26  8:09 UTC (permalink / raw)
  To: Steve Grubb, Paul Moore
  Cc: luto, rgb, linux-api, containers, linux-kernel, viro, dhowells,
	carlos, linux-audit, netfilter-devel, ebiederm, simo, netdev,
	linux-fsdevel, Eric Paris, Serge Hallyn
In-Reply-To: <20181025235527.15a39d75@ivy-bridge>

On 10/25/2018 2:55 PM, Steve Grubb wrote:
> ...
> And historically speaking setting audit loginuid produces a LOGIN
> event, so it only makes sense to consider binding container ID to
> container as a CONTAINER event. For other supplemental records, we name
> things what they are: PATH, CWD, SOCKADDR, etc. So, CONTAINER_ID makes
> sense. CONTAINER_OP sounds like its for operations on a container. Do
> we have any operations on a container?

The answer has to be "no", because containers are, by emphatic assertion,
not kernel constructs. Any CONTAINER_OP event has to come from user space.
I think.

^ permalink raw reply

* Re: [PATCH] net/{ipv4,ipv6}: Do not put target net if input nsid is invalid
From: David Miller @ 2018-10-25 23:22 UTC (permalink / raw)
  To: bjorn; +Cc: netdev, lirongqing, dsahern
In-Reply-To: <20181025191825.23936-1-bjorn@mork.no>

From: Bjørn Mork <bjorn@mork.no>
Date: Thu, 25 Oct 2018 21:18:25 +0200

> The cleanup path will put the target net when netnsid is set.  So we must
> reset netnsid if the input is invalid.
> 
> Fixes: d7e38611b81e ("net/ipv4: Put target net when address dump fails due to bad attributes")
> Fixes: 242afaa6968c ("net/ipv6: Put target net when address dump fails due to bad attributes")
> Cc: David Ahern <dsahern@gmail.com>
> Signed-off-by: Bjørn Mork <bjorn@mork.no>

Applied, thank you.

^ permalink raw reply

* Re: [PATCH v1 net] lan743x: Remove SPI dependency from Microchip group.
From: David Miller @ 2018-10-25 23:21 UTC (permalink / raw)
  To: Bryan.Whitehead; +Cc: netdev, UNGLinuxDriver
In-Reply-To: <1540487378-18986-2-git-send-email-Bryan.Whitehead@microchip.com>

From: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Date: Thu, 25 Oct 2018 13:09:38 -0400

> The SPI dependency does not apply to lan743x driver, and other
> drivers in the group already state their dependence on SPI.
> 
> Signed-off-by: Bryan Whitehead <Bryan.Whitehead@microchip.com>

Yep, make sense.

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net] drivers: net: remove <net/busy_poll.h> inclusion when not needed
From: David Miller @ 2018-10-25 23:20 UTC (permalink / raw)
  To: edumazet; +Cc: netdev, eric.dumazet
In-Reply-To: <20181025134212.34029-1-edumazet@google.com>

From: Eric Dumazet <edumazet@google.com>
Date: Thu, 25 Oct 2018 06:42:12 -0700

> Drivers using generic NAPI interface no longer need to include
> <net/busy_poll.h>, since busy polling was moved to core networking
> stack long ago.
> 
> See commit 79e7fff47b7b ("net: remove support for per driver
> ndo_busy_poll()") for reference.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied, thanks Eric.

^ permalink raw reply

* Re: [PATCH net] net: phy: genphy_10g_driver: Avoid NULL pointer dereference
From: David Miller @ 2018-10-25 23:19 UTC (permalink / raw)
  To: andrew; +Cc: netdev, f.fainelli, jose.abreu
In-Reply-To: <1540471358-6218-1-git-send-email-andrew@lunn.ch>

From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 25 Oct 2018 14:42:38 +0200

> This driver got missed during the recent change of .features from a
> u32 to a pointer to a Linux bitmap. Change the initialisation from 0
> to PHY_10GBIT_FEATURES so removing the danger of a NULL pointer
> dereference.
> 
> Fixes: 719655a14971 ("net: phy: Replace phy driver features u32 with link_mode bitmap")
> Reported-by: Jose Abreu <jose.abreu@synopsys.com>
> Signed-off-by: Andrew Lunn <andrew@lunn.ch>

Applied, thanks Andrew.

^ permalink raw reply

* Re: [PATCH net] r8169: fix broken Wake-on-LAN from S5 (poweroff)
From: David Miller @ 2018-10-25 23:16 UTC (permalink / raw)
  To: hkallweit1; +Cc: nic_swsd, netdev, neil
In-Reply-To: <6a3d4915-1d27-89b2-953a-66ec6d82b846@gmail.com>

From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 25 Oct 2018 18:40:19 +0200

> It was reported that WoL from S5 is broken (WoL from S3 works) and the
> analysis showed that during system shutdown the network interface was
> brought down already when the actual kernel shutdown started.
> Therefore netif_running() returned false and as a consequence the PHY
> was suspended. Obviously WoL wasn't working then.
> To fix this the original patch needs to be effectively reverted.
> A side effect is that when normally bringing down the interface and
> WoL is enabled the PHY will remain powered on (like it was before the
> original patch).
> 
> Fixes: fe87bef01f9b ("r8169: don't check WoL when powering down PHY and interface is down")
> Reported-by: Neil MacLeod <neil@nmacleod.com>
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>

Applied and queued up for -stable, thanks.

^ permalink raw reply

* KASAN: use-after-free Read in sk_psock_unlink
From: syzbot @ 2018-10-26  7:37 UTC (permalink / raw)
  To: daniel, davem, john.fastabend, linux-kernel, netdev,
	syzkaller-bugs

Hello,

syzbot found the following crash on:

HEAD commit:    8c60c36d0b8c Add linux-next specific files for 20181019
git tree:       linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=17356dbd400000
kernel config:  https://syzkaller.appspot.com/x/.config?x=8b6d7c4c81535e89
dashboard link: https://syzkaller.appspot.com/bug?extid=3acd9f67a6a15766686e
compiler:       gcc (GCC) 8.0.1 20180413 (experimental)

Unfortunately, I don't have any reproducer for this crash yet.

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+3acd9f67a6a15766686e@syzkaller.appspotmail.com

EXT4-fs (loop5): couldn't mount RDWR because of unsupported optional  
features (80)
EXT4-fs (loop5): couldn't mount RDWR because of unsupported optional  
features (80)
input: \x02 as /devices/virtual/input/input21
==================================================================
BUG: KASAN: use-after-free in sk_psock_unlink+0x4d8/0x700  
net/core/sock_map.c:992
Read of size 4 at addr ffff8801b7fc1018 by task syz-executor4/21409

CPU: 0 PID: 21409 Comm: syz-executor4 Not tainted 4.19.0-rc8-next-20181019+  
#98
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x244/0x39d lib/dump_stack.c:113
  print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412
  __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:432
  sk_psock_unlink+0x4d8/0x700 net/core/sock_map.c:992
  tcp_bpf_remove+0xd0/0x130 net/ipv4/tcp_bpf.c:511
  tcp_bpf_close+0x1c6/0x4a0 net/ipv4/tcp_bpf.c:551
  inet_release+0x104/0x1f0 net/ipv4/af_inet.c:428
  __sock_release+0xd7/0x250 net/socket.c:580
  sock_close+0x19/0x20 net/socket.c:1142
  __fput+0x3bc/0xa70 fs/file_table.c:279
  ____fput+0x15/0x20 fs/file_table.c:312
  task_work_run+0x1e8/0x2a0 kernel/task_work.c:113
  get_signal+0x1550/0x1970 kernel/signal.c:2347
  do_signal+0x9c/0x21c0 arch/x86/kernel/signal.c:816
  exit_to_usermode_loop+0x2e5/0x380 arch/x86/entry/common.c:162
  prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
  syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
  do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457569
Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7  
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff  
ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fd88965ec78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
RAX: 0000000000280000 RBX: 0000000000000006 RCX: 0000000000457569
RDX: fffffffffffffe6e RSI: 0000000020a88f88 RDI: 0000000000000003
RBP: 000000000072bf00 R08: 0000000020e68000 R09: 0000000000000010
R10: 0000000020000000 R11: 0000000000000246 R12: 00007fd88965f6d4
R13: 00000000004c3915 R14: 00000000004d57c0 R15: 00000000ffffffff

Allocated by task 21423:
  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
  set_track mm/kasan/kasan.c:460 [inline]
  kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553
  kmem_cache_alloc_trace+0x152/0x750 mm/slab.c:3620
  kmalloc include/linux/slab.h:546 [inline]
  kzalloc include/linux/slab.h:741 [inline]
  sock_hash_alloc+0x1eb/0x5a0 net/core/sock_map.c:801
  find_and_alloc_map kernel/bpf/syscall.c:129 [inline]
  map_create+0x3bd/0x1100 kernel/bpf/syscall.c:509
  __do_sys_bpf kernel/bpf/syscall.c:2394 [inline]
  __se_sys_bpf kernel/bpf/syscall.c:2371 [inline]
  __x64_sys_bpf+0x303/0x510 kernel/bpf/syscall.c:2371
  do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 10109:
  save_stack+0x43/0xd0 mm/kasan/kasan.c:448
  set_track mm/kasan/kasan.c:460 [inline]
  __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521
  kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
  __cache_free mm/slab.c:3498 [inline]
  kfree+0xcf/0x230 mm/slab.c:3817
  sock_hash_free+0x450/0x640 net/core/sock_map.c:864
  bpf_map_free_deferred+0xd9/0x110 kernel/bpf/syscall.c:290
  process_one_work+0xc8b/0x1c40 kernel/workqueue.c:2153
  worker_thread+0x17f/0x1390 kernel/workqueue.c:2296
  kthread+0x35a/0x440 kernel/kthread.c:246
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

The buggy address belongs to the object at ffff8801b7fc1000
  which belongs to the cache kmalloc-512 of size 512
The buggy address is located 24 bytes inside of
  512-byte region [ffff8801b7fc1000, ffff8801b7fc1200)
The buggy address belongs to the page:
page:ffffea0006dff040 count:1 mapcount:0 mapping:ffff8801da800940 index:0x0
flags: 0x2fffc0000000200(slab)
raw: 02fffc0000000200 ffffea0006ee5888 ffffea00071fc6c8 ffff8801da800940
raw: 0000000000000000 ffff8801b7fc1000 0000000100000006 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
  ffff8801b7fc0f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  ffff8801b7fc0f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
> ffff8801b7fc1000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                             ^
  ffff8801b7fc1080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  ffff8801b7fc1100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
==================================================================


---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller@googlegroups.com.

syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with  
syzbot.

^ permalink raw reply

* Re: [PATCH 3/3] usb: renesas_usbhs: Remove dummy runtime PM callbacks
From: Wolfram Sang @ 2018-10-25 22:57 UTC (permalink / raw)
  To: Jarkko Nikula, Yoshihiro Shimoda
  Cc: linux-pm, linux-i2c, Wolfram Sang, netdev, David S . Miller,
	Sergei Shtylyov, linux-renesas-soc, linux-usb, Yoshihiro Shimoda
In-Reply-To: <20181024135134.28456-4-jarkko.nikula@linux.intel.com>

[-- Attachment #1: Type: text/plain, Size: 829 bytes --]

On Wed, Oct 24, 2018 at 04:51:34PM +0300, Jarkko Nikula wrote:
> Platform drivers don't need dummy runtime PM callbacks that just return
> success in order to have runtime PM happening. This has changed since
> following commits:
> 
> 05aa55dddb9e ("PM / Runtime: Lenient generic runtime pm callbacks")
> 543f2503a956 ("PM / platform_bus: Allow runtime PM by default")
> 8b313a38ecff ("PM / Platform: Use generic runtime PM callbacks directly")
> 
> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>

Acked-by: Wolfram Sang <wsa+renesas@sang-engineering.com>

>  static const struct dev_pm_ops usbhsc_pm_ops = {
>  	.suspend		= usbhsc_suspend,
>  	.resume			= usbhsc_resume,

Unrelated to this patch, but I wonder right now: is there a reason not
to use SET_SYSTEM_SLEEP_PM_OPS here? Shimoda-san?


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH v2 bpf] bpf: devmap: fix wrong interface selection in notifier_call
From: Daniel Borkmann @ 2018-10-25 22:56 UTC (permalink / raw)
  To: Taehee Yoo, ast; +Cc: netdev, john.fastabend
In-Reply-To: <20181024111517.13361-1-ap420073@gmail.com>

On 10/24/2018 01:15 PM, Taehee Yoo wrote:
> The dev_map_notification() removes interface in devmap if
> unregistering interface's ifindex is same.
> But only checking ifindex is not enough because other netns can have
> same ifindex. so that wrong interface selection could occurred.
> Hence netdev pointer comparison code is added.
> 
> v2: compare netdev pointer instead of using net_eq() (Daniel Borkmann)
> v1: Initial patch
> 
> Fixes: 2ddf71e23cc2 ("net: add notifier hooks for devmap bpf map")
> Signed-off-by: Taehee Yoo <ap420073@gmail.com>

Applied to bpf, thanks Taehee!

^ permalink raw reply

* Re: [PATCH] selftests/bpf: add config fragments BPF_STREAM_PARSER and XDP_SOCKETS
From: Daniel Borkmann @ 2018-10-25 22:55 UTC (permalink / raw)
  To: Naresh Kamboju, netdev; +Cc: bhole_prashant_q7, davem
In-Reply-To: <1540478848-27827-1-git-send-email-naresh.kamboju@linaro.org>

On 10/25/2018 04:47 PM, Naresh Kamboju wrote:
> BPF sockmap and hashmap are dependent on CONFIG_BPF_STREAM_PARSER and
> xskmap is dependent on CONFIG_XDP_SOCKETS
> 
> Signed-off-by: Naresh Kamboju <naresh.kamboju@linaro.org>

Applied to bpf, thanks Naresh!

^ permalink raw reply

* Re: [PATCH 2/3] net: ethernet: Remove dummy runtime PM callbacks from Renesas drivers
From: Wolfram Sang @ 2018-10-25 22:54 UTC (permalink / raw)
  To: Jarkko Nikula
  Cc: linux-pm, linux-i2c, Wolfram Sang, netdev, David S . Miller,
	Sergei Shtylyov, linux-renesas-soc, linux-usb, Yoshihiro Shimoda
In-Reply-To: <20181024135134.28456-3-jarkko.nikula@linux.intel.com>

[-- Attachment #1: Type: text/plain, Size: 584 bytes --]

On Wed, Oct 24, 2018 at 04:51:33PM +0300, Jarkko Nikula wrote:
> Platform drivers don't need dummy runtime PM callbacks that just return
> success in order to have runtime PM happening. This has changed since
> following commits:
> 
> 05aa55dddb9e ("PM / Runtime: Lenient generic runtime pm callbacks")
> 543f2503a956 ("PM / platform_bus: Allow runtime PM by default")
> 8b313a38ecff ("PM / Platform: Use generic runtime PM callbacks directly")
> 
> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>

Acked-by: Wolfram Sang <wsa+renesas@sang-engineering.com>


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH net-next] net/ipv6: Block IPv6 addrconf on team ports
From: Jay Vosburgh @ 2018-10-25 22:40 UTC (permalink / raw)
  To: Chas Williams; +Cc: davem, netdev, vfalico, andy, jiri, kuznet, yoshfuji
In-Reply-To: <b8ae5ea8-985f-432c-e90a-7cd9b9da6009@gmail.com>

Chas Williams <3chas3@gmail.com> wrote:

>On 10/25/2018 05:59 PM, Jay Vosburgh wrote:
>> Chas Williams <3chas3@gmail.com> wrote:
>>
>>> netif_is_lag_port should be used to identify link aggregation ports.
>>> For this to work, we need to reorganize the bonding and team drivers
>>> so that the necessary flags are set before dev_open is called.
>>>
>>> commit 31e77c93e432 ("sched/fair: Update blocked load when newly idle")
>>> made this decision originally based on the IFF_SLAVE flag which isn't
>>> used by the team driver.  Note, we do need to retain the IFF_SLAVE
>>> check for the eql driver.
>>
>> 	Is 31e77c93e432 the correct commit reference?  I don't see
>> anything in there about IFF_SLAVE or bonding; it's a patch to the
>> process scheduler.
>
>No, that's wrong.  It should be c2edacf80e155.
>
>> 	And, as Jiri said, the subject doesn't mention bonding.
>
>The behavior of bonding wasn't changed.  The intent of the patch
>is to add team slaves to the interfaces that don't get automatic
>IPv6 addresses.  The body discusses why bonding had to change as
>well.

	Sure, but the bonding code has changed, and the current
presentation makes it harder for reviewers to follow (or perhaps even
notice).

>I was under the impression that the subject needs to kept short.
>If there a better way to phrase what I want to do?

	I'd suggest splitting this into three patches: A first patch
that adds the new IPv6 functionality, then one patch each for team and
bonding to take advantage of that new functionality.  Each of the three
would then be very straightforward, change just one thing, and should be
clearer all around.

	-J

>>> Signed-off-by: Chas Williams <3chas3@gmail.com>
>>> ---
>>> drivers/net/bonding/bond_main.c | 4 ++--
>>> drivers/net/team/team.c         | 7 +++++--
>>> net/ipv6/addrconf.c             | 2 +-
>>> 3 files changed, 8 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>> index ffa37adb7681..5cdad164332b 100644
>>> --- a/drivers/net/bonding/bond_main.c
>>> +++ b/drivers/net/bonding/bond_main.c
>>> @@ -1536,6 +1536,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>>>
>>> 	/* set slave flag before open to prevent IPv6 addrconf */
>>> 	slave_dev->flags |= IFF_SLAVE;
>>> +	slave_dev->priv_flags |= IFF_BONDING;
>>>
>>> 	/* open the slave since the application closed it */
>>> 	res = dev_open(slave_dev);
>>> @@ -1544,7 +1545,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>>> 		goto err_restore_mac;
>>> 	}
>>>
>>> -	slave_dev->priv_flags |= IFF_BONDING;
>>> 	/* initialize slave stats */
>>> 	dev_get_stats(new_slave->dev, &new_slave->slave_stats);
>>>
>>> @@ -1804,10 +1804,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>>> 	slave_disable_netpoll(new_slave);
>>>
>>> err_close:
>>> -	slave_dev->priv_flags &= ~IFF_BONDING;
>>> 	dev_close(slave_dev);
>>>
>>> err_restore_mac:
>>> +	slave_dev->priv_flags &= ~IFF_BONDING;
>>> 	slave_dev->flags &= ~IFF_SLAVE;
>>> 	if (!bond->params.fail_over_mac ||
>>> 	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
>>> diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
>>> index db633ae9f784..8fc7d57e9f6d 100644
>>> --- a/drivers/net/team/team.c
>>> +++ b/drivers/net/team/team.c
>>> @@ -1128,14 +1128,12 @@ static int team_upper_dev_link(struct team *team, struct team_port *port,
>>> 					   &lag_upper_info, extack);
>>> 	if (err)
>>> 		return err;
>>> -	port->dev->priv_flags |= IFF_TEAM_PORT;
>>> 	return 0;
>>> }
>>>
>>> static void team_upper_dev_unlink(struct team *team, struct team_port *port)
>>> {
>>> 	netdev_upper_dev_unlink(port->dev, team->dev);
>>> -	port->dev->priv_flags &= ~IFF_TEAM_PORT;
>>> }
>>>
>>> static void __team_port_change_port_added(struct team_port *port, bool linkup);
>>> @@ -1214,6 +1212,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
>>> 		goto err_port_enter;
>>> 	}
>>>
>>> +	/* set slave flag before open to prevent IPv6 addrconf */
>>> +	port->dev->priv_flags |= IFF_TEAM_PORT;
>>> +
>>> 	err = dev_open(port_dev);
>>> 	if (err) {
>>> 		netdev_dbg(dev, "Device %s opening failed\n",
>>> @@ -1292,6 +1293,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
>>> 	dev_close(port_dev);
>>>
>>> err_dev_open:
>>> +	port->dev->priv_flags &= ~IFF_TEAM_PORT;
>>> 	team_port_leave(team, port);
>>> 	team_port_set_orig_dev_addr(port);
>>>
>>> @@ -1328,6 +1330,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
>>> 	dev_uc_unsync(port_dev, dev);
>>> 	dev_mc_unsync(port_dev, dev);
>>> 	dev_close(port_dev);
>>> +	port->dev->priv_flags &= ~IFF_TEAM_PORT;
>>> 	team_port_leave(team, port);
>>>
>>> 	__team_option_inst_mark_removed_port(team, port);
>>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>>> index 45b84dd5c4eb..121f863022ed 100644
>>> --- a/net/ipv6/addrconf.c
>>> +++ b/net/ipv6/addrconf.c
>>> @@ -3482,7 +3482,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
>>>
>>> 	case NETDEV_UP:
>>> 	case NETDEV_CHANGE:
>>> -		if (dev->flags & IFF_SLAVE)
>>> +		if (netif_is_lag_port(dev) || dev->flags & IFF_SLAVE)
>>
>> 	Note that netvsc_vf_join() also uses IFF_SLAVE in order skip
>> IPv6 addrconf for netvsc devices; I don't believe its usage will pass
>> netif_is_lag_port().  It looks like the above will work, but your commit
>> message mentions eql as the reason for retaining the IFF_SLAVE test, and
>> eql isn't the only user of IFF_SLAVE in this manner.
>>
>> 	-J
>>
>>> 			break;
>>>
>>> 		if (idev && idev->cnf.disable_ipv6)
>>> -- 
>>> 2.14.4

---
	-Jay Vosburgh, jay.vosburgh@canonical.com

^ permalink raw reply

* RE: [PATCH] bonding:avoid repeated display of same link status change
From: Manish Kumar Singh @ 2018-10-26  6:49 UTC (permalink / raw)
  To: Michal Kubecek
  Cc: Eric Dumazet,
	Mahesh Bandewar (महेश बंडेवार),
	linux-netdev, Jay Vosburgh, Veaceslav Falico, Andy Gospodarek,
	David S. Miller, linux-kernel
In-Reply-To: <20181025092930.GC22291@unicorn.suse.cz>



> -----Original Message-----
> From: Michal Kubecek [mailto:mkubecek@suse.cz]
> Sent: 25 अक्तूबर 2018 14:59
> To: Manish Kumar Singh
> Cc: Eric Dumazet; Mahesh Bandewar (महेश बंडेवार); linux-netdev; Jay
> Vosburgh; Veaceslav Falico; Andy Gospodarek; David S. Miller; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH] bonding:avoid repeated display of same link status
> change
> 
> On Thu, Oct 25, 2018 at 02:21:05AM -0700, Manish Kumar Singh wrote:
> > > From: Michal Kubecek [mailto:mkubecek@suse.cz]
> > > IMHO it does not. AFAICS multiple instances of bond_mii_monitor()
> cannot
> > > run simultaneously for the same bond so that there doesn't seem to be
> > > anything to collide with. (And if they could, we would need to test and
> > > set the flag atomically in bond_miimon_inspect().)
> > >
> > Yes, Michal, we are inline with your understanding.
> > when the -original- patch was posted to upstream there was no
> > -synchronization- nor -racing- addressing code was in read/write of this
> > added filed, as we -never- saw need for either.
> >
> > -only- writer of the added field is bond_mii_monitor.
> > -only- reader of the added field is bond_miimon_inspect.
> > -this writer & reader -never- can run concurrently.
> > -writer invokes the reader.
> >
> > hence, imo uint_8 rtnl_needed is all what is needed; with
> bond_mii_monitor doing rtnl_needed = 1; and bond_miimon_inspect doing
> if rtnl_needed.
> >
> > here is the gravity of the situation with multiple customers whose names
> including machine names redacted:
> >
> >  4353 May 31 02:38:57 hostname kernel: ixgbe 0000:03:00.0: removed PHC
> on p2p1
> >  4354 May 31 02:38:57 hostname kernel: public: link status down for active
> interface p2p1, disabling it in 100 ms
> >  4355 May 31 02:38:57 hostname kernel: public: link status down for active
> interface p2p1, disabling it in 100 ms
> >  4356 May 31 02:38:57 hostname kernel: public: link status definitely down
> for interface p2p1, disabling it
> >  4357 May 31 02:38:57 hostname kernel: public: making interface p2p2 the
> new active one
> >  4358 May 31 02:38:59 hostname kernel: ixgbe 0000:03:00.0: registered PHC
> device on p2p1
> >  4359 May 31 02:39:00 hostname kernel: ixgbe 0000:03:00.0 p2p1: NIC Link is
> Up 10 Gbps, Flow Control: RX/TX
> >  4360 May 31 02:39:00 hostname kernel: public: link status up for interface
> p2p1, enabling it in 200 ms
> >  4361 May 31 02:39:00 hostname kernel: public: link status definitely up for
> interface p2p1, 10000 Mbps full duplex
> >  4362 May 31 02:45:37 hostname journal: Missed 217723 kernel messages
> >  4363 May 31 02:45:37 hostname kernel: public: link status down for active
> interface p2p2, disabling it in 100 ms
> >  	---------------------
> >                       11000+ APPROX SAME REPEATED MESSAGES in second
> > 	---------------------
> > 15877 May 31 02:45:37 hostname kernel: public: link status down for active
> interface p2p2, disabling it in 100 ms
> > 15878 May 31 02:45:37 hostname kernel: public: link status definitely down
> for interface p2p2, disabling it
> > 15879 May 31 02:45:37 hostname kernel: public: making interface p2p1 the
> new active one
> 
> When I was replying, I didn't know this was a v2 and I haven't seen the
> v1 discussion. I have read it since and I think I understand Eric's
> point now. The thing is that just adding e.g. u8 is OK as it is now.
> However, someone could later add another u8 next to it which would also
> be perfectly OK on its own but reads/writes to these two could collide
> between each other.
> 
> And as pointed out by a colleague, even having atomic_t and u8 flag in
> one 64-bit word could be a problem on architectures which cannot do an
> atomic read/write from/to a 32-bit word (sparc seems to be one).

Thanks Michal for explaining it, now we understand the problem what Eric was referring to in v1 of the patch.
I could think of fixing it in 3 ways, Please suggest which one would be safe and optimal fix:

1. Use type unit64_t for rtnl_needed .
2. Use type atomic64_t for rtnl_needed and atomic64_set/read.
3. Use type uint64_t for rtnl_needed with spinlock protection.

I think option 3 would be overkill keeping in mind the frequency of bond_mii_monitor.

Thanks,
Manish
> 
> Michal Kubecek

^ permalink raw reply

* Re: [PATCH net-next] net/ipv6: Block IPv6 addrconf on team ports
From: Chas Williams @ 2018-10-25 22:12 UTC (permalink / raw)
  To: Jay Vosburgh; +Cc: davem, netdev, vfalico, andy, jiri, kuznet, yoshfuji
In-Reply-To: <25151.1540504752@famine>



On 10/25/2018 05:59 PM, Jay Vosburgh wrote:
> Chas Williams <3chas3@gmail.com> wrote:
> 
>> netif_is_lag_port should be used to identify link aggregation ports.
>> For this to work, we need to reorganize the bonding and team drivers
>> so that the necessary flags are set before dev_open is called.
>>
>> commit 31e77c93e432 ("sched/fair: Update blocked load when newly idle")
>> made this decision originally based on the IFF_SLAVE flag which isn't
>> used by the team driver.  Note, we do need to retain the IFF_SLAVE
>> check for the eql driver.
> 
> 	Is 31e77c93e432 the correct commit reference?  I don't see
> anything in there about IFF_SLAVE or bonding; it's a patch to the
> process scheduler.

No, that's wrong.  It should be c2edacf80e155.

> 	And, as Jiri said, the subject doesn't mention bonding.

The behavior of bonding wasn't changed.  The intent of the patch
is to add team slaves to the interfaces that don't get automatic
IPv6 addresses.  The body discusses why bonding had to change as
well.

I was under the impression that the subject needs to kept short.
If there a better way to phrase what I want to do?

> 
>> Signed-off-by: Chas Williams <3chas3@gmail.com>
>> ---
>> drivers/net/bonding/bond_main.c | 4 ++--
>> drivers/net/team/team.c         | 7 +++++--
>> net/ipv6/addrconf.c             | 2 +-
>> 3 files changed, 8 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index ffa37adb7681..5cdad164332b 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -1536,6 +1536,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>>
>> 	/* set slave flag before open to prevent IPv6 addrconf */
>> 	slave_dev->flags |= IFF_SLAVE;
>> +	slave_dev->priv_flags |= IFF_BONDING;
>>
>> 	/* open the slave since the application closed it */
>> 	res = dev_open(slave_dev);
>> @@ -1544,7 +1545,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>> 		goto err_restore_mac;
>> 	}
>>
>> -	slave_dev->priv_flags |= IFF_BONDING;
>> 	/* initialize slave stats */
>> 	dev_get_stats(new_slave->dev, &new_slave->slave_stats);
>>
>> @@ -1804,10 +1804,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
>> 	slave_disable_netpoll(new_slave);
>>
>> err_close:
>> -	slave_dev->priv_flags &= ~IFF_BONDING;
>> 	dev_close(slave_dev);
>>
>> err_restore_mac:
>> +	slave_dev->priv_flags &= ~IFF_BONDING;
>> 	slave_dev->flags &= ~IFF_SLAVE;
>> 	if (!bond->params.fail_over_mac ||
>> 	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
>> diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
>> index db633ae9f784..8fc7d57e9f6d 100644
>> --- a/drivers/net/team/team.c
>> +++ b/drivers/net/team/team.c
>> @@ -1128,14 +1128,12 @@ static int team_upper_dev_link(struct team *team, struct team_port *port,
>> 					   &lag_upper_info, extack);
>> 	if (err)
>> 		return err;
>> -	port->dev->priv_flags |= IFF_TEAM_PORT;
>> 	return 0;
>> }
>>
>> static void team_upper_dev_unlink(struct team *team, struct team_port *port)
>> {
>> 	netdev_upper_dev_unlink(port->dev, team->dev);
>> -	port->dev->priv_flags &= ~IFF_TEAM_PORT;
>> }
>>
>> static void __team_port_change_port_added(struct team_port *port, bool linkup);
>> @@ -1214,6 +1212,9 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
>> 		goto err_port_enter;
>> 	}
>>
>> +	/* set slave flag before open to prevent IPv6 addrconf */
>> +	port->dev->priv_flags |= IFF_TEAM_PORT;
>> +
>> 	err = dev_open(port_dev);
>> 	if (err) {
>> 		netdev_dbg(dev, "Device %s opening failed\n",
>> @@ -1292,6 +1293,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
>> 	dev_close(port_dev);
>>
>> err_dev_open:
>> +	port->dev->priv_flags &= ~IFF_TEAM_PORT;
>> 	team_port_leave(team, port);
>> 	team_port_set_orig_dev_addr(port);
>>
>> @@ -1328,6 +1330,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
>> 	dev_uc_unsync(port_dev, dev);
>> 	dev_mc_unsync(port_dev, dev);
>> 	dev_close(port_dev);
>> +	port->dev->priv_flags &= ~IFF_TEAM_PORT;
>> 	team_port_leave(team, port);
>>
>> 	__team_option_inst_mark_removed_port(team, port);
>> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
>> index 45b84dd5c4eb..121f863022ed 100644
>> --- a/net/ipv6/addrconf.c
>> +++ b/net/ipv6/addrconf.c
>> @@ -3482,7 +3482,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
>>
>> 	case NETDEV_UP:
>> 	case NETDEV_CHANGE:
>> -		if (dev->flags & IFF_SLAVE)
>> +		if (netif_is_lag_port(dev) || dev->flags & IFF_SLAVE)
> 
> 	Note that netvsc_vf_join() also uses IFF_SLAVE in order skip
> IPv6 addrconf for netvsc devices; I don't believe its usage will pass
> netif_is_lag_port().  It looks like the above will work, but your commit
> message mentions eql as the reason for retaining the IFF_SLAVE test, and
> eql isn't the only user of IFF_SLAVE in this manner.
> 
> 	-J
> 
>> 			break;
>>
>> 		if (idev && idev->cnf.disable_ipv6)
>> -- 
>> 2.14.4
>>
> 
> ---
> 	-Jay Vosburgh, jay.vosburgh@canonical.com
> 

^ permalink raw reply

* Re: [PATCH net-next] net/ipv6: Block IPv6 addrconf on team ports
From: Chas Williams @ 2018-10-25 22:08 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, netdev, j.vosburgh, vfalico, andy, kuznet, yoshfuji
In-Reply-To: <20181025211008.GA2229@nanopsycho.orion>



On 10/25/2018 05:10 PM, Jiri Pirko wrote:
> Thu, Oct 25, 2018 at 11:02:27PM CEST, 3chas3@gmail.com wrote:
>> netif_is_lag_port should be used to identify link aggregation ports.
>> For this to work, we need to reorganize the bonding and team drivers
>> so that the necessary flags are set before dev_open is called.
>>
>> commit 31e77c93e432 ("sched/fair: Update blocked load when newly idle")
>> made this decision originally based on the IFF_SLAVE flag which isn't
>> used by the team driver.  Note, we do need to retain the IFF_SLAVE
>> check for the eql driver.
>>
>> Signed-off-by: Chas Williams <3chas3@gmail.com>
>> ---
>> drivers/net/bonding/bond_main.c | 4 ++--
>> drivers/net/team/team.c         | 7 +++++--
>> net/ipv6/addrconf.c             | 2 +-
> 
> Subject talks about "team" yet you modify bond and team. Confusing..

The subject discusses what I want to do. The body of the message
covers how I had to do it. The behavior of bonding with respect to
addrconf isn't changed but netif_is_lag_port is picky about the
flags it wants to see from bonding.  So some bonding changes are
necessary.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox