* [PATCH net-next 4/4] ARM64: dts: marvell: Add network support for Armada 3700
From: Gregory CLEMENT @ 2016-11-22 16:48 UTC (permalink / raw)
To: David S. Miller, linux-kernel, netdev
Cc: Thomas Petazzoni, Andrew Lunn, Jason Cooper, Gregory CLEMENT,
linux-arm-kernel, Sebastian Hesselbarth
In-Reply-To: <20161122164844.19566-1-gregory.clement@free-electrons.com>
Add neta nodes for network support both in device tree for the SoC and
the board.
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
arch/arm64/boot/dts/marvell/armada-3720-db.dts | 23 +++++++++++++++++++++++
arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 23 +++++++++++++++++++++++
2 files changed, 46 insertions(+)
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
index 1372e9a6aaa4..c8b82e4145de 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts
@@ -81,3 +81,26 @@
&pcie0 {
status = "okay";
};
+
+&mdio {
+ status = "okay";
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ };
+};
+
+ð0 {
+ phy-mode = "rgmii-id";
+ phy = <&phy0>;
+ status = "okay";
+};
+
+ð1 {
+ phy-mode = "rgmii-id";
+ phy = <&phy1>;
+ status = "okay";
+};
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index c4762538ec01..a7278ce9e523 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -140,6 +140,29 @@
};
};
+ eth0: ethernet@30000 {
+ compatible = "marvell,armada-3700-neta";
+ reg = <0x30000 0x4000>;
+ interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sb_periph_clk 8>;
+ status = "disabled";
+ };
+
+ mdio: mdio@32004 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "marvell,orion-mdio";
+ reg = <0x32004 0x4>;
+ };
+
+ eth1: ethernet@40000 {
+ compatible = "marvell,armada-3700-neta";
+ reg = <0x40000 0x4000>;
+ interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&sb_periph_clk 7>;
+ status = "disabled";
+ };
+
usb3: usb@58000 {
compatible = "marvell,armada3700-xhci",
"generic-xhci";
--
2.10.2
^ permalink raw reply related
* Re: [RFC net-next 2/3] net: dsa: Propagate VLAN add/del to CPU port(s)
From: Vivien Didelot @ 2016-11-22 16:50 UTC (permalink / raw)
To: Florian Fainelli, netdev
Cc: idosch, andrew, Florian Fainelli, bridge, jiri, davem
In-Reply-To: <20161121190925.14530-3-f.fainelli@gmail.com>
Hi Florian,
Open question: will we need to do the same for FDB and MDB objects?
Florian Fainelli <f.fainelli@gmail.com> writes:
> Now that the bridge layer can call into switchdev to signal programming
> requests targeting the bridge master device itself, allow the switch
> drivers to implement separate programming of downstream and
> upstream/management ports.
>
> Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
> ---
> net/dsa/slave.c | 45 +++++++++++++++++++++++++++++++++------------
> 1 file changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index d0c7bce88743..18288261b964 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -223,35 +223,30 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
> return 0;
> }
>
> -static int dsa_slave_port_vlan_add(struct net_device *dev,
> +static int dsa_slave_port_vlan_add(struct dsa_switch *ds, int port,
> const struct switchdev_obj_port_vlan *vlan,
> struct switchdev_trans *trans)
> {
> - struct dsa_slave_priv *p = netdev_priv(dev);
> - struct dsa_switch *ds = p->parent;
>
Extra newline ^.
> if (switchdev_trans_ph_prepare(trans)) {
> if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
> return -EOPNOTSUPP;
>
> - return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans);
> + return ds->ops->port_vlan_prepare(ds, port, vlan, trans);
> }
>
> - ds->ops->port_vlan_add(ds, p->port, vlan, trans);
> + ds->ops->port_vlan_add(ds, port, vlan, trans);
>
> return 0;
> }
>
> -static int dsa_slave_port_vlan_del(struct net_device *dev,
> +static int dsa_slave_port_vlan_del(struct dsa_switch *ds, int port,
> const struct switchdev_obj_port_vlan *vlan)
> {
> - struct dsa_slave_priv *p = netdev_priv(dev);
> - struct dsa_switch *ds = p->parent;
> -
> if (!ds->ops->port_vlan_del)
> return -EOPNOTSUPP;
>
> - return ds->ops->port_vlan_del(ds, p->port, vlan);
> + return ds->ops->port_vlan_del(ds, port, vlan);
> }
>
> static int dsa_slave_port_vlan_dump(struct net_device *dev,
> @@ -465,8 +460,21 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
> const struct switchdev_obj *obj,
> struct switchdev_trans *trans)
> {
> + struct dsa_slave_priv *p = netdev_priv(dev);
> + struct dsa_switch *ds = p->parent;
> + int port = p->port;
> int err;
>
> + /* Here we may be called with an orig_dev which is different from dev,
> + * on purpose, to receive request coming from e.g the bridge master
> + * device. Although there are no network device associated with CPU/DSA
> + * ports, we may still have programming operation for these ports.
> + */
> + if (obj->orig_dev == p->bridge_dev) {
> + ds = ds->dst->ds[0];
> + port = ds->dst->cpu_port;
> + }
> +
> /* For the prepare phase, ensure the full set of changes is feasable in
> * one go in order to signal a failure properly. If an operation is not
> * supported, return -EOPNOTSUPP.
> @@ -483,7 +491,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
> trans);
> break;
> case SWITCHDEV_OBJ_ID_PORT_VLAN:
> - err = dsa_slave_port_vlan_add(dev,
> + err = dsa_slave_port_vlan_add(ds, port,
> SWITCHDEV_OBJ_PORT_VLAN(obj),
> trans);
Note that dsa_slave_port_vlan_add() will be called N times, N being the
number of bridge ports. This is not an issue for the moment though.
Programming it only once requires caching, so leave it for an eventual
future patch.
When issuing the following command (lan0 being a member of br0):
# bridge vlan add vid 42 dev lan0
the CPU port is also programmed as tagged in VLAN 42. Is that expected?
Thanks,
Vivien
^ permalink raw reply
* Re: [RFC 02/10] IB/hfi-vnic: Virtual Network Interface Controller (VNIC) Bus driver
From: Jason Gunthorpe @ 2016-11-22 17:04 UTC (permalink / raw)
To: Vishwanathapura, Niranjana
Cc: Doug Ledford, linux-rdma, netdev, Dennis Dalessandro
In-Reply-To: <20161122015304.GB67988@knc-06.sc.intel.com>
On Mon, Nov 21, 2016 at 05:53:04PM -0800, Vishwanathapura, Niranjana wrote:
> There are many example drivers in kernel which are using bus_register() in
> an initcall.
There really are not, certainly not in major subsystems.
> We could add a custom Interface between HFI1 driver and hfi_vnic drivers
> without involving a bus.
hfi is already registering on the infiniband class, just use that.
> But using the existing bus model gave a lot of in-built flexibility in
> decoupling devices from the drivers.
If you want to have your own bus then you need your own hfi
subsystem. drivers/infiniband is not a dumping ground..
Jason
^ permalink raw reply
* [PATCH v2] net: dsa: mv88e6xxx: add MV88E6097 switch
From: Stefan Eichenberger @ 2016-11-22 16:47 UTC (permalink / raw)
To: andrew; +Cc: vivien.didelot, f.fainelli, netdev, Stefan Eichenberger
In-Reply-To: <20161122150754.GF2691@lunn.ch>
Add support for the MV88E6097 switch. The change was tested on an Armada
based platform with a MV88E6097 switch.
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@netmodule.com>
---
drivers/net/dsa/mv88e6xxx/chip.c | 26 ++++++++++++++++++++++++++
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h | 2 ++
2 files changed, 28 insertions(+)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 48b58c7..2d5941c 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3208,6 +3208,19 @@ static const struct mv88e6xxx_ops mv88e6095_ops = {
.stats_get_stats = mv88e6095_stats_get_stats,
};
+static const struct mv88e6xxx_ops mv88e6097_ops = {
+ .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+ .phy_read = mv88e6xxx_g2_smi_phy_read,
+ .phy_write = mv88e6xxx_g2_smi_phy_write,
+ .port_set_link = mv88e6xxx_port_set_link,
+ .port_set_duplex = mv88e6xxx_port_set_duplex,
+ .port_set_speed = mv88e6185_port_set_speed,
+ .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+ .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+ .stats_get_strings = mv88e6095_stats_get_strings,
+ .stats_get_stats = mv88e6095_stats_get_stats,
+};
+
static const struct mv88e6xxx_ops mv88e6123_ops = {
/* MV88E6XXX_FAMILY_6165 */
.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
@@ -3579,6 +3592,19 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
.ops = &mv88e6095_ops,
},
+ [MV88E6097] = {
+ .prod_num = PORT_SWITCH_ID_PROD_NUM_6097,
+ .family = MV88E6XXX_FAMILY_6097,
+ .name = "Marvell 88E6097/88E6097F",
+ .num_databases = 4096,
+ .num_ports = 11,
+ .port_base_addr = 0x10,
+ .global1_addr = 0x1b,
+ .age_time_coeff = 15000,
+ .flags = MV88E6XXX_FLAGS_FAMILY_6097,
+ .ops = &mv88e6097_ops,
+ },
+
[MV88E6123] = {
.prod_num = PORT_SWITCH_ID_PROD_NUM_6123,
.family = MV88E6XXX_FAMILY_6165,
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 9298faa..ab52c37 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -81,6 +81,7 @@
#define PORT_SWITCH_ID 0x03
#define PORT_SWITCH_ID_PROD_NUM_6085 0x04a
#define PORT_SWITCH_ID_PROD_NUM_6095 0x095
+#define PORT_SWITCH_ID_PROD_NUM_6097 0x099
#define PORT_SWITCH_ID_PROD_NUM_6131 0x106
#define PORT_SWITCH_ID_PROD_NUM_6320 0x115
#define PORT_SWITCH_ID_PROD_NUM_6123 0x121
@@ -378,6 +379,7 @@
enum mv88e6xxx_model {
MV88E6085,
MV88E6095,
+ MV88E6097,
MV88E6123,
MV88E6131,
MV88E6161,
--
2.9.3
^ permalink raw reply related
* Re: wl1251 & mac address & calibration data
From: Pali Rohár @ 2016-11-22 17:05 UTC (permalink / raw)
To: Michal Kazior
Cc: Kalle Valo, Pavel Machek, Ivaylo Dimitrov, Sebastian Reichel,
Aaro Koskinen, Tony Lindgren, linux-wireless, Network Development,
linux-kernel
In-Reply-To: <CA+BoTQkSOJ85PiT=pjmH234sviCj-LVWWM=64KbCe4AG9CMNUg@mail.gmail.com>
[-- Attachment #1: Type: Text/Plain, Size: 4860 bytes --]
On Tuesday 22 November 2016 17:14:28 Michal Kazior wrote:
> On 22 November 2016 at 16:31, Pali Rohár <pali.rohar@gmail.com> wrote:
> > On Tuesday 22 November 2016 16:22:57 Michal Kazior wrote:
> >> On 21 November 2016 at 16:51, Pali Rohár <pali.rohar@gmail.com>
> >> wrote:
> >> > On Friday 11 November 2016 18:20:50 Pali Rohár wrote:
> >> >> Hi! I will open discussion about mac address and calibration
> >> >> data for wl1251 wireless chip again...
> >> >>
> >> >> Problem: Mac address & calibration data for wl1251 chip on
> >> >> Nokia N900 are stored on second nand partition (mtd1) in
> >> >> special proprietary format which is used only for Nokia N900
> >> >> (probably on N8x0 and N9 too). Wireless driver wl1251.ko
> >> >> cannot work without mac address and calibration data.
> >>
> >> Same problem applies to some ath9k/ath10k supported routers. Some
> >> even carry mac address as implicit offset from ethernet mac
> >> address. As far as I understand OpenWRT cooks cal blobs on first
> >> boot prior to loading modules.
> >
> > So... wl1251 on Nokia N900 is not alone and this problem is there
> > for more drivers and devices. Which means we should come up with
> > some generic solution.
>
> This isn't particularly a problem for ath9k/ath10k.
>
> Let me give you more background on ath10k.
>
> ath10k devices can come with caldata and macaddr stored in their
> OTP/EEPROM. In that case a generic "template" board file is used.
> Userspace doesn't need to do anything special.
>
> Some vendors however decide to use flash partition to store caldata.
> In that case ath10k expects userspace to prepare
> cal-$bus-$devname.bin files, each for a different radio (you can
> have multiple radios on a system).
>
> Now translating this for wl1251 I would expect it should also use
> something like wl1251-nvs-sdio-0x0001.bin for devices like N900 that
> have caldata on flash partition (instead of the generic
> wl1251-nvs.bin). I'm not sure if wl1251-nvs.bin is something
> comparable to (the generic) board.bin ath10k has though. Maybe the
> entire idea behind wl1251-nvs.bin is flawed as it's supposed to be
> device specific and is oblivious to possibility of having multiple
> wl1251 radios on one system (probably sane assumption from practical
> standpoint but still).
Basically nvs data are device specific, in ideal case they should be
generated in factory by some calibration process (or so).
> >> >> Absence of mac address cause that driver generates random mac
> >> >> address at every kernel boot which has couple of problems
> >> >> (unstable identifier of wireless device due to udev permanent
> >> >> storage rules; unpredictable behaviour for dhcp mac address
> >> >> assignment, mac address filtering, ...).
> >> >>
> >> >> Currently there is no way to set (permanent) mac address for
> >> >> network interface from userspace. And it does not make sense
> >> >> to implement in linux kernel large parser for proprietary
> >> >> format of second nand partition where is mac address stored
> >> >> only for one device -- Nokia N900.
> >> >>
> >> >> Driver wl1251.ko loads calibration data via request_firmware()
> >> >> for file wl1251-nvs.bin. There are some "example" calibration
> >> >> file in linux- firmware repository, but it is not suitable for
> >> >> normal usage as real calibration data are per-device specific.
> >>
> >> You could hook up a script that cooks up the cal/mac file via
> >> modprobe's install hook, no?
> >
> > Via modprobe hook I can either pass custom module parameter or call
> > any other system (shell) commands.
> >
> > As wl1251.ko does not accept mac_address as module parameter, such
> > modprobe hook does not help -- as there is absolutely no way from
> > userspace to set or change (permanent) mac address.
>
> Quoting modprobe.d manual:
> > install modulename command...
> >
> > This command instructs modprobe to run your
> > command instead of inserting the module in the
> > kernel as normal. The command can be any shell
> > command: this allows you to do any kind of
> > complex processing you might wish. [...]
I know. But this do not allow me to send mac address to kernel -- as
kernel does not support such command yet (reason for my first question).
> You can hook up a script that cooks up wl1251-nvs.bin (caldata,
> macaddr) and then insmod the actual wl1251.ko module. Or you can just
> cook up the nvs on first device boot and store it in /lib/firmware
> (possibly overwriting the "generic" wl1251 from linux-firmware).
This is what I would like to prevent -- overwriting (possible readonly)
system files with some device specific. It is really bad idea!
--
Pali Rohár
pali.rohar@gmail.com
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply
* [PATCH net] udplite: call proper backlog handlers
From: Eric Dumazet @ 2016-11-22 17:06 UTC (permalink / raw)
To: Eric Dumazet, Herbert Xu, Benjamin LaHaise, Willem de Bruijn
Cc: Andrey Konovalov, samanthakumar, David S. Miller, netdev, LKML
In-Reply-To: <CANn89iKewy1iMa=dUGpE=8UjHUi4o=s=6P_-ngLr_S_ZDByTwg@mail.gmail.com>
From: Eric Dumazet <edumazet@google.com>
In commits 93821778def10 ("udp: Fix rcv socket locking") and
f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into
__udpv6_queue_rcv_skb") UDP backlog handlers were renamed, but UDPlite
was forgotten.
This leads to crashes if UDPlite header is pulled twice, which happens
starting from commit e6afc8ace6dd ("udp: remove headers from UDP packets
before queueing")
Bug found by syzkaller team, thanks a lot guys !
Note that backlog use in UDP/UDPlite is scheduled to be removed starting
from linux-4.10, so this patch is only needed up to linux-4.9
Fixes: 93821778def1 ("udp: Fix rcv socket locking")
Fixes: f7ad74fef3af ("net/ipv6/udp: UDP encapsulation: break backlog_rcv into __udpv6_queue_rcv_skb")
Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
---
net/ipv4/udp.c | 2 +-
net/ipv4/udp_impl.h | 2 +-
net/ipv4/udplite.c | 2 +-
net/ipv6/udp.c | 2 +-
net/ipv6/udp_impl.h | 2 +-
net/ipv6/udplite.c | 2 +-
6 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0de9d5d2b9ae..5bab6c3f7a2f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1455,7 +1455,7 @@ static void udp_v4_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 7e0fe4bdd967..feb50a16398d 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index af817158d830..ff450c2aad9b 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -50,7 +50,7 @@ struct proto udplite_prot = {
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
- .backlog_rcv = udp_queue_rcv_skb,
+ .backlog_rcv = __udp_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e5056d4873d1..e4a8000d59ad 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -514,7 +514,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
}
-static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
int rc;
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index f6eb1ab34f4b..e78bdc76dcc3 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
int flags, int *addr_len);
-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
void udpv6_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 47d0d2b87106..2f5101a12283 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -45,7 +45,7 @@ struct proto udplitev6_prot = {
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
- .backlog_rcv = udpv6_queue_rcv_skb,
+ .backlog_rcv = __udpv6_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v6_get_port,
^ permalink raw reply related
* Re: [PATCHv2 net-next 00/11] Start adding support for mv88e6390
From: Vivien Didelot @ 2016-11-22 17:21 UTC (permalink / raw)
To: Andrew Lunn, David Miller; +Cc: netdev, Andrew Lunn
In-Reply-To: <1479767225-13789-1-git-send-email-andrew@lunn.ch>
Hi,
Andrew Lunn <andrew@lunn.ch> writes:
> This is the first patchset implementing support for the mv88e6390
> family. This is a new generation of switch devices and has numerous
> incompatible changes to the registers. These patches allow the switch
> to the detected during probe, and makes the statistics unit work.
>
> These patches are insufficient to make the mv88e6390 functional. More
> patches will follow.
>
> v2:
> Move stats code into global1
> Change DT compatible string to mv88e6190
> Fixed mv88e6351 stats which v1 had broken
Thanks Andrew!
For what it's worth:
Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Vivien
^ permalink raw reply
* Re: [PATCH v9 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking
From: Simon Guinot @ 2016-11-22 17:28 UTC (permalink / raw)
To: Levy, Amir (Jer)
Cc: gregkh@linuxfoundation.org, andreas.noever@gmail.com,
bhelgaas@google.com, corbet@lwn.net, linux-kernel@vger.kernel.org,
linux-pci@vger.kernel.org, netdev@vger.kernel.org,
linux-doc@vger.kernel.org, mario_limonciello@dell.com,
thunderbolt-linux, Westerberg, Mika, Winkler, Tomas,
Zhang, Xiong Y, Jamet, Michael, remi.rerolle@seagate.com
In-Reply-To: <20161118112007.GB1230@kw.sim.vm.gnt>
[-- Attachment #1: Type: text/plain, Size: 5819 bytes --]
On Fri, Nov 18, 2016 at 12:20:07PM +0100, Simon Guinot wrote:
> On Fri, Nov 18, 2016 at 08:48:36AM +0000, Levy, Amir (Jer) wrote:
> > On Tue, Nov 15 2016, 12:59 PM, Simon Guinot wrote:
> > > On Wed, Nov 09, 2016 at 03:42:53PM +0000, Levy, Amir (Jer) wrote:
> > > > On Wed, Nov 9 2016, 04:36 PM, Simon Guinot wrote:
> > > > > Hi Amir,
> > > > >
> > > > > I have an ASUS "All Series/Z87-DELUXE/QUAD" motherboard with a
> > > > > Thunderbolt 2 "Falcon Ridge" chipset (device ID 156d).
> > > > >
> > > > > Is the thunderbolt-icm driver supposed to work with this chipset ?
> > > > >
> > > >
> > > > Yes, the thunderbolt-icm supports Falcon Ridge, device ID 156c.
> > > > 156d is the bridge -
> > > > http://lxr.free-electrons.com/source/include/linux/pci_ids.h#L2619
> > > >
> > > > > I have installed both a 4.8.6 Linux kernel (patched with your v9
> > > > > series) and the thunderbolt-software-daemon (27 october release)
> > > > > inside a Debian system (Jessie).
> > > > >
> > > > > If I connect the ASUS motherboard with a MacBook Pro (Thunderbolt
> > > > > 2, device ID 156c), I can see that the thunderbolt-icm driver is
> > > > > loaded and that the thunderbolt-software-daemon is well started.
> > > > > But the Ethernet interface is not created.
> > > > >
> > > > > I have attached to this email the syslog file. There is the logs
> > > > > from both the kernel and the daemon inside. Note that the daemon
> > > > > logs are everything but clear about what could be the issue. Maybe
> > > > > I missed some kind of configuration ? But I failed to find any
> > > > > valuable information about configuring the driver and/or the
> > > > > daemon in
> > > the various documentation files.
> > > > >
> > > > > Please, can you provide some guidance ? I'd really like to test
> > > > > your patch series.
> > > >
> > > > First, thank you very much for willing to test it.
> > > > Thunderbolt Networking support was added during Falcon Ridge, in the
> > > latest FR images.
> > > > Do you know which Thunderbolt image version you have on your system?
> > > > Currently I submitted only Thunderbolt Networking feature in Linux,
> > > > and we plan to add more features like reading the image version and
> > > updating the image.
> > > > If you don't know the image version, the only thing I can suggest is
> > > > to load windows, install thunderbolt SW and check in the Thunderbolt
> > > application the image version.
> > > > To know if image update is needed, you can check -
> > > > https://thunderbolttechnology.net/updates
> > >
> > > Hi Amir,
> > >
> > > From the Windows Thunderbolt software, I can read 13.00 for the
> > > firmware version. And from https://thunderbolttechnology.net/updates,
> > > I can see that there is no update available for my ASUS motherboard.
> > >
> > > Am I good to go ?
> > >
> >
> > Thunderbolt Networking is supported on both Thunderbolt(tm) 2 and Thunderbolt(tm) 3 systems.
> > Thunderbolt 2 systems must have updated NVM (version 25 or later) in order for the functionality to work properly.
> > If the system does not have the update, please contact the OEM directly for an updated NVM.
> > For best functionality and support, Intel recommends using Thunderbolt 3 systems for all validation and testing.
>
> Maybe it is worth mentioning in the documentation and/or in the Kconfig
> help message that a minimal firmware version is needed for Thunderbolt 2
> controllers.
>
> It would have saved some time for me :)
>
> >
> > > BTW, it is quite a shame that the Thunderbolt firmware version can't
> > > be read from Linux.
> > >
> >
> > This is WIP, once this patch will be upstream, we will be able to focus more
> > on aligning Linux with the Thunderbolt features that we have for windows.
>
> Well, I rather see the firmware identification and update as basic
> features on the top of which ones you can build a driver. For example in
> this case this would allow the ICM driver and/or the userland daemon to
> exit with a useful error message rather than just not working without any
> explanation.
>
> Next week I'll try the driver with a Thunderbolt 3 controller.
Hi Amir,
I tested the thunderbolt-icm driver (v9 series) on an Gigabyte
motherboard (Z170X-UD5 TH-CF) with a Thunderbolt 3 controller (Alpine
Ridge 4C).
I can see that the network interface is well created when the
motherboard is connected to a MacBook Pro (Thunderbolt 2 or 3).
And here are the TCP bandwidths measured using the iperf3 benchmark:
- MacBook Pro Thunderbolt 2: 8.46Gbits/sec
- MacBook Pro Thunderbolt 3: 11.8Gbits/sec
Are this results consistent with your expectations ?
From the MacOS system interface on the MacBook Pro Thunderbolt 3,
I noticed that the interface appears as dual lane (2x 20Gb/sec). But
when two MacBook Pro are connected together, the interface appears as
single lane (1x 40Gb/sec). Is some lane bonding support missing in the
Linux implementation ?
Here are a couple of additional questions:
- When the network interface is created, there is no IP address
assigned (or negotiated ?) on the Linux side. But it is done on the
MacOS side. And in the Linux kernel logs I can also read the message:
"ready for ThunderboltIP negotiation". Is there something missing or
not working on the Linux side ? What is the correct way to configure
or negotiate the IP address. For my tests I did it manually...
- When the Linux machine is started with the Thunderbolt wire already
connected to a MacBook Pro, sometimes (but not every time) the
network interface is not created. The Thunderbolt wire needs to be
replugged.
FWIW you get my
Tested-by: Simon Guinot <simon.guinot@sequanux.org>
Simon
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 181 bytes --]
^ permalink raw reply
* Re: net/can: use-after-free in bcm_rx_thr_flush
From: Oliver Hartkopp @ 2016-11-22 17:29 UTC (permalink / raw)
To: Andrey Konovalov, Marc Kleine-Budde, David S. Miller, linux-can,
netdev, LKML
Cc: Dmitry Vyukov, Alexander Potapenko, Kostya Serebryany,
Eric Dumazet, syzkaller
In-Reply-To: <CAAeHK+xhwpVO=xHqEOyP-Tm6cx1+jDLDpYT0hpMwM2PCf7W3Jw@mail.gmail.com>
Hi Andrey,
thanks for the report.
Although I can't see the issue in the code ...
On 11/22/2016 10:22 AM, Andrey Konovalov wrote:
> ==================================================================
> BUG: KASAN: use-after-free in bcm_rx_thr_flush+0x284/0x2b0
> Read of size 1 at addr ffff88006c1faae5 by task a.out/3874
>
> page:ffffea0001b07e80 count:1 mapcount:0 mapping: (null) index:0x0
> flags: 0x100000000000080(slab)
> page dumped because: kasan: bad access detected
(..)
>
> The buggy address belongs to the object at ffff88006c1faae0
> which belongs to the cache kmalloc-32 of size 32
???
> The buggy address ffff88006c1faae5 is located 5 bytes inside
> of 32-byte region [ffff88006c1faae0, ffff88006c1fab00)
(..)
> Memory state around the buggy address:
> ffff88006c1fa980: fc fc fb fb fb fb fc fc fb fb fb fb fc fc fb fb
> ffff88006c1faa00: fb fb fc fc fb fb fb fb fc fc fb fb fb fb fc fc
>> ffff88006c1faa80: fb fb fb fb fc fc fb fb fb fb fc fc fb fb fb fb
> ^
> ffff88006c1fab00: fc fc fb fb fb fb fc fc 00 00 00 00 fc fc 00 00
> ffff88006c1fab80: 00 00 fc fc fb fb fb fb fc fc fb fb fb fb fc fc
> ==================================================================
(should be some zero initialized memory here)
The relevant code of bcm_rx_do_flush() can be found here:
http://lxr.free-electrons.com/source/net/can/bcm.c#L589
static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
unsigned int index)
{
struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
if ((op->last_frames) && (lcf->flags & RX_THR)) { <<<----- !!!
if (update)
bcm_rx_changed(op, lcf);
return 1;
}
return 0;
}
lcf->flags points into an array of struct canfd_frame at offset 5 which
is allocated here:
http://lxr.free-electrons.com/source/net/can/bcm.c#L1105
/* create and init array for received CAN frames */
op->last_frames = kzalloc(msg_head->nframes * op->cfsiz,
GFP_KERNEL);
So why does KASAN complain about accessing some kind of 32 byte cache
when it should point into a zero initialized allocated space?
I will write some other test cases with a similar setting of options to
check if I can trigger the instability too.
Tnx & regards,
Oliver
^ permalink raw reply
* RE: [PATCH v9 0/8] thunderbolt: Introducing Thunderbolt(TM) Networking
From: Mario.Limonciello @ 2016-11-22 17:36 UTC (permalink / raw)
To: simon.guinot, amir.jer.levy
Cc: gregkh, andreas.noever, bhelgaas, corbet, linux-kernel, linux-pci,
netdev, linux-doc, thunderbolt-linux, mika.westerberg,
tomas.winkler, xiong.y.zhang, michael.jamet, remi.rerolle
In-Reply-To: <20161122172828.GB31492@kw.sim.vm.gnt>
> Here are a couple of additional questions:
>
> - When the network interface is created, there is no IP address
> assigned (or negotiated ?) on the Linux side. But it is done on the
> MacOS side. And in the Linux kernel logs I can also read the message:
> "ready for ThunderboltIP negotiation". Is there something missing or
> not working on the Linux side ? What is the correct way to configure
> or negotiate the IP address. For my tests I did it manually...
>
> - When the Linux machine is started with the Thunderbolt wire already
> connected to a MacBook Pro, sometimes (but not every time) the
> network interface is not created. The Thunderbolt wire needs to be
> replugged.
>
> FWIW you get my
>
> Tested-by: Simon Guinot <simon.guinot@sequanux.org>
>
> Simon
Simon,
Since I also performed testing on the previous patchset, I'll share what I did.
I configured Network Manager to use the TBT interface to share an internet
connection to another box. This configures a static IP address on the local
Linux side and sets up routing.
Network manager remembers setup this in a configuration database.
When the interface goes up it will then set up a DHCP server to hand
out an IP address to the other side.
^ permalink raw reply
* Re: net/can: use-after-free in bcm_rx_thr_flush
From: Andrey Konovalov @ 2016-11-22 17:37 UTC (permalink / raw)
To: syzkaller
Cc: Marc Kleine-Budde, David S. Miller, linux-can, netdev, LKML,
Dmitry Vyukov, Alexander Potapenko, Kostya Serebryany,
Eric Dumazet
In-Reply-To: <5abd6ebd-1249-71a9-da30-65d8371a2a36@hartkopp.net>
On Tue, Nov 22, 2016 at 6:29 PM, Oliver Hartkopp <socketcan@hartkopp.net> wrote:
> Hi Andrey,
>
> thanks for the report.
>
> Although I can't see the issue in the code ...
>
> On 11/22/2016 10:22 AM, Andrey Konovalov wrote:
>
>> ==================================================================
>> BUG: KASAN: use-after-free in bcm_rx_thr_flush+0x284/0x2b0
>> Read of size 1 at addr ffff88006c1faae5 by task a.out/3874
>>
>> page:ffffea0001b07e80 count:1 mapcount:0 mapping: (null)
>> index:0x0
>> flags: 0x100000000000080(slab)
>> page dumped because: kasan: bad access detected
>
>
> (..)
>
>>
>> The buggy address belongs to the object at ffff88006c1faae0
>> which belongs to the cache kmalloc-32 of size 32
>
>
> ???
>
>> The buggy address ffff88006c1faae5 is located 5 bytes inside
>> of 32-byte region [ffff88006c1faae0, ffff88006c1fab00)
>
>
> (..)
>
>> Memory state around the buggy address:
>> ffff88006c1fa980: fc fc fb fb fb fb fc fc fb fb fb fb fc fc fb fb
>> ffff88006c1faa00: fb fb fc fc fb fb fb fb fc fc fb fb fb fb fc fc
>>>
>>> ffff88006c1faa80: fb fb fb fb fc fc fb fb fb fb fc fc fb fb fb fb
>>
>> ^
>> ffff88006c1fab00: fc fc fb fb fb fb fc fc 00 00 00 00 fc fc 00 00
>> ffff88006c1fab80: 00 00 fc fc fb fb fb fb fc fc fb fb fb fb fc fc
>> ==================================================================
>
>
> (should be some zero initialized memory here)
>
> The relevant code of bcm_rx_do_flush() can be found here:
>
> http://lxr.free-electrons.com/source/net/can/bcm.c#L589
>
> static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
> unsigned int index)
> {
> struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
>
> if ((op->last_frames) && (lcf->flags & RX_THR)) { <<<----- !!!
> if (update)
> bcm_rx_changed(op, lcf);
> return 1;
> }
> return 0;
> }
>
>
> lcf->flags points into an array of struct canfd_frame at offset 5 which is
> allocated here:
>
> http://lxr.free-electrons.com/source/net/can/bcm.c#L1105
>
> /* create and init array for received CAN frames */
> op->last_frames = kzalloc(msg_head->nframes * op->cfsiz,
> GFP_KERNEL);
>
> So why does KASAN complain about accessing some kind of 32 byte cache when
> it should point into a zero initialized allocated space?
Hi Oliver,
My guess would be that this is an out-of-bounds access which doesn't
hit the redzone.
The free and alloc stack traces also look unrelated to the access.
Besides I have a bunch of related slab-out-of-bounds reports, see below.
Thanks for looking at this!
==================================================================
BUG: KASAN: slab-out-of-bounds in bcm_send_to_user+0x330/0x480
Read of size 16 at addr ffff88006de17338 by task syz-executor/30679
page:ffffea0001b78580 count:1 mapcount:0 mapping: (null)
index:0xffff88006de16760 compound_mapcount: 0
flags: 0x500000000004080(slab|head)
page dumped because: kasan: bad access detected
CPU: 2 PID: 30679 Comm: syz-executor Not tainted 4.9.0-rc6+ #429
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
ffff88003cd277b0 ffffffff81b472e4 ffff88003cd27840 ffff88006de17338
00000000000000fb 00000000000000fc ffff88003cd27830 ffffffff8150ad42
0000000000000000 ffffffff81509f65 ffff88006aef9830 0000000000000282
Call Trace:
[< inline >] __dump_stack lib/dump_stack.c:15
[<ffffffff81b472e4>] dump_stack+0xb3/0x10f lib/dump_stack.c:51
[< inline >] describe_address mm/kasan/report.c:259
[<ffffffff8150ad42>] kasan_report_error+0x122/0x560 mm/kasan/report.c:365
[<ffffffff8150b536>] kasan_report+0x36/0x40 mm/kasan/report.c:387
[< inline >] check_memory_region_inline mm/kasan/kasan.c:308
[<ffffffff81509d2e>] check_memory_region+0x13e/0x1a0 mm/kasan/kasan.c:315
[<ffffffff8150a223>] memcpy+0x23/0x50 mm/kasan/kasan.c:350
[<ffffffff83574410>] bcm_send_to_user+0x330/0x480 net/can/bcm.c:325
[<ffffffff8357478e>] bcm_rx_changed+0x22e/0x2a0 net/can/bcm.c:443
[< inline >] bcm_rx_do_flush net/can/bcm.c:591
[<ffffffff83577d1e>] bcm_rx_thr_flush+0x19e/0x2b0 net/can/bcm.c:612
[< inline >] bcm_rx_setup net/can/bcm.c:1199
[<ffffffff83578b36>] bcm_sendmsg+0xbb6/0x30e0 net/can/bcm.c:1351
[< inline >] sock_sendmsg_nosec net/socket.c:621
[<ffffffff82b7176c>] sock_sendmsg+0xcc/0x110 net/socket.c:631
[<ffffffff82b73651>] ___sys_sendmsg+0x771/0x8b0 net/socket.c:1954
[<ffffffff82b7563e>] __sys_sendmsg+0xce/0x170 net/socket.c:1988
[< inline >] SYSC_sendmsg net/socket.c:1999
[<ffffffff82b7570d>] SyS_sendmsg+0x2d/0x50 net/socket.c:1995
[<ffffffff83fc4301>] entry_SYSCALL_64_fastpath+0x1f/0xc2
The buggy address belongs to the object at ffff88006de17320
which belongs to the cache kmalloc-32 of size 32
The buggy address ffff88006de17338 is located 24 bytes inside
of 32-byte region [ffff88006de17320, ffff88006de17340)
Freed by task 0:
[<ffffffff8107e236>] save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
[<ffffffff81509e56>] save_stack+0x46/0xd0 mm/kasan/kasan.c:495
[< inline >] set_track mm/kasan/kasan.c:507
[<ffffffff8150a6b3>] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:571
[< inline >] slab_free_hook mm/slub.c:1352
[< inline >] slab_free_freelist_hook mm/slub.c:1374
[< inline >] slab_free mm/slub.c:2951
[<ffffffff81506b98>] kfree+0xe8/0x2b0 mm/slub.c:3871
[<ffffffff819dd8c1>] selinux_cred_free+0x51/0x80 security/selinux/hooks.c:3725
[<ffffffff819ce358>] security_cred_free+0x48/0x80 security/security.c:907
[<ffffffff8117e27d>] put_cred_rcu+0xed/0x390 kernel/cred.c:116
[< inline >] __rcu_reclaim kernel/rcu/rcu.h:118
[< inline >] rcu_do_batch kernel/rcu/tree.c:2776
[< inline >] invoke_rcu_callbacks kernel/rcu/tree.c:3040
[< inline >] __rcu_process_callbacks kernel/rcu/tree.c:3007
[<ffffffff8125dfe0>] rcu_process_callbacks+0xa40/0x1190 kernel/rcu/tree.c:3024
[<ffffffff83fc70af>] __do_softirq+0x23f/0x8e5 kernel/softirq.c:284
Allocated by task 4074:
[<ffffffff8107e236>] save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57
[<ffffffff81509e56>] save_stack+0x46/0xd0 mm/kasan/kasan.c:495
[< inline >] set_track mm/kasan/kasan.c:507
[<ffffffff8150a0cb>] kasan_kmalloc+0xab/0xe0 mm/kasan/kasan.c:598
[<ffffffff8150a632>] kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:537
[< inline >] slab_post_alloc_hook mm/slab.h:417
[< inline >] slab_alloc_node mm/slub.c:2708
[< inline >] slab_alloc mm/slub.c:2716
[<ffffffff815090ef>] __kmalloc_track_caller+0xcf/0x2a0 mm/slub.c:4240
[<ffffffff8146bf84>] kmemdup+0x24/0x50 mm/util.c:113
[<ffffffff819dcbe9>] selinux_cred_prepare+0x49/0xb0
security/selinux/hooks.c:3739
[<ffffffff819ce40d>] security_prepare_creds+0x7d/0xb0 security/security.c:912
[<ffffffff8117fab3>] prepare_creds+0x243/0x340 kernel/cred.c:277
[<ffffffff811876a4>] set_current_groups+0x14/0x50 kernel/groups.c:155
[< inline >] SYSC_setgroups kernel/groups.c:221
[<ffffffff8118807f>] SyS_setgroups+0x17f/0x1d0 kernel/groups.c:202
[<ffffffff83fc4301>] entry_SYSCALL_64_fastpath+0x1f/0xc2
Memory state around the buggy address:
ffff88006de17200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff88006de17280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88006de17300: fc fc fc fc 00 00 00 00 fc fc fc fc fc fc fc fc
^
ffff88006de17380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff88006de17400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================
>
> I will write some other test cases with a similar setting of options to
> check if I can trigger the instability too.
>
> Tnx & regards,
> Oliver
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller+unsubscribe@googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.
^ permalink raw reply
* Re: [RFC net-next 0/3] net: bridge: Allow CPU port configuration
From: Andrew Lunn @ 2016-11-22 17:48 UTC (permalink / raw)
To: Ido Schimmel
Cc: Florian Fainelli, netdev, davem, bridge, stephen, vivien.didelot,
jiri, idosch
In-Reply-To: <20161122174140.2ly226dcxkv23nlo@splinter.mtl.com>
Hi Ido
> First of all, I want to be sure that when we say "CPU port", we're
> talking about the same thing. In mlxsw, the CPU port is a pipe between
> the device and the host, through which all packets trapped to the host
> go through. So, when a packet is trapped, the driver reads its Rx
> descriptor, checks through which port it ingressed, resolves its netdev,
> sets skb->dev accordingly and injects it to the Rx path via
> netif_receive_skb(). The CPU port itself isn't represented using a
> netdev.
With DSA, we have a real physical ethernet network interface for the
'cpu' port. It connects to one of the ports of the switch. Frames on
this interface have an extra header, indicating which switch port it
came from, and we do a similar resolving it to a slave netdev, strip
of the header and injecting it into the receiver path via
netif_receive_skb().
Andrew
^ permalink raw reply
* Re: [RFC net-next 0/3] net: bridge: Allow CPU port configuration
From: Ido Schimmel @ 2016-11-22 17:41 UTC (permalink / raw)
To: Florian Fainelli
Cc: netdev, davem, bridge, stephen, vivien.didelot, andrew, jiri,
idosch
In-Reply-To: <20161121190925.14530-1-f.fainelli@gmail.com>
Hi Florian,
On Mon, Nov 21, 2016 at 11:09:22AM -0800, Florian Fainelli wrote:
> Hi all,
>
> This patch series allows using the bridge master interface to configure
> an Ethernet switch port's CPU/management port with different VLAN attributes than
> those of the bridge downstream ports/members.
>
> Jiri, Ido, Andrew, Vivien, please review the impact on mlxsw and mv88e6xxx, I
> tested this with b53 and a mockup DSA driver.
We'll need to add a check in mlxsw and ignore any VLAN configuration for
the bridge device itself. Otherwise, any configuration done on br0 will
be propagated to all of its slaves, which is incorrect.
>
> Open questions:
>
> - if we have more than one bridge on top of a physical switch, the driver
> should keep track of that and verify that we are not going to change
> the CPU port VLAN attributes in a way that results in incompatible settings
> to be applied
>
> - if the default behavior is to have all VLANs associated with the CPU port
> be ingressing/egressing tagged to the CPU, is this really useful?
First of all, I want to be sure that when we say "CPU port", we're
talking about the same thing. In mlxsw, the CPU port is a pipe between
the device and the host, through which all packets trapped to the host
go through. So, when a packet is trapped, the driver reads its Rx
descriptor, checks through which port it ingressed, resolves its netdev,
sets skb->dev accordingly and injects it to the Rx path via
netif_receive_skb(). The CPU port itself isn't represented using a
netdev.
Given the above, having VLAN filters (or STP) on the CPU port itself
isn't really helpful (we do have them for physical ports of course...).
So, mlxsw will not benefit from this patchset and if we've the same
concept of "CPU port", then I'm not sure why you don't just enable all
the VLANs on it?
Also, how are you going to set the VLAN filters for the CPU port when
you don't offload a bridge, but instead vlan devices between which you
route packets? You lose your abstraction of CPU port...
Thanks!
^ permalink raw reply
* Re: [RFC net-next 0/3] net: bridge: Allow CPU port configuration
From: Florian Fainelli @ 2016-11-22 17:56 UTC (permalink / raw)
To: Ido Schimmel; +Cc: idosch, andrew, vivien.didelot, netdev, bridge, jiri, davem
In-Reply-To: <20161122174140.2ly226dcxkv23nlo@splinter.mtl.com>
On 11/22/2016 09:41 AM, Ido Schimmel wrote:
> Hi Florian,
>
> On Mon, Nov 21, 2016 at 11:09:22AM -0800, Florian Fainelli wrote:
>> Hi all,
>>
>> This patch series allows using the bridge master interface to configure
>> an Ethernet switch port's CPU/management port with different VLAN attributes than
>> those of the bridge downstream ports/members.
>>
>> Jiri, Ido, Andrew, Vivien, please review the impact on mlxsw and mv88e6xxx, I
>> tested this with b53 and a mockup DSA driver.
>
> We'll need to add a check in mlxsw and ignore any VLAN configuration for
> the bridge device itself. Otherwise, any configuration done on br0 will
> be propagated to all of its slaves, which is incorrect.
>
>>
>> Open questions:
>>
>> - if we have more than one bridge on top of a physical switch, the driver
>> should keep track of that and verify that we are not going to change
>> the CPU port VLAN attributes in a way that results in incompatible settings
>> to be applied
>>
>> - if the default behavior is to have all VLANs associated with the CPU port
>> be ingressing/egressing tagged to the CPU, is this really useful?
>
> First of all, I want to be sure that when we say "CPU port", we're
> talking about the same thing. In mlxsw, the CPU port is a pipe between
> the device and the host, through which all packets trapped to the host
> go through. So, when a packet is trapped, the driver reads its Rx
> descriptor, checks through which port it ingressed, resolves its netdev,
> sets skb->dev accordingly and injects it to the Rx path via
> netif_receive_skb(). The CPU port itself isn't represented using a
> netdev.
In the case of DSA, the CPU port is a normal Ethernet MAC driver, but in
premise, this driver plus the DSA tag protocol hook do exactly the same
things as you just describe.
>
> Given the above, having VLAN filters (or STP) on the CPU port itself
> isn't really helpful (we do have them for physical ports of course...).
> So, mlxsw will not benefit from this patchset and if we've the same
> concept of "CPU port", then I'm not sure why you don't just enable all
> the VLANs on it?
We do enable all VLANs on the CPU port (at least with b53, but I think
mv88e6xxx does it too), but compared to e.g: mlxsw, we trap all traffic
by default, and actually, quite often (always actually, until we add IP
routing offloads) the CPU is involved in the LAN/WAN routing, so it is
not infrequent to have the following packet flow:
LAN port -> VLAN 1 -> eth0.1 -> NAT/routing -> eth0.2 -> VLAN 2 -> WAN port
In that case, having the ability to define the per-port membership for
VLANs, including the CPU, kind of helps, especially if there are
private/guests VLAN on either the LAN or WAN segments that the CPU does
not necessarily need to play a role in.
NB: this scheme works because in most configurations that we support
today, the CPU port's speed is greater or equal than the speed of the
downstream/front panel ports.
>
> Also, how are you going to set the VLAN filters for the CPU port when
> you don't offload a bridge, but instead vlan devices between which you
> route packets? You lose your abstraction of CPU port...
As far as I can tell today, this is not particularly helpful with DSA,
where we start with all traffic going to the CPU (each DSA created
network device is segregated from the other) and only then we require
having bridge VLAN filtering enabled in the kernel, and configuring
bridge VLAN membership to have a proper VLAN-based scheme.
If you did configure VLAN membership with e.g: port0.<vid> we could
support that just fine, but that programming interface does not allow
configuring the default VLAN, and in our case, it matters a bit to
support the LAN/WAN routing scenario described. We could agree that all
untagged traffic should go to VLAN 0 or 1 for instance, but that could
then, vary on a per-driver/HW basis.
Hope this clarifies things a bit!
--
Florian
^ permalink raw reply
* [PATCH/RFC -next] net: phy: Fix double free in phy_detach()
From: Geert Uytterhoeven @ 2016-11-22 18:07 UTC (permalink / raw)
To: Florian Fainelli, David S. Miller, Josh Cartwright,
Nathan Sullivan, Zach Brown
Cc: netdev, linux-renesas-soc, linux-kernel, Geert Uytterhoeven
During "poweroff" on sh73a0/kzm9g:
WARNING: CPU: 0 PID: 1271 at drivers/base/devres.c:889 phy_detach+0x44/0x60
Modules linked in:
CPU: 0 PID: 1271 Comm: halt Not tainted 4.9.0-rc6-kzm9g-05637-gb090128865050239 #823
Hardware name: Generic SH73A0 (Flattened Device Tree)
[<c010e7e0>] (unwind_backtrace) from [<c010aeb8>] (show_stack+0x10/0x14)
[<c010aeb8>] (show_stack) from [<c02f2b68>] (dump_stack+0xa4/0xdc)
[<c02f2b68>] (dump_stack) from [<c0121204>] (__warn+0xcc/0xfc)
[<c0121204>] (__warn) from [<c01212d8>] (warn_slowpath_null+0x1c/0x24)
[<c01212d8>] (warn_slowpath_null) from [<c03aac40>] (phy_detach+0x44/0x60)
[<c03aac40>] (phy_detach) from [<c03ae924>] (smsc911x_stop+0xf4/0x10c)
[<c03ae924>] (smsc911x_stop) from [<c0464c4c>] (__dev_close_many+0x94/0xb8)
[<c0464c4c>] (__dev_close_many) from [<c0464d64>] (__dev_close+0x20/0x34)
[<c0464d64>] (__dev_close) from [<c046d8ac>] (__dev_change_flags+0x8c/0x130)
[<c046d8ac>] (__dev_change_flags) from [<c046d968>] (dev_change_flags+0x18/0x48)
[<c046d968>] (dev_change_flags) from [<c04d1aac>] (devinet_ioctl+0x33c/0x708)
[<c04d1aac>] (devinet_ioctl) from [<c0450694>] (sock_ioctl+0x29c/0x2f8)
[<c0450694>] (sock_ioctl) from [<c02079a0>] (vfs_ioctl+0x20/0x34)
[<c02079a0>] (vfs_ioctl) from [<c0208398>] (do_vfs_ioctl+0x870/0x9c4)
[<c0208398>] (do_vfs_ioctl) from [<c0208520>] (SyS_ioctl+0x34/0x5c)
[<c0208520>] (SyS_ioctl) from [<c0106dc0>] (ret_fast_syscall+0x0/0x1c)
---[ end trace 4555b9be7369b463 ]---
If device_release_driver(&phydev->mdio.dev) was called, it has already
released all resources belonging to the PHY device. Hence the subsequent
call to phy_led_triggers_unregister() may cause a double free, leading
to the warning.
Move the call to phy_led_triggers_unregister() before the possible call
to device_release_driver() to fix this.
Fixes: 2e0bc452f4721520 ("net: phy: leds: add support for led triggers on phy link state change")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
Is this the right fix?
---
drivers/net/phy/phy_device.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9e8f048891bd192f..b32457660db66de4 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -981,6 +981,8 @@ void phy_detach(struct phy_device *phydev)
phydev->attached_dev = NULL;
phy_suspend(phydev);
+ phy_led_triggers_unregister(phydev);
+
/* If the device had no specific driver before (i.e. - it
* was using the generic driver), we unbind the device
* from the generic driver so that there's a chance a
@@ -994,8 +996,6 @@ void phy_detach(struct phy_device *phydev)
}
}
- phy_led_triggers_unregister(phydev);
-
/*
* The phydev might go away on the put_device() below, so avoid
* a use-after-free bug by reading the underlying bus first.
--
1.9.1
^ permalink raw reply related
* Re: net/icmp: null-ptr-deref in icmp6_send
From: Cong Wang @ 2016-11-22 18:11 UTC (permalink / raw)
To: Andrey Konovalov
Cc: David Ahern, David S. Miller, Alexey Kuznetsov, James Morris,
Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Dmitry Vyukov,
Alexander Potapenko, Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <CAAeHK+wRE7y_0d7djWnq9S1O_w-1wQRq1g7VsAmzEAE+JY_v2Q@mail.gmail.com>
On Tue, Nov 22, 2016 at 2:23 AM, Andrey Konovalov <andreyknvl@google.com> wrote:
> Hi,
>
> I've got the following error report while fuzzing the kernel with syzkaller.
>
> It seems that skb_dst(skb) may end up being NULL.
>
> As far as I can see the bug was introduced in commit 5d41ce29e ("net:
> icmp6_send should use dst dev to determine L3 domain").
> ICMP v4 probaly has similar issue due to 9d1a6c4ea ("net:
> icmp_route_lookup should use rt dev to determine L3 domain").
ipv6_parse_hopopts() is called before NF_INET_PRE_ROUTING,
so the skb_dst could be NULL.
I have no idea what commit 5d41ce29e tried to fix, but we already
use skb->dev a few lines before l3mdev_master_ifindex(), so I don't
understand why skb->dev could be NULL, maybe just for vrf dev?
^ permalink raw reply
* [PATCH net] bnxt: do not busy-poll when link is down
From: Andy Gospodarek @ 2016-11-22 18:14 UTC (permalink / raw)
To: netdev; +Cc: Andy Gospodarek, Michael Chan
When busy polling while a link is down (during a link-flap test), TX
timeouts were observed as well as the following messages in the ring
buffer:
bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51
bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free tx failed. rc:-1
bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51
bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free rx failed. rc:-1
These were resolved by checking for link status and returning if link
was not up.
Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Tested-by: Rob Miller <rob.miller@broadcom.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e18635b..013e373 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1811,6 +1811,9 @@ static int bnxt_busy_poll(struct napi_struct *napi)
if (atomic_read(&bp->intr_sem) != 0)
return LL_FLUSH_FAILED;
+ if (!bp->link_info.link_up)
+ return LL_FLUSH_FAILED;
+
if (!bnxt_lock_poll(bnapi))
return LL_FLUSH_BUSY;
--
2.1.0
^ permalink raw reply related
* List pre vas
From: Paní KLeung @ 2016-11-22 15:12 UTC (permalink / raw)
Ahoj.....
Dobre rano, a jak to delate? Jen rychly jedno, je tu oficialni
prilezitosti bych chtel diskutovat s vami soukrome.
Ocenil bych vasi rychlou reakci tady na mem osobnim soukromeho e-mailu
nize pro dalsi komunikaci.
S pratelskym pozdravem,
Paní Ko May Leung
email: lngkomay61@gmail.com
Místopredseda, Managing Director
a vykonny reditel Chong Hing Bank Limited
^ permalink raw reply
* Re: [PATCH net] bnxt: do not busy-poll when link is down
From: Eric Dumazet @ 2016-11-22 18:38 UTC (permalink / raw)
To: Andy Gospodarek; +Cc: netdev, Michael Chan
In-Reply-To: <1479838448-44877-1-git-send-email-gospo@broadcom.com>
On Tue, 2016-11-22 at 13:14 -0500, Andy Gospodarek wrote:
> When busy polling while a link is down (during a link-flap test), TX
> timeouts were observed as well as the following messages in the ring
> buffer:
>
> bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51
> bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free tx failed. rc:-1
> bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51
> bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free rx failed. rc:-1
>
> These were resolved by checking for link status and returning if link
> was not up.
>
> Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
> Signed-off-by: Michael Chan <michael.chan@broadcom.com>
> Tested-by: Rob Miller <rob.miller@broadcom.com>
> ---
> drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index e18635b..013e373 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -1811,6 +1811,9 @@ static int bnxt_busy_poll(struct napi_struct *napi)
> if (atomic_read(&bp->intr_sem) != 0)
> return LL_FLUSH_FAILED;
>
> + if (!bp->link_info.link_up)
> + return LL_FLUSH_FAILED;
> +
> if (!bnxt_lock_poll(bnapi))
> return LL_FLUSH_BUSY;
>
Any plans removing this busy polling stuff, now it is done in core
networking stack ?
This would remove bnxt_lock_napi() extra overhead in normal path ( napi
poll )
I could do this but I do not have the hardware to do the tests.
^ permalink raw reply
* Re: [PATCH] net: dsa: mv88e6xxx: egress all frames
From: Stefan Eichenberger @ 2016-11-22 18:37 UTC (permalink / raw)
To: Andrew Lunn; +Cc: Stefan Eichenberger, vivien.didelot, f.fainelli, netdev
In-Reply-To: <20161122150330.GE2691@lunn.ch>
Hi Andrew
On Tue, Nov 22, 2016 at 04:03:30PM +0100, Andrew Lunn wrote:
> On Tue, Nov 22, 2016 at 11:39:44AM +0100, Stefan Eichenberger wrote:
> > Egress multicast and egress unicast is only enabled for CPU/DSA ports
> > but for switching operation it seems it should be enabled for all ports.
> > Do I miss something here?
> >
> > I did the following test:
> > brctl addbr br0
> > brctl addif br0 lan0
> > brctl addif br0 lan1
> >
> > In this scenario the unicast and multicast packets were not forwarded,
> > therefore ARP requests were not resolved, and no connection could be
> > established.
>
> Hi Stefan
>
> This is probably specific to the 6097 family. It works fine without
> this on other devices. Creating a bridge like above and pinging across
> it is one of my standard tests. But i only test modern devices like
> the 6165, 6352, 6351, 6390 families.
Okay perfect, I wasn't 100% sure if I would have to configure something
additionally.
>
> In fact, you might need to review all the code and look where
> mv88e6xxx_6095_family(chip) is used and consider if you need to add
> mv88e6xxx_6097_family(chip). e.g.
>
> if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip)) {
> /* Set the upstream port this port should use */
> reg |= dsa_upstream_port(ds);
> /* enable forwarding of unknown multicast addresses to
> * the upstream port
> */
> if (port == dsa_upstream_port(ds))
> reg |= PORT_CONTROL_2_FORWARD_UNKNOWN;
> }
>
> Maybe this is your problem?
I think I still don't understand exactly how the driver works.
My problem is that the multicast and broadcast frames are filtered and
the following counter is increasing in ethtool:
sw_in_filtered: 596
This makes sense because "Egress Floods" in the Port Control Register is
set to 0. What kind of mechanism should make sure that for example ARP
packets are sent trough all ports anyway?
Unfortunately I don't have any devices available with more modern
devices, so I can't double check the registers.
Regards,
Stefan
^ permalink raw reply
* Re: [PATCH net] bnxt: do not busy-poll when link is down
From: Michael Chan @ 2016-11-22 18:55 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Andy Gospodarek, Netdev
In-Reply-To: <1479839916.8455.439.camel@edumazet-glaptop3.roam.corp.google.com>
On Tue, Nov 22, 2016 at 10:38 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2016-11-22 at 13:14 -0500, Andy Gospodarek wrote:
>> When busy polling while a link is down (during a link-flap test), TX
>> timeouts were observed as well as the following messages in the ring
>> buffer:
>>
>> bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51
>> bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free tx failed. rc:-1
>> bnxt_en 0008:01:00.2 enP8p1s0f2d2: Resp cmpl intr err msg: 0x51
>> bnxt_en 0008:01:00.2 enP8p1s0f2d2: hwrm_ring_free rx failed. rc:-1
>>
>> These were resolved by checking for link status and returning if link
>> was not up.
>>
>> Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
>> Signed-off-by: Michael Chan <michael.chan@broadcom.com>
>> Tested-by: Rob Miller <rob.miller@broadcom.com>
>> ---
>> drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++
>> 1 file changed, 3 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
>> index e18635b..013e373 100644
>> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
>> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
>> @@ -1811,6 +1811,9 @@ static int bnxt_busy_poll(struct napi_struct *napi)
>> if (atomic_read(&bp->intr_sem) != 0)
>> return LL_FLUSH_FAILED;
>>
>> + if (!bp->link_info.link_up)
>> + return LL_FLUSH_FAILED;
>> +
>> if (!bnxt_lock_poll(bnapi))
>> return LL_FLUSH_BUSY;
>>
>
>
> Any plans removing this busy polling stuff, now it is done in core
> networking stack ?
>
> This would remove bnxt_lock_napi() extra overhead in normal path ( napi
> poll )
>
> I could do this but I do not have the hardware to do the tests.
>
It's on my list of many TODO things. Probably in the next few weeks.
^ permalink raw reply
* Re: [PATCH] net: dsa: mv88e6xxx: egress all frames
From: Andrew Lunn @ 2016-11-22 19:02 UTC (permalink / raw)
To: Stefan Eichenberger
Cc: Stefan Eichenberger, vivien.didelot, f.fainelli, netdev
In-Reply-To: <20161122183732.GD13973@gruene.netmodule.intranet>
On Tue, Nov 22, 2016 at 07:37:33PM +0100, Stefan Eichenberger wrote:
> Hi Andrew
>
> On Tue, Nov 22, 2016 at 04:03:30PM +0100, Andrew Lunn wrote:
> > On Tue, Nov 22, 2016 at 11:39:44AM +0100, Stefan Eichenberger wrote:
> > > Egress multicast and egress unicast is only enabled for CPU/DSA ports
> > > but for switching operation it seems it should be enabled for all ports.
> > > Do I miss something here?
> > >
> > > I did the following test:
> > > brctl addbr br0
> > > brctl addif br0 lan0
> > > brctl addif br0 lan1
> > >
> > > In this scenario the unicast and multicast packets were not forwarded,
> > > therefore ARP requests were not resolved, and no connection could be
> > > established.
> >
> > Hi Stefan
> >
> > This is probably specific to the 6097 family. It works fine without
> > this on other devices. Creating a bridge like above and pinging across
> > it is one of my standard tests. But i only test modern devices like
> > the 6165, 6352, 6351, 6390 families.
>
> Okay perfect, I wasn't 100% sure if I would have to configure something
> additionally.
No. The idea is you treat the interfaces as normal interfaces. You
should not need to do anything additional to what you would do with a
normal interface, when adding it to a bridge.
> > In fact, you might need to review all the code and look where
> > mv88e6xxx_6095_family(chip) is used and consider if you need to add
> > mv88e6xxx_6097_family(chip). e.g.
> >
> > if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip)) {
> > /* Set the upstream port this port should use */
> > reg |= dsa_upstream_port(ds);
> > /* enable forwarding of unknown multicast addresses to
> > * the upstream port
> > */
> > if (port == dsa_upstream_port(ds))
> > reg |= PORT_CONTROL_2_FORWARD_UNKNOWN;
> > }
> >
> > Maybe this is your problem?
>
> I think I still don't understand exactly how the driver works.
>
> My problem is that the multicast and broadcast frames are filtered and
> the following counter is increasing in ethtool:
> sw_in_filtered: 596
This is not what is supposed to happen. Broadcast and multicast frames
should go to all ports in the bridge. There are two different ways
this can happen:
1) The mv88e6xxx driver started out with the host doing all bridge
operations. The switch forwards all frames to the software bridge, and
the software bridge then sends them out another port if needed.
2) We later added support for hardware bridging. That is, the switch
itself bridges frames between ports. It will only pass frames to the
software bridge if it does not know what to do with a frame itself.
Now, the different families are not 100% compatible with each
other. We never had access to a 6097, so it has not been tested
recently, and we have probably broken it... My guess would be,
anywhere mv88e6xxx_6095_family(chip) is used, there also needs to be
an mv88e6xxx_6097_family(chip). But i could be wrong.
What you might find useful is
https://github.com/vivien/linux.git 161b96bd7d16d21b0f046c935b70c3b2d277ccc2
although it might need some changes for recent commits.
With that, you can see deeper into the switches registers.
Andrew
^ permalink raw reply
* Re: [PATCH net] bnxt: do not busy-poll when link is down
From: Eric Dumazet @ 2016-11-22 19:06 UTC (permalink / raw)
To: Michael Chan; +Cc: Andy Gospodarek, Netdev
In-Reply-To: <CACKFLik9rvPfSh1ppcTtjS-bbgsOh4hKJCEaXkCN46eb5_fhpA@mail.gmail.com>
On Tue, 2016-11-22 at 10:55 -0800, Michael Chan wrote:
> On Tue, Nov 22, 2016 at 10:38 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> >
> > Any plans removing this busy polling stuff, now it is done in core
> > networking stack ?
> >
> > This would remove bnxt_lock_napi() extra overhead in normal path ( napi
> > poll )
> >
> > I could do this but I do not have the hardware to do the tests.
> >
> It's on my list of many TODO things. Probably in the next few weeks.
Awesome, thanks !
^ permalink raw reply
* Re: net/icmp: null-ptr-deref in icmp6_send
From: David Ahern @ 2016-11-22 19:14 UTC (permalink / raw)
To: Cong Wang
Cc: Andrey Konovalov, David S. Miller, Alexey Kuznetsov, James Morris,
Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Dmitry Vyukov,
Alexander Potapenko, Kostya Serebryany, Eric Dumazet, syzkaller
In-Reply-To: <CAM_iQpUQtt2CGrYF4H2DJv_NRsmnBqkXj5rJVXD8osJBFz_qYw@mail.gmail.com>
Sent from my iPhone
> On Nov 22, 2016, at 1:11 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
>> On Tue, Nov 22, 2016 at 2:23 AM, Andrey Konovalov <andreyknvl@google.com> wrote:
>> Hi,
>>
>> I've got the following error report while fuzzing the kernel with syzkaller.
>>
>> It seems that skb_dst(skb) may end up being NULL.
>>
>> As far as I can see the bug was introduced in commit 5d41ce29e ("net:
>> icmp6_send should use dst dev to determine L3 domain").
>> ICMP v4 probaly has similar issue due to 9d1a6c4ea ("net:
>> icmp_route_lookup should use rt dev to determine L3 domain").
>
>
> ipv6_parse_hopopts() is called before NF_INET_PRE_ROUTING,
> so the skb_dst could be NULL.
>
> I have no idea what commit 5d41ce29e tried to fix, but we already
> use skb->dev a few lines before l3mdev_master_ifindex(), so I don't
> understand why skb->dev could be NULL, maybe just for vrf dev?
On PTO this week and currently at the beach. Will take a look tonight. Thanks for the report.
^ permalink raw reply
* Re: [PATCH net-next] tcp: enhance tcp_collapse_retrans() with skb_shift()
From: Eric Dumazet @ 2016-11-22 18:57 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Neal Cardwell, Yuchung Cheng
In-Reply-To: <1479243110.8455.135.camel@edumazet-glaptop3.roam.corp.google.com>
On Tue, 2016-11-15 at 12:51 -0800, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> In commit 2331ccc5b323 ("tcp: enhance tcp collapsing"),
> we made a first step allowing copying right skb to left skb head.
>
> Since all skbs in socket write queue are headless (but possibly the very
> first one), this strategy often does not work.
>
> This patch extends tcp_collapse_retrans() to perform frag shifting,
> thanks to skb_shift() helper.
>
> This helper needs to not BUG on non headless skbs, as callers are ok
> with that.
>
> Tested:
>
> Following packetdrill test now passes :
>
> 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
> +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
> +0 bind(3, ..., ...) = 0
> +0 listen(3, 1) = 0
>
> +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 8>
> +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
> +.100 < . 1:1(0) ack 1 win 257
> +0 accept(3, ..., ...) = 4
>
> +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
> +0 write(4, ..., 200) = 200
> +0 > P. 1:201(200) ack 1
> +.001 write(4, ..., 200) = 200
> +0 > P. 201:401(200) ack 1
> +.001 write(4, ..., 200) = 200
> +0 > P. 401:601(200) ack 1
> +.001 write(4, ..., 200) = 200
> +0 > P. 601:801(200) ack 1
> +.001 write(4, ..., 200) = 200
> +0 > P. 801:1001(200) ack 1
> +.001 write(4, ..., 100) = 100
> +0 > P. 1001:1101(100) ack 1
> +.001 write(4, ..., 100) = 100
> +0 > P. 1101:1201(100) ack 1
> +.001 write(4, ..., 100) = 100
> +0 > P. 1201:1301(100) ack 1
> +.001 write(4, ..., 100) = 100
> +0 > P. 1301:1401(100) ack 1
>
> +.099 < . 1:1(0) ack 201 win 257
> +.001 < . 1:1(0) ack 201 win 257 <nop,nop,sack 1001:1401>
> +0 > P. 201:1001(800) ack 1
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> ---
> net/core/skbuff.c | 4 +++-
> net/ipv4/tcp_output.c | 22 +++++++++++-----------
> 2 files changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 0b2a6e94af2de73ed638634c47a0fb71e2cbc1cb..a9cb81a10c4ba895587727aa4cf098e9a38424ea 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
> struct skb_frag_struct *fragfrom, *fragto;
>
> BUG_ON(shiftlen > skb->len);
> - BUG_ON(skb_headlen(skb)); /* Would corrupt stream */
> +
> + if (skb_headlen(skb))
> + return 0;
>
> todo = shiftlen;
> from = 0;
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index f57b5aa51b59cf0a58975fe34a7dcdb886ea8c50..19105b46a30436ebb85fe97ee43089e77aa028bb 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2514,7 +2514,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
> }
>
> /* Collapses two adjacent SKB's during retransmission. */
> -static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
> +static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
> @@ -2525,14 +2525,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
>
> BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
>
> + if (next_skb_size) {
> + if (next_skb_size <= skb_availroom(skb))
> + skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
> + next_skb_size);
> + else if (!skb_shift(skb, next_skb, next_skb_size))
> + return false;
> + }
> tcp_highest_sack_combine(sk, next_skb, skb);
>
> tcp_unlink_write_queue(next_skb, sk);
>
> - if (next_skb_size)
> - skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
> - next_skb_size);
> -
> if (next_skb->ip_summed == CHECKSUM_PARTIAL)
> skb->ip_summed = CHECKSUM_PARTIAL;
>
> @@ -2561,6 +2564,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
> tcp_skb_collapse_tstamp(skb, next_skb);
>
> sk_wmem_free_skb(sk, next_skb);
> + return true;
> }
>
> /* Check if coalescing SKBs is legal. */
> @@ -2610,16 +2614,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
>
> if (space < 0)
> break;
> - /* Punt if not enough space exists in the first SKB for
> - * the data in the second
> - */
> - if (skb->len > skb_availroom(to))
> - break;
>
> if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
> break;
>
> - tcp_collapse_retrans(sk, to);
> + if (!tcp_collapse_retrans(sk, to))
> + break;
> }
> }
>
David, patch is marked 'Superseded' in
https://patchwork.ozlabs.org/patch/695264/
Not sure what this means exactly ?
Did I miss a mail/feedback/something ?
Thanks !
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox