Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH 4/4] dt-bindings: Document the DT bindings for lan78xx
From: Andrew Lunn @ 2018-04-12 14:04 UTC (permalink / raw)
  To: Phil Elwell
  Cc: Woojung Huh, Microchip Linux Driver Support, Rob Herring,
	Mark Rutland, David S. Miller, Mauro Carvalho Chehab,
	Greg Kroah-Hartman, Linus Walleij, Andrew Morton, Randy Dunlap,
	netdev, devicetree, linux-kernel, linux-usb
In-Reply-To: <1523541336-145953-5-git-send-email-phil@raspberrypi.org>

On Thu, Apr 12, 2018 at 02:55:36PM +0100, Phil Elwell wrote:
> The Microchip LAN78XX family of devices are Ethernet controllers with
> a USB interface. Despite being discoverable devices it can be useful to
> be able to configure them from Device Tree, particularly in low-cost
> applications without an EEPROM or programmed OTP.
> 
> Document the supported properties in a bindings file, adding it to
> MAINTAINERS at the same time.

Hi Phil

How you link an OF node to a USB device is not obvious. Could you
please include either a pointer to some binding documentation, or make
your example show it.

Thanks
	Andrew

^ permalink raw reply

* [PATCH net 2/2] sfc: limit ARFS workitems in flight per channel
From: Edward Cree @ 2018-04-12 14:02 UTC (permalink / raw)
  To: linux-net-drivers, David Miller; +Cc: netdev
In-Reply-To: <1713aaa9-2803-0d3b-127a-5240876da7b1@solarflare.com>

A misconfigured system (e.g. with all interrupts affinitised to all CPUs)
 may produce a storm of ARFS steering events.  With the existing sfc ARFS
 implementation, that could create a backlog of workitems that grinds the
 system to a halt.  To prevent this, limit the number of workitems that
 may be in flight for a given SFC device to 8 (EFX_RPS_MAX_IN_FLIGHT), and
 return EBUSY from our ndo_rx_flow_steer method if the limit is reached.
Given this limit, also store the workitems in an array of slots within the
 struct efx_nic, rather than dynamically allocating for each request.

Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 drivers/net/ethernet/sfc/net_driver.h | 25 +++++++++++++++
 drivers/net/ethernet/sfc/rx.c         | 58 ++++++++++++++++++-----------------
 2 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 5e379a83c729..eea3808b3f25 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -733,6 +733,27 @@ struct efx_rss_context {
 	u32 rx_indir_table[128];
 };
 
+#ifdef CONFIG_RFS_ACCEL
+/**
+ * struct efx_async_filter_insertion - Request to asynchronously insert a filter
+ * @net_dev: Reference to the netdevice
+ * @spec: The filter to insert
+ * @work: Workitem for this request
+ * @rxq_index: Identifies the channel for which this request was made
+ * @flow_id: Identifies the kernel-side flow for which this request was made
+ */
+struct efx_async_filter_insertion {
+	struct net_device *net_dev;
+	struct efx_filter_spec spec;
+	struct work_struct work;
+	u16 rxq_index;
+	u32 flow_id;
+};
+
+/* Maximum number of ARFS workitems that may be in flight on an efx_nic */
+#define EFX_RPS_MAX_IN_FLIGHT	8
+#endif /* CONFIG_RFS_ACCEL */
+
 /**
  * struct efx_nic - an Efx NIC
  * @name: Device name (net device name or bus id before net device registered)
@@ -850,6 +871,8 @@ struct efx_rss_context {
  * @rps_expire_channel: Next channel to check for expiry
  * @rps_expire_index: Next index to check for expiry in
  *	@rps_expire_channel's @rps_flow_id
+ * @rps_slot_map: bitmap of in-flight entries in @rps_slot
+ * @rps_slot: array of ARFS insertion requests for efx_filter_rfs_work()
  * @active_queues: Count of RX and TX queues that haven't been flushed and drained.
  * @rxq_flush_pending: Count of number of receive queues that need to be flushed.
  *	Decremented when the efx_flush_rx_queue() is called.
@@ -1004,6 +1027,8 @@ struct efx_nic {
 	struct mutex rps_mutex;
 	unsigned int rps_expire_channel;
 	unsigned int rps_expire_index;
+	unsigned long rps_slot_map;
+	struct efx_async_filter_insertion rps_slot[EFX_RPS_MAX_IN_FLIGHT];
 #endif
 
 	atomic_t active_queues;
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 13b0eb71dbf3..9c593c661cbf 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -827,28 +827,13 @@ MODULE_PARM_DESC(rx_refill_threshold,
 
 #ifdef CONFIG_RFS_ACCEL
 
-/**
- * struct efx_async_filter_insertion - Request to asynchronously insert a filter
- * @net_dev: Reference to the netdevice
- * @spec: The filter to insert
- * @work: Workitem for this request
- * @rxq_index: Identifies the channel for which this request was made
- * @flow_id: Identifies the kernel-side flow for which this request was made
- */
-struct efx_async_filter_insertion {
-	struct net_device *net_dev;
-	struct efx_filter_spec spec;
-	struct work_struct work;
-	u16 rxq_index;
-	u32 flow_id;
-};
-
 static void efx_filter_rfs_work(struct work_struct *data)
 {
 	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
 							      work);
 	struct efx_nic *efx = netdev_priv(req->net_dev);
 	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
+	int slot_idx = req - efx->rps_slot;
 	int rc;
 
 	rc = efx->type->filter_insert(efx, &req->spec, true);
@@ -878,8 +863,8 @@ static void efx_filter_rfs_work(struct work_struct *data)
 	}
 
 	/* Release references */
+	clear_bit(slot_idx, &efx->rps_slot_map);
 	dev_put(req->net_dev);
-	kfree(req);
 }
 
 int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
@@ -888,22 +873,36 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct efx_async_filter_insertion *req;
 	struct flow_keys fk;
+	int slot_idx;
+	int rc;
 
-	if (flow_id == RPS_FLOW_ID_INVALID)
-		return -EINVAL;
+	/* find a free slot */
+	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
+		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
+			break;
+	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
+		return -EBUSY;
 
-	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
-		return -EPROTONOSUPPORT;
+	if (flow_id == RPS_FLOW_ID_INVALID) {
+		rc = -EINVAL;
+		goto out_clear;
+	}
 
-	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6))
-		return -EPROTONOSUPPORT;
-	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
-		return -EPROTONOSUPPORT;
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
+		rc = -EPROTONOSUPPORT;
+		goto out_clear;
+	}
 
-	req = kmalloc(sizeof(*req), GFP_ATOMIC);
-	if (!req)
-		return -ENOMEM;
+	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
+		rc = -EPROTONOSUPPORT;
+		goto out_clear;
+	}
+	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
+		rc = -EPROTONOSUPPORT;
+		goto out_clear;
+	}
 
+	req = efx->rps_slot + slot_idx;
 	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
 			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
 			   rxq_index);
@@ -933,6 +932,9 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
 	req->flow_id = flow_id;
 	schedule_work(&req->work);
 	return 0;
+out_clear:
+	clear_bit(slot_idx, &efx->rps_slot_map);
+	return rc;
 }
 
 bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota)

^ permalink raw reply related

* [PATCH net 1/2] sfc: insert ARFS filters with replace_equal=true
From: Edward Cree @ 2018-04-12 14:02 UTC (permalink / raw)
  To: linux-net-drivers, David Miller; +Cc: netdev
In-Reply-To: <1713aaa9-2803-0d3b-127a-5240876da7b1@solarflare.com>

Necessary to allow redirecting a flow when the application moves.

Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations")
Signed-off-by: Edward Cree <ecree@solarflare.com>
---
 drivers/net/ethernet/sfc/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 95682831484e..13b0eb71dbf3 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -851,7 +851,7 @@ static void efx_filter_rfs_work(struct work_struct *data)
 	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
 	int rc;
 
-	rc = efx->type->filter_insert(efx, &req->spec, false);
+	rc = efx->type->filter_insert(efx, &req->spec, true);
 	if (rc >= 0) {
 		/* Remember this so we can check whether to expire the filter
 		 * later.

^ permalink raw reply related

* [PATCH net 0/2] sfc: couple of ARFS fixes
From: Edward Cree @ 2018-04-12 14:00 UTC (permalink / raw)
  To: linux-net-drivers, David Miller; +Cc: netdev

Two issues introduced by my recent asynchronous filter handling changes:
1. The old filter_rfs_insert would replace a matching filter of equal
   priority; we need to pass the appropriate argument to filter_insert to
   make it do the same.
2. It's possible to cause the kernel to hammer ndo_rx_flow_steer very
   hard, so make sure we don't build up too huge a backlog of workitems.

Possibly it would be better to fix #2 on the kernel side; I think the way
 to do that would be to maintain a forward (as well as reverse) queue-to-
 cpu map and replace the set_rps_cpu() check
    if (rxq_index == skb_get_rx_queue(skb))
 with something like (pseudocode)
    if (irqaffinity of queue[skb_get_rx_queue(skb)] includes next_cpu)
 but I'm not sure whether it's right or even necessary, and in any case
 it's not a regression in 4.17 so isn't 'net' material.
(There's also the issue that we come up in the bad configuration by
 default, but that too is a problem for another time.)

Edward Cree (2):
  sfc: insert ARFS filters with replace_equal=true
  sfc: limit ARFS workitems in flight per channel

 drivers/net/ethernet/sfc/net_driver.h | 25 +++++++++++++++
 drivers/net/ethernet/sfc/rx.c         | 60 ++++++++++++++++++-----------------
 2 files changed, 56 insertions(+), 29 deletions(-)

^ permalink raw reply

* Re: [PATCHv2 net] sctp: do not check port in sctp_inet6_cmp_addr
From: Neil Horman @ 2018-04-12 13:58 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Marcelo Ricardo Leitner
In-Reply-To: <7220f314b70726ba4afee91df11e9ae8d1962e01.1523514271.git.lucien.xin@gmail.com>

On Thu, Apr 12, 2018 at 02:24:31PM +0800, Xin Long wrote:
> pf->cmp_addr() is called before binding a v6 address to the sock. It
> should not check ports, like in sctp_inet_cmp_addr.
> 
> But sctp_inet6_cmp_addr checks the addr by invoking af(6)->cmp_addr,
> sctp_v6_cmp_addr where it also compares the ports.
> 
> This would cause that setsockopt(SCTP_SOCKOPT_BINDX_ADD) could bind
> multiple duplicated IPv6 addresses after Commit 40b4f0fd74e4 ("sctp:
> lack the check for ports in sctp_v6_cmp_addr").
> 
> This patch is to remove af->cmp_addr called in sctp_inet6_cmp_addr,
> but do the proper check for both v6 addrs and v4mapped addrs.
> 
> v1->v2:
>   - define __sctp_v6_cmp_addr to do the common address comparison
>     used for both pf and af v6 cmp_addr.
> 
> Fixes: 40b4f0fd74e4 ("sctp: lack the check for ports in sctp_v6_cmp_addr")
> Reported-by: Jianwen Ji <jiji@redhat.com>
> Signed-off-by: Xin Long <lucien.xin@gmail.com>
> ---
>  net/sctp/ipv6.c | 60 ++++++++++++++++++++++++++++-----------------------------
>  1 file changed, 30 insertions(+), 30 deletions(-)
> 
> diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
> index f1fc48e..09aba03 100644
> --- a/net/sctp/ipv6.c
> +++ b/net/sctp/ipv6.c
> @@ -521,46 +521,49 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
>  	addr->v6.sin6_scope_id = 0;
>  }
>  
> -/* Compare addresses exactly.
> - * v4-mapped-v6 is also in consideration.
> - */
> -static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
> -			    const union sctp_addr *addr2)
> +static int __sctp_v6_cmp_addr(const union sctp_addr *addr1,
> +			      const union sctp_addr *addr2)
>  {
>  	if (addr1->sa.sa_family != addr2->sa.sa_family) {
>  		if (addr1->sa.sa_family == AF_INET &&
>  		    addr2->sa.sa_family == AF_INET6 &&
> -		    ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) {
> -			if (addr2->v6.sin6_port == addr1->v4.sin_port &&
> -			    addr2->v6.sin6_addr.s6_addr32[3] ==
> -			    addr1->v4.sin_addr.s_addr)
> -				return 1;
> -		}
> +		    ipv6_addr_v4mapped(&addr2->v6.sin6_addr) &&
> +		    addr2->v6.sin6_addr.s6_addr32[3] ==
> +		    addr1->v4.sin_addr.s_addr)
> +			return 1;
> +
>  		if (addr2->sa.sa_family == AF_INET &&
>  		    addr1->sa.sa_family == AF_INET6 &&
> -		    ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) {
> -			if (addr1->v6.sin6_port == addr2->v4.sin_port &&
> -			    addr1->v6.sin6_addr.s6_addr32[3] ==
> -			    addr2->v4.sin_addr.s_addr)
> -				return 1;
> -		}
> +		    ipv6_addr_v4mapped(&addr1->v6.sin6_addr) &&
> +		    addr1->v6.sin6_addr.s6_addr32[3] ==
> +		    addr2->v4.sin_addr.s_addr)
> +			return 1;
> +
>  		return 0;
>  	}
> -	if (addr1->v6.sin6_port != addr2->v6.sin6_port)
> -		return 0;
> +
>  	if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
>  		return 0;
> +
>  	/* If this is a linklocal address, compare the scope_id. */
> -	if (ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
> -		if (addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id &&
> -		    (addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)) {
> -			return 0;
> -		}
> -	}
> +	if ((ipv6_addr_type(&addr1->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
> +	    addr1->v6.sin6_scope_id && addr2->v6.sin6_scope_id &&
> +	    addr1->v6.sin6_scope_id != addr2->v6.sin6_scope_id)
> +		return 0;
>  
>  	return 1;
>  }
>  
> +/* Compare addresses exactly.
> + * v4-mapped-v6 is also in consideration.
> + */
> +static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
> +			    const union sctp_addr *addr2)
> +{
> +	return __sctp_v6_cmp_addr(addr1, addr2) &&
> +	       addr1->v6.sin6_port == addr2->v6.sin6_port;
> +}
> +
>  /* Initialize addr struct to INADDR_ANY. */
>  static void sctp_v6_inaddr_any(union sctp_addr *addr, __be16 port)
>  {
> @@ -846,8 +849,8 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
>  			       const union sctp_addr *addr2,
>  			       struct sctp_sock *opt)
>  {
> -	struct sctp_af *af1, *af2;
>  	struct sock *sk = sctp_opt2sk(opt);
> +	struct sctp_af *af1, *af2;
>  
>  	af1 = sctp_get_af_specific(addr1->sa.sa_family);
>  	af2 = sctp_get_af_specific(addr2->sa.sa_family);
> @@ -863,10 +866,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
>  	if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2))
>  		return 1;
>  
> -	if (addr1->sa.sa_family != addr2->sa.sa_family)
> -		return 0;
> -
> -	return af1->cmp_addr(addr1, addr2);
> +	return __sctp_v6_cmp_addr(addr1, addr2);
>  }
>  
>  /* Verify that the provided sockaddr looks bindable.   Common verification,
> -- 
> 2.1.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
Acked-by: Neil Horman <nhorman@tuxdriver.com>

^ permalink raw reply

* [PATCH 3/4] lan78xx: Read LED modes from Device Tree
From: Phil Elwell @ 2018-04-12 13:55 UTC (permalink / raw)
  To: Woojung Huh, Microchip Linux Driver Support, Rob Herring,
	Mark Rutland, David S. Miller, Mauro Carvalho Chehab,
	Greg Kroah-Hartman, Linus Walleij, Andrew Morton, Randy Dunlap,
	netdev, devicetree, linux-kernel, linux-usb
  Cc: Phil Elwell
In-Reply-To: <1523541336-145953-1-git-send-email-phil@raspberrypi.org>

Add support for DT property "microchip,led-modes", a vector of two
cells (u32s) in the range 0-15, each of which sets the mode for one
of the two LEDs. Some possible values are:

    0=link/activity          1=link1000/activity
    2=link100/activity       3=link10/activity
    4=link100/1000/activity  5=link10/1000/activity
    6=link10/100/activity    14=off    15=on

Also use the presence of the DT property to indicate that the
LEDs should be enabled - necessary in the event that no valid OTP
or EEPROM is available.

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
 drivers/net/usb/lan78xx.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index d98397b..ffb483d 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2008,6 +2008,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 {
 	int ret;
 	u32 mii_adv;
+	u32 led_modes[2];
 	struct phy_device *phydev;
 
 	phydev = phy_find_first(dev->mdiobus);
@@ -2097,6 +2098,25 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 		(void)lan78xx_set_eee(dev->net, &edata);
 	}
 
+	if (!of_property_read_u32_array(dev->udev->dev.of_node,
+					"microchip,led-modes",
+					led_modes, ARRAY_SIZE(led_modes))) {
+		u32 reg;
+		int i;
+
+		reg = phy_read(phydev, 0x1d);
+		for (i = 0; i < ARRAY_SIZE(led_modes); i++) {
+			reg &= ~(0xf << (i * 4));
+			reg |= (led_modes[i] & 0xf) << (i * 4);
+		}
+		(void)phy_write(phydev, 0x1d, reg);
+
+		/* Ensure the LEDs are enabled */
+		lan78xx_read_reg(dev, HW_CFG, &reg);
+		reg |= HW_CFG_LED0_EN_ | HW_CFG_LED1_EN_;
+		lan78xx_write_reg(dev, HW_CFG, reg);
+	}
+
 	genphy_config_aneg(phydev);
 
 	dev->fc_autoneg = phydev->autoneg;
-- 
2.7.4

^ permalink raw reply related

* [PATCH 1/4] lan78xx: Read MAC address from DT if present
From: Phil Elwell @ 2018-04-12 13:55 UTC (permalink / raw)
  To: Woojung Huh, Microchip Linux Driver Support, Rob Herring,
	Mark Rutland, David S. Miller, Mauro Carvalho Chehab,
	Greg Kroah-Hartman, Linus Walleij, Andrew Morton, Randy Dunlap,
	netdev, devicetree, linux-kernel, linux-usb
  Cc: Phil Elwell
In-Reply-To: <1523541336-145953-1-git-send-email-phil@raspberrypi.org>

There is a standard mechanism for locating and using a MAC address from
the Device Tree. Use this facility in the lan78xx driver to support
applications without programmed EEPROM or OTP. At the same time,
regularise the handling of the different address sources.

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
 drivers/net/usb/lan78xx.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 55a78eb..d2727b5 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -37,6 +37,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/microchipphy.h>
 #include <linux/phy.h>
+#include <linux/of_net.h>
 #include "lan78xx.h"
 
 #define DRIVER_AUTHOR	"WOOJUNG HUH <woojung.huh@microchip.com>"
@@ -1651,34 +1652,35 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
 	addr[5] = (addr_hi >> 8) & 0xFF;
 
 	if (!is_valid_ether_addr(addr)) {
-		/* reading mac address from EEPROM or OTP */
-		if ((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN,
-					 addr) == 0) ||
-		    (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET, ETH_ALEN,
-				      addr) == 0)) {
-			if (is_valid_ether_addr(addr)) {
-				/* eeprom values are valid so use them */
-				netif_dbg(dev, ifup, dev->net,
-					  "MAC address read from EEPROM");
-			} else {
-				/* generate random MAC */
-				random_ether_addr(addr);
-				netif_dbg(dev, ifup, dev->net,
-					  "MAC address set to random addr");
-			}
+		const u8 *mac_addr;
 
-			addr_lo = addr[0] | (addr[1] << 8) |
-				  (addr[2] << 16) | (addr[3] << 24);
-			addr_hi = addr[4] | (addr[5] << 8);
-
-			ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
-			ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+		mac_addr = of_get_mac_address(dev->udev->dev.of_node);
+		if (mac_addr) {
+			/* valid address present in Device Tree */
+			ether_addr_copy(addr, mac_addr);
+			netif_dbg(dev, ifup, dev->net,
+				  "MAC address read from Device Tree");
+		} else if (((lan78xx_read_eeprom(dev, EEPROM_MAC_OFFSET,
+						 ETH_ALEN, addr) == 0) ||
+			    (lan78xx_read_otp(dev, EEPROM_MAC_OFFSET,
+					      ETH_ALEN, addr) == 0)) &&
+			   is_valid_ether_addr(addr)) {
+			/* eeprom values are valid so use them */
+			netif_dbg(dev, ifup, dev->net,
+				  "MAC address read from EEPROM");
 		} else {
 			/* generate random MAC */
 			random_ether_addr(addr);
 			netif_dbg(dev, ifup, dev->net,
 				  "MAC address set to random addr");
 		}
+
+		addr_lo = addr[0] | (addr[1] << 8) |
+			  (addr[2] << 16) | (addr[3] << 24);
+		addr_hi = addr[4] | (addr[5] << 8);
+
+		ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+		ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
 	}
 
 	ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
-- 
2.7.4

^ permalink raw reply related

* [PATCH 4/4] dt-bindings: Document the DT bindings for lan78xx
From: Phil Elwell @ 2018-04-12 13:55 UTC (permalink / raw)
  To: Woojung Huh, Microchip Linux Driver Support, Rob Herring,
	Mark Rutland, David S. Miller, Mauro Carvalho Chehab,
	Greg Kroah-Hartman, Linus Walleij, Andrew Morton, Randy Dunlap,
	netdev, devicetree, linux-kernel, linux-usb
  Cc: Phil Elwell
In-Reply-To: <1523541336-145953-1-git-send-email-phil@raspberrypi.org>

The Microchip LAN78XX family of devices are Ethernet controllers with
a USB interface. Despite being discoverable devices it can be useful to
be able to configure them from Device Tree, particularly in low-cost
applications without an EEPROM or programmed OTP.

Document the supported properties in a bindings file, adding it to
MAINTAINERS at the same time.

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
 .../devicetree/bindings/net/microchip,lan78xx.txt  | 44 ++++++++++++++++++++++
 MAINTAINERS                                        |  1 +
 2 files changed, 45 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/microchip,lan78xx.txt

diff --git a/Documentation/devicetree/bindings/net/microchip,lan78xx.txt b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt
new file mode 100644
index 0000000..e7d7850
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/microchip,lan78xx.txt
@@ -0,0 +1,44 @@
+Microchip LAN78xx Gigabit Ethernet controller
+
+The LAN78XX devices are usually configured by programming their OTP or with
+an external EEPROM, but some platforms (e.g. Raspberry Pi 3 B+) have neither.
+
+Please refer to ethernet.txt for a description of common Ethernet bindings.
+
+Optional properties:
+- microchip,eee-enabled: if present, enable Energy Efficient Ethernet support;
+- microchip,led-modes: a two-element vector, with each element configuring
+  the operating mode of an LED. The values supported by the device are;
+  0: Link/Activity
+  1: Link1000/Activity
+  2: Link100/Activity
+  3: Link10/Activity
+  4: Link100/1000/Activity
+  5: Link10/1000/Activity
+  6: Link10/100/Activity
+  7: RESERVED
+  8: Duplex/Collision
+  9: Collision
+  10: Activity
+  11: RESERVED
+  12: Auto-negotiation Fault
+  13: RESERVED
+  14: Off
+  15: On
+- microchip,tx-lpi-timer: the delay (in microseconds) between the TX fifo
+  becoming empty and invoking Low Power Idles (default 600).
+
+Example:
+
+	/* Standard configuration for a Raspberry Pi 3 B+ */
+	ethernet: usbether@1 {
+		compatible = "usb424,7800";
+		reg = <1>;
+		microchip,eee-enabled;
+		microchip,tx-lpi-timer = <600>;
+		/*
+		 * led0 = 1:link1000/activity
+		 * led1 = 6:link10/100/activity
+		 */
+		microchip,led-modes = <1 6>;
+	};
diff --git a/MAINTAINERS b/MAINTAINERS
index 2328eed..b637aad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14482,6 +14482,7 @@ M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/usb/lan78xx.*
+F:	Documentation/devicetree/bindings/net/microchip,lan78xx.txt
 
 USB MASS STORAGE DRIVER
 M:	Alan Stern <stern@rowland.harvard.edu>
-- 
2.7.4

^ permalink raw reply related

* [PATCH 2/4] lan78xx: Read initial EEE setting from Device Tree
From: Phil Elwell @ 2018-04-12 13:55 UTC (permalink / raw)
  To: Woojung Huh, Microchip Linux Driver Support, Rob Herring,
	Mark Rutland, David S. Miller, Mauro Carvalho Chehab,
	Greg Kroah-Hartman, Linus Walleij, Andrew Morton, Randy Dunlap,
	netdev, devicetree, linux-kernel, linux-usb
  Cc: Phil Elwell
In-Reply-To: <1523541336-145953-1-git-send-email-phil@raspberrypi.org>

Add two new Device Tree properties:
* microchip,eee-enabled  - a boolean to enable EEE
* microchip,tx-lpi-timer - time in microseconds to wait after TX goes
                           idle before entering the low power state
                           (default 600)

Signed-off-by: Phil Elwell <phil@raspberrypi.org>
---
 drivers/net/usb/lan78xx.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index d2727b5..d98397b 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2080,6 +2080,23 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 	mii_adv = (u32)mii_advertise_flowctrl(dev->fc_request_control);
 	phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
 
+	if (of_property_read_bool(dev->udev->dev.of_node,
+				  "microchip,eee-enabled")) {
+		struct ethtool_eee edata;
+
+		memset(&edata, 0, sizeof(edata));
+		edata.cmd = ETHTOOL_SEEE;
+		edata.advertised = ADVERTISED_1000baseT_Full |
+				   ADVERTISED_100baseT_Full;
+		edata.eee_enabled = true;
+		edata.tx_lpi_enabled = true;
+		if (of_property_read_u32(dev->udev->dev.of_node,
+					 "microchip,tx-lpi-timer",
+					 &edata.tx_lpi_timer))
+			edata.tx_lpi_timer = 600; /* non-aggressive */
+		(void)lan78xx_set_eee(dev->net, &edata);
+	}
+
 	genphy_config_aneg(phydev);
 
 	dev->fc_autoneg = phydev->autoneg;
-- 
2.7.4

^ permalink raw reply related

* [PATCH 0/4] lan78xx: Read configuration from Device Tree
From: Phil Elwell @ 2018-04-12 13:55 UTC (permalink / raw)
  To: Woojung Huh, Microchip Linux Driver Support, Rob Herring,
	Mark Rutland, David S. Miller, Mauro Carvalho Chehab,
	Greg Kroah-Hartman, Linus Walleij, Andrew Morton, Randy Dunlap,
	netdev, devicetree, linux-kernel, linux-usb
  Cc: Phil Elwell

The Microchip LAN78XX family of devices are Ethernet controllers with
a USB interface. Despite being discoverable devices it can be useful to
be able to configure them from Device Tree, particularly in low-cost
applications without an EEPROM or programmed OTP.

This patch set adds support for reading the MAC address, EEE setting
and LED modes from Device Tree.

Phil Elwell (4):
  lan78xx: Read MAC address from DT if present
  lan78xx: Read initial EEE setting from Device Tree
  lan78xx: Read LED modes from Device Tree
  dt-bindings: Document the DT bindings for lan78xx

 .../devicetree/bindings/net/microchip,lan78xx.txt  | 44 ++++++++++++
 MAINTAINERS                                        |  1 +
 drivers/net/usb/lan78xx.c                          | 81 ++++++++++++++++------
 3 files changed, 105 insertions(+), 21 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/microchip,lan78xx.txt

-- 
2.7.4

^ permalink raw reply

* XDP performance regression due to CONFIG_RETPOLINE Spectre V2
From: Jesper Dangaard Brouer @ 2018-04-12 13:50 UTC (permalink / raw)
  To: xdp-newbies@vger.kernel.org, netdev@vger.kernel.org
  Cc: brouer, Christoph Hellwig, David Woodhouse, William Tu,
	Björn Töpel, Karlsson, Magnus, Alexander Duyck,
	Arnaldo Carvalho de Melo

Heads-up XDP performance nerds!

I got an unpleasant surprise when I updated my GCC compiler (to support
the option -mindirect-branch=thunk-extern).  My XDP redirect
performance numbers when cut in half; from approx 13Mpps to 6Mpps
(single CPU core).  I've identified the issue, which is caused by
kernel CONFIG_RETPOLINE, that only have effect when the GCC compiler
have support.  This is mitigation of Spectre variant 2 (CVE-2017-5715)
related to indirect (function call) branches.

XDP_REDIRECT itself only have two primary (per packet) indirect
function calls, ndo_xdp_xmit and invoking bpf_prog, plus any
map_lookup_elem calls in the bpf_prog.  I PoC implemented bulking for
ndo_xdp_xmit, which helped, but not enough. The real root-cause is all
the DMA API calls, which uses function pointers extensively.


Mitigation plan
---------------
Implement support for keeping the DMA mapping through the XDP return
call, to remove RX map/unmap calls.  Implement bulking for XDP
ndo_xdp_xmit and XDP return frame API.  Bulking allows to perform DMA
bulking via scatter-gatter DMA calls, XDP TX need it for DMA
map+unmap. The driver RX DMA-sync (to CPU) per packet calls are harder
to mitigate (via bulk technique). Ask DMA maintainer for a common
case direct call for swiotlb DMA sync call ;-)

Root-cause verification
-----------------------
I have verified that indirect DMA calls are the root-cause, by
removing the DMA sync calls from the code (as they for swiotlb does
nothing), and manually inlined the DMA map calls (basically calling
phys_to_dma(dev, page_to_phys(page)) + offset). For my ixgbe test,
performance "returned" to 11Mpps.

Perf reports
------------
It is not easy to diagnose via perf event tool. I'm coordinating with
ACME to make it easier to pinpoint the hotspots.  Lookout for symbols:
__x86_indirect_thunk_r10, __indirect_thunk_start, __x86_indirect_thunk_rdx
etc.  Be aware that they might not be super high in perf top, but they
stop CPU speculation.  Thus, instead use perf-stat and see the
negative effect of 'insn per cycle'.


Want to understand retpoline at ASM level read this:
 https://support.google.com/faqs/answer/7625886

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [bpf-next PATCH 1/4] bpf: sockmap, free memory on sock close with cork data
From: Simon Horman @ 2018-04-12 13:13 UTC (permalink / raw)
  To: John Fastabend; +Cc: ast, daniel, netdev, davem
In-Reply-To: <20180401150054.24727.78359.stgit@john-Precision-Tower-5810>

On Sun, Apr 01, 2018 at 08:00:54AM -0700, John Fastabend wrote:
> If a socket with pending cork data is closed we do not return the
> memory to the socket until the garbage collector free's the psock
> structure. The garbage collector though can run after the sock has
> completed its close operation. If this ordering happens the sock code
> will through a WARN_ON because there is still outstanding memory

s/through/throw/ ?

> accounted to the sock.
> 
> To resolve this ensure we return memory to the sock when a socket
> is closed.
> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> Fixes: 91843d540a13 ("bpf: sockmap, add msg_cork_bytes() helper")
> ---
>  kernel/bpf/sockmap.c |    6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index d2bda5a..8ddf326 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -211,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
>  	close_fun = psock->save_close;
>  
>  	write_lock_bh(&sk->sk_callback_lock);
> +	if (psock->cork) {
> +		free_start_sg(psock->sock, psock->cork);
> +		kfree(psock->cork);
> +		psock->cork = NULL;
> +	}
> +
>  	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
>  		list_del(&md->list);
>  		free_start_sg(psock->sock, md);
> 

^ permalink raw reply

* Re: iproute2-4.16.0 no longer accepts routes via fe80::1
From: Thomas Deutschmann @ 2018-04-12 12:41 UTC (permalink / raw)
  To: netdev; +Cc: serhe.popovych
In-Reply-To: <53f39aca-b0ac-79a4-3427-175598b17037@gentoo.org>

Hi,

well, it isn't just "fe80::1", it is any IPv6 address which
will be rejected if not called with "-6". I run bisect:

> git bisect start
> # good: [50b8a842e8c098cddb213f5b3076526df88826e8] v4.15.0
> git bisect good 50b8a842e8c098cddb213f5b3076526df88826e8
> # bad: [4b6c4177ee66421770f0bbcc765c29135e44d921] v4.16.0
> git bisect bad 4b6c4177ee66421770f0bbcc765c29135e44d921
> # bad: [5f4892e2c8d4fb22118713e0c83290b352fe0e34] rdma: Make visible the number of arguments
> git bisect bad 5f4892e2c8d4fb22118713e0c83290b352fe0e34
> # good: [8c75f69411bc8c3affe5d173afcf981d15f5da15] Merge branch 'master' into net-next
> git bisect good 8c75f69411bc8c3affe5d173afcf981d15f5da15
> # bad: [27c523e209ab956ff269afec68c6e744e7f5edb6] utils: Introduce get_addr_rta() and inet_addr_match_rta()
> git bisect bad 27c523e209ab956ff269afec68c6e744e7f5edb6
> # bad: [d0bcedd549566a87354aa804df3be6be80681ee9] tc: introduce tc_qdisc_block_exists helper
> git bisect bad d0bcedd549566a87354aa804df3be6be80681ee9
> # bad: [6c4b672738acf680ee98c10e79a52a8dede5f9a6] iplink_geneve: Get rid of inet_get_addr()
> git bisect bad 6c4b672738acf680ee98c10e79a52a8dede5f9a6
> # bad: [93fa12418dc6f5943692250244be303bb162175b] utils: Always specify family and ->bytelen in get_prefix_1()
> git bisect bad 93fa12418dc6f5943692250244be303bb162175b
> # good: [f2522007d8fee924cb098b4afc8af16f2b25829f] utils: Always specify family for address in get_addr_1()
> git bisect good f2522007d8fee924cb098b4afc8af16f2b25829f
> # first bad commit: [93fa12418dc6f5943692250244be303bb162175b] utils: Always specify family and ->bytelen in get_prefix_1()

> From 93fa12418dc6f5943692250244be303bb162175b Mon Sep 17 00:00:00 2001
> From: Serhey Popovych
> Date: Thu, 18 Jan 2018 20:13:43 +0200
> Subject: utils: Always specify family and ->bytelen in get_prefix_1()
> 
> Handle default/all/any special case in get_addr_1() to setup
> ->family and ->bytelen correctly.
> 
> Make get_addr_1() return ->bitlen == -2 instead of -1 to
> distinguish default/all/any special case from the rest:
> it is safe because all callers check ->bitlen < 0, not
> explicit value -1.
> 
> Reduce intendation by one level and get rid of goto/label
> to make code more readable.
> 
> Signed-off-by: Serhey Popovych
> Signed-off-by: David Ahern

https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/commit/?id=93fa12418dc6f5943692250244be303bb162175b

So was this an intended behavior change? I.e. this will require
updates for various user space tools/network configuration scripts
which are relying on ip utilities feature to auto-detect inet family
which was "supported" (at least working) until 4.16.0...


-- 
Regards,
Thomas Deutschmann / Gentoo Linux Developer
C4DD 695F A713 8F24 2AA1 5638 5849 7EE5 1D5D 74A5

^ permalink raw reply

* Re: net: hang in unregister_netdevice: waiting for lo to become free
From: Dmitry Vyukov @ 2018-04-12 12:15 UTC (permalink / raw)
  To: Tommi Rantala
  Cc: Neil Horman, Xin Long, David Ahern, Daniel Borkmann, Cong Wang,
	David Miller, Eric Dumazet, Willem de Bruijn, Jakub Kicinski,
	Rasmus Villemoes, netdev, LKML, Alexey Kuznetsov,
	Hideaki YOSHIFUJI, syzkaller, Dan Streetman, Eric W. Biederman,
	Alexey Kodanev, Marcelo Ricardo Leitner
In-Reply-To: <7d98027d-e810-a079-49c5-0bf8beef390e@nokia.com>

On Wed, Feb 21, 2018 at 3:53 PM, Tommi Rantala
<tommi.t.rantala@nokia.com> wrote:
> On 20.02.2018 18:26, Neil Horman wrote:
>>
>> On Tue, Feb 20, 2018 at 09:14:41AM +0100, Dmitry Vyukov wrote:
>>>
>>> On Tue, Feb 20, 2018 at 8:56 AM, Tommi Rantala
>>> <tommi.t.rantala@nokia.com> wrote:
>>>>
>>>> On 19.02.2018 20:59, Dmitry Vyukov wrote:
>>>>>
>>>>> Is this meant to be fixed already? I am still seeing this on the
>>>>> latest upstream tree.
>>>>>
>>>>
>>>> These two commits are in v4.16-rc1:
>>>>
>>>> commit 4a31a6b19f9ddf498c81f5c9b089742b7472a6f8
>>>> Author: Tommi Rantala <tommi.t.rantala@nokia.com>
>>>> Date:   Mon Feb 5 21:48:14 2018 +0200
>>>>
>>>>      sctp: fix dst refcnt leak in sctp_v4_get_dst
>>>> ...
>>>>      Fixes: 410f03831 ("sctp: add routing output fallback")
>>>>      Fixes: 0ca50d12f ("sctp: fix src address selection if using
>>>> secondary
>>>> addresses")
>>>>
>>>>
>>>> commit 957d761cf91cdbb175ad7d8f5472336a4d54dbf2
>>>> Author: Alexey Kodanev <alexey.kodanev@oracle.com>
>>>> Date:   Mon Feb 5 15:10:35 2018 +0300
>>>>
>>>>      sctp: fix dst refcnt leak in sctp_v6_get_dst()
>>>> ...
>>>>      Fixes: dbc2b5e9a09e ("sctp: fix src address selection if using
>>>> secondary
>>>> addresses for ipv6")
>>>>
>>>>
>>>> I guess we missed something if it's still reproducible.
>>>>
>>>> I can check it later this week, unless someone else beat me to it.
>>>
>>>
>>> Hi Tommi,
>>>
>>> Hmmm, I can't claim that it's exactly the same bug. Perhaps it's
>>> another one then. But I am still seeing these:
>>>
>>> [   58.799130] unregister_netdevice: waiting for lo to become free.
>>> Usage count = 4
>>> [   60.847138] unregister_netdevice: waiting for lo to become free.
>>> Usage count = 4
>>> [   62.895093] unregister_netdevice: waiting for lo to become free.
>>> Usage count = 4
>>> [   64.943103] unregister_netdevice: waiting for lo to become free.
>>> Usage count = 4
>>>
>>> on upstream tree pulled ~12 hours ago.
>>>
>> Can you write a systemtap script to probe dev_hold, and dev_put, printing
>> out a
>> backtrace if the device name matches "lo".  That should tell us
>> definitively if
>> the problem is in the same location or not
>
>
> Hi Dmitry, I tested with the reproducer and the kernel .config file that you
> sent in the first email in this thread:
>
> With 4.16-rc2 unable to reproduce.
>
> With 4.15-rc9 bug reproducible, and I get "unregister_netdevice: waiting for
> lo to become free. Usage count = 3"
>
> With 4.15-rc9 and Alexey's "sctp: fix dst refcnt leak in sctp_v6_get_dst()"
> cherry-picked on top, unable to reproduce.
>
>
> Is syzkaller doing something else now to trigger the bug...?
> Can you still trigger the bug with the same reproducer?

Hi Neil, Tommi,

Reviving this old thread about "unregister_netdevice: waiting for lo
to become free. Usage count = 3" hangs.
I still did not have time to deep dive into what happens there (too
many bugs coming from syzbot). But this still actively happens and I
suspect accounts to a significant portion of various hang reports,
which are quite unpleasant.

One idea that could make it all simpler:

Is this wait loop in netdev_wait_allrefs() supposed to wait for any
prolonged periods of time under any non-buggy conditions? E.g. more
than 1-2 minutes?
If it only supposed to wait briefly for things that already supposed
to be shutting down, and we add a WARNING there after some timeout,
then syzbot will report all info how/when it happens, hopefully
extracting reproducers, and all the nice things.
But this WARNING should not have any false positives under any
realistic conditions (e.g. waiting for arrival of remote packets with
large timeouts).

Looking at some task hung reports, it seems that this code holds some
mutexes, takes workqueue thread and prevents any progress with
destruction of other devices (and net namespace creation/destruction),
so I guess it should not wait for any indefinite periods of time?

^ permalink raw reply

* Re: [PATCH] net: ieee802154: atusb: Replace GFP_ATOMIC with GFP_KERNEL in atusb_probe
From: Stefan Schmidt @ 2018-04-12 12:08 UTC (permalink / raw)
  To: Jia-Ju Bai, alex.aring; +Cc: linux-wpan, netdev, linux-kernel
In-Reply-To: <1523412850-2260-1-git-send-email-baijiaju1990@gmail.com>

Hello.


On 04/11/2018 04:14 AM, Jia-Ju Bai wrote:
> atusb_probe() is never called in atomic context.
> This function is only set as ".probe" in struct usb_driver.
>
> Despite never getting called from atomic context,
> atusb_probe() calls usb_alloc_urb() with GFP_ATOMIC,
> which does not sleep for allocation.
> GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL,
> which can sleep and improve the possibility of sucessful allocation.
>
> This is found by a static analysis tool named DCNS written by myself.
> And I also manually check it.
>
> Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
> ---
>  drivers/net/ieee802154/atusb.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
> index ef68851..ab6a505 100644
> --- a/drivers/net/ieee802154/atusb.c
> +++ b/drivers/net/ieee802154/atusb.c
> @@ -789,7 +789,7 @@ static int atusb_probe(struct usb_interface *interface,
>  	atusb->tx_dr.bRequest = ATUSB_TX;
>  	atusb->tx_dr.wValue = cpu_to_le16(0);
>  
> -	atusb->tx_urb = usb_alloc_urb(0, GFP_ATOMIC);
> +	atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
>  	if (!atusb->tx_urb)
>  		goto fail;
>  

This patch has been applied to the wpan tree and will be
part of the next pull request to net. Thanks!

regards
Stefan Schmidt

^ permalink raw reply

* RE: [RFC PATCH v2 14/14] samples/bpf: sample application for AF_XDP sockets
From: Karlsson, Magnus @ 2018-04-12 11:08 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Björn Töpel
  Cc: Duyck, Alexander H, alexander.duyck@gmail.com,
	john.fastabend@gmail.com, ast@fb.com,
	willemdebruijn.kernel@gmail.com, daniel@iogearbox.net,
	netdev@vger.kernel.org, michael.lundkvist@ericsson.com,
	Brandeburg, Jesse, Singhai, Anjali, Zhang, Qi Z,
	ravineet.singh@ericsson.com, Topel, Bjorn
In-Reply-To: <20180412130521.30d41cbc@redhat.com>



> -----Original Message-----
> From: Jesper Dangaard Brouer [mailto:brouer@redhat.com]
> Sent: Thursday, April 12, 2018 1:05 PM
> To: Björn Töpel <bjorn.topel@gmail.com>
> Cc: Karlsson, Magnus <magnus.karlsson@intel.com>; Duyck, Alexander H
> <alexander.h.duyck@intel.com>; alexander.duyck@gmail.com;
> john.fastabend@gmail.com; ast@fb.com;
> willemdebruijn.kernel@gmail.com; daniel@iogearbox.net;
> netdev@vger.kernel.org; michael.lundkvist@ericsson.com; Brandeburg,
> Jesse <jesse.brandeburg@intel.com>; Singhai, Anjali
> <anjali.singhai@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> ravineet.singh@ericsson.com; Topel, Bjorn <bjorn.topel@intel.com>;
> brouer@redhat.com
> Subject: Re: [RFC PATCH v2 14/14] samples/bpf: sample application for
> AF_XDP sockets
> 
> On Tue, 27 Mar 2018 18:59:19 +0200
> Björn Töpel <bjorn.topel@gmail.com> wrote:
> 
> > +static void dump_stats(void)
> > +{
> > +	unsigned long stop_time = get_nsecs();
> > +	long dt = stop_time - start_time;
> > +	int i;
> > +
> > +	for (i = 0; i < num_socks; i++) {
> > +		double rx_pps = xsks[i]->rx_npkts * 1000000000.
> / dt;
> > +		double tx_pps = xsks[i]->tx_npkts * 1000000000.
> / dt;
> > +		char *fmt = "%-15s %'-11.0f %'-11lu\n";
> > +
> > +		printf("\n sock%d@", i);
> > +		print_benchmark(false);
> > +		printf("\n");
> > +
> > +		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps",
> "pkts",
> > +		       dt / 1000000000.);
> > +		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
> > +		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
> > +	}
> > +}
> > +
> > +static void *poller(void *arg)
> > +{
> > +	(void)arg;
> > +	for (;;) {
> > +		sleep(1);
> > +		dump_stats();
> > +	}
> > +
> > +	return NULL;
> > +}
> 
> You are printing the "pps" (packets per sec) as an average over the entire
> test run... could you please change that to, at least also, have an more up-to-
> date value like between the last measurement?
> 
> The problem is that when you start the test, the first reading will be too low,
> and it takes time to average out/up. For ixgbe, first reading will be zero,
> because it does a link-down+up, which stops my pktgen.
> 
> The second annoyance is that I like to change system/kernel setting during
> the run, and observe the effect. E.g change CPU sleep states (via tuned-
> adm) during the test-run to see the effect, which I cannot with this long
> average.

Good points. Will fix.

/Magnus

> 
> --
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* Re: [RFC PATCH v2 14/14] samples/bpf: sample application for AF_XDP sockets
From: Jesper Dangaard Brouer @ 2018-04-12 11:05 UTC (permalink / raw)
  To: Björn Töpel
  Cc: magnus.karlsson, alexander.h.duyck, alexander.duyck,
	john.fastabend, ast, willemdebruijn.kernel, daniel, netdev,
	michael.lundkvist, jesse.brandeburg, anjali.singhai, qi.z.zhang,
	ravineet.singh, Björn Töpel, brouer
In-Reply-To: <20180327165919.17933-15-bjorn.topel@gmail.com>

On Tue, 27 Mar 2018 18:59:19 +0200
Björn Töpel <bjorn.topel@gmail.com> wrote:

> +static void dump_stats(void)
> +{
> +	unsigned long stop_time = get_nsecs();
> +	long dt = stop_time - start_time;
> +	int i;
> +
> +	for (i = 0; i < num_socks; i++) {
> +		double rx_pps = xsks[i]->rx_npkts * 1000000000. / dt;
> +		double tx_pps = xsks[i]->tx_npkts * 1000000000. / dt;
> +		char *fmt = "%-15s %'-11.0f %'-11lu\n";
> +
> +		printf("\n sock%d@", i);
> +		print_benchmark(false);
> +		printf("\n");
> +
> +		printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts",
> +		       dt / 1000000000.);
> +		printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts);
> +		printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts);
> +	}
> +}
> +
> +static void *poller(void *arg)
> +{
> +	(void)arg;
> +	for (;;) {
> +		sleep(1);
> +		dump_stats();
> +	}
> +
> +	return NULL;
> +}

You are printing the "pps" (packets per sec) as an average over the
entire test run... could you please change that to, at least also, have
an more up-to-date value like between the last measurement?

The problem is that when you start the test, the first reading will be
too low, and it takes time to average out/up. For ixgbe, first reading
will be zero, because it does a link-down+up, which stops my pktgen.

The second annoyance is that I like to change system/kernel setting
during the run, and observe the effect. E.g change CPU sleep states
(via tuned-adm) during the test-run to see the effect, which I cannot
with this long average.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* [PATCH] selftests: add headers_install to lib.mk
From: Anders Roxell @ 2018-04-12 10:23 UTC (permalink / raw)
  To: shuah
  Cc: linux-kselftest, linux-kernel, netdev, linux-gpio, fathi.boudra,
	Anders Roxell

If the kernel headers aren't installed we can't build all the tests.
Add a new make target rule 'khdr' in the file lib.mk to generate the
kernel headers and that gets include for every test-dir Makefile that
includes lib.mk If the testdir in turn have its own sub-dirs the
top_srcdir needs to be set to the linux-rootdir to be able to generate
the kernel headers.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Reviewed-by: Fathi Boudra <fathi.boudra@linaro.org>
---
 tools/testing/selftests/android/Makefile          | 2 +-
 tools/testing/selftests/android/ion/Makefile      | 1 +
 tools/testing/selftests/bpf/Makefile              | 5 ++---
 tools/testing/selftests/futex/functional/Makefile | 1 +
 tools/testing/selftests/gpio/Makefile             | 4 ----
 tools/testing/selftests/kvm/Makefile              | 6 +-----
 tools/testing/selftests/lib.mk                    | 8 ++++++++
 tools/testing/selftests/vm/Makefile               | 3 ---
 8 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index f6304d2be90c..087390bbad68 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -6,7 +6,7 @@ TEST_PROGS := run.sh
 
 include ../lib.mk
 
-all:
+all: khdr
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index e03695287f76..14ecd9805748 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -11,6 +11,7 @@ $(TEST_GEN_FILES): ipcsocket.c ionutils.c
 TEST_PROGS := ion_test.sh
 
 include ../../lib.mk
+top_srcdir = ../../../../../
 
 $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
 $(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0a315ddabbf4..cc611a284087 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -16,9 +16,8 @@ LDLIBS += -lcap -lelf -lrt -lpthread
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 all: $(TEST_CUSTOM_PROGS)
 
-$(TEST_CUSTOM_PROGS): urandom_read
-
-urandom_read: urandom_read.c
+$(TEST_CUSTOM_PROGS):| khdr
+$(TEST_CUSTOM_PROGS): urandom_read.c
 	$(CC) -o $(TEST_CUSTOM_PROGS) -static $<
 
 # Order correspond to 'make run_tests' order
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index ff8feca49746..9f602fb40241 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -19,5 +19,6 @@ TEST_GEN_FILES := \
 TEST_PROGS := run.sh
 
 include ../../lib.mk
+top_srcdir = ../../../../../
 
 $(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 1bbb47565c55..768b2be010db 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -25,7 +25,3 @@ $(BINARIES): ../../../gpio/gpio-utils.o ../../../../usr/include/linux/gpio.h
 
 ../../../gpio/gpio-utils.o:
 	make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
-
-../../../../usr/include/linux/gpio.h:
-	make -C ../../../.. headers_install INSTALL_HDR_PATH=$(shell pwd)/../../../../usr/
-
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index dc44de904797..ba03ce334212 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -31,9 +31,5 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
 $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
 	$(AR) crs $@ $^
 
-$(LINUX_HDR_PATH):
-	make -C $(top_srcdir) headers_install
-
-all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
+all: $(STATIC_LIBS)
 $(TEST_GEN_PROGS): $(STATIC_LIBS)
-$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 195e9d4739a9..e0bfbc5b1f1f 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -16,8 +16,16 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
 TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
+top_srcdir ?= ../../../../
+
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
+.PHONY: khdr
+khdr:
+	make -C $(top_srcdir) headers_install
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES):| khdr
+
 .ONESHELL:
 define RUN_TESTS
 	@export KSFT_TAP_LEVEL=`echo 1`;
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index fdefa2295ddc..1e34a40745ef 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -29,6 +29,3 @@ $(OUTPUT)/userfaultfd: ../../../../usr/include/linux/kernel.h
 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread
 
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
-
-../../../../usr/include/linux/kernel.h:
-	make -C ../../../.. headers_install
-- 
2.16.3

^ permalink raw reply related

* Re: WARNING in kobject_add_internal
From: Dmitry Vyukov @ 2018-04-12 10:04 UTC (permalink / raw)
  To: Yuan, Linyu (NSB - CN/Shanghai)
  Cc: bridge@lists.linux-foundation.org, David Miller,
	Greg Kroah-Hartman, LKML, netdev, stephen hemminger,
	syzkaller-bugs, syzkaller
In-Reply-To: <CACT4Y+bc0hsCdmakcOFpztkocimLgTtWwzGsgCrtf9RnSgkp5A@mail.gmail.com>

On Thu, Apr 12, 2018 at 12:04 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
> On Thu, Apr 12, 2018 at 2:29 AM, Yuan, Linyu (NSB - CN/Shanghai)
> <linyu.yuan@nokia-sbell.com> wrote:
>> Hi,
>>
>> I have a question,
>> "can syzbot auto test each tree with newest changeset" ?
>
> Hi Yuan,
>
> Please elaborate.
> What trees? What newest changeset? Test against what criteria?

+syzkaller mailing list

>>> -----Original Message-----
>>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>>> On Behalf Of Dmitry Vyukov
>>> Sent: Wednesday, April 11, 2018 10:58 PM
>>> To: syzbot
>>> Cc: bridge@lists.linux-foundation.org; David Miller; Greg Kroah-Hartman;
>>> LKML; netdev; stephen hemminger; syzkaller-bugs
>>> Subject: Re: WARNING in kobject_add_internal
>>>
>>> On Fri, Jan 5, 2018 at 10:41 PM, syzbot
>>> <syzbot+e204ced820ef739d71ef5438f5e1976a874abc8d@syzkaller.appspotma
>>> il.com>
>>> wrote:
>>> > syzkaller has found reproducer for the following crash on
>>> > 89876f275e8d562912d9c238cd888b52065cf25c
>>> > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
>>> > compiler: gcc (GCC) 7.1.1 20170620
>>> > .config is attached
>>> > Raw console output is attached.
>>> > C reproducer is attached
>>> > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>>> > for information about syzkaller reproducers
>>> >
>>> >
>>> > IMPORTANT: if you fix the bug, please add the following tag to the commit:
>>> > Reported-by:
>>> >
>>> syzbot+e204ced820ef739d71ef5438f5e1976a874abc8d@syzkaller.appspotmail
>>> .com
>>> > It will help syzbot understand when the bug is fixed.
>>>
>>> #syz dup: WARNING: kobject bug in device_add
>>>
>>> > ------------[ cut here ]------------
>>> > kobject_add_internal failed for   (error: -12 parent: net)
>>> > WARNING: CPU: 1 PID: 3494 at lib/kobject.c:244
>>> > kobject_add_internal+0x3f6/0xbc0 lib/kobject.c:242
>>> > Kernel panic - not syncing: panic_on_warn set ...
>>> >
>>> > CPU: 1 PID: 3494 Comm: syzkaller425998 Not tainted 4.15.0-rc6+ #249
>>> > Hardware name: Google Google Compute Engine/Google Compute Engine,
>>> BIOS
>>> > Google 01/01/2011
>>> > Call Trace:
>>> >  __dump_stack lib/dump_stack.c:17 [inline]
>>> >  dump_stack+0x194/0x257 lib/dump_stack.c:53
>>> >  panic+0x1e4/0x41c kernel/panic.c:183
>>> >  __warn+0x1dc/0x200 kernel/panic.c:547
>>> >  report_bug+0x211/0x2d0 lib/bug.c:184
>>> >  fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
>>> >  fixup_bug arch/x86/kernel/traps.c:247 [inline]
>>> >  do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
>>> >  do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
>>> >  invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:1079
>>> > RIP: 0010:kobject_add_internal+0x3f6/0xbc0 lib/kobject.c:242
>>> > RSP: 0018:ffff8801c53c76f0 EFLAGS: 00010286
>>> > RAX: dffffc0000000008 RBX: ffff8801bf5a88d8 RCX: ffffffff8159da9e
>>> > RDX: 0000000000000000 RSI: 1ffff10038a78e99 RDI: ffff8801c53c73f8
>>> > RBP: ffff8801c53c77e8 R08: 1ffff10038a78e5b R09: 0000000000000000
>>> > R10: ffff8801c53c74b0 R11: 0000000000000000 R12: 1ffff10038a78ee4
>>> > R13: 00000000fffffff4 R14: ffff8801d8359a80 R15: ffffffff86201980
>>> >  kobject_add_varg lib/kobject.c:366 [inline]
>>> >  kobject_add+0x132/0x1f0 lib/kobject.c:411
>>> >  device_add+0x35d/0x1650 drivers/base/core.c:1787
>>> >  netdev_register_kobject+0x183/0x360 net/core/net-sysfs.c:1604
>>> >  register_netdevice+0xb2b/0x1010 net/core/dev.c:7698
>>> >  tun_set_iff drivers/net/tun.c:2319 [inline]
>>> >  __tun_chr_ioctl+0x1d89/0x3dd0 drivers/net/tun.c:2524
>>> >  tun_chr_ioctl+0x2a/0x40 drivers/net/tun.c:2773
>>> >  vfs_ioctl fs/ioctl.c:46 [inline]
>>> >  do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
>>> >  SYSC_ioctl fs/ioctl.c:701 [inline]
>>> >  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
>>> >  entry_SYSCALL_64_fastpath+0x23/0x9a
>>> > RIP: 0033:0x444fc9
>>> > RSP: 002b:00007fff53389dc8 EFLAGS: 00000246 ORIG_RAX:
>>> 0000000000000010
>>> > RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000444fc9
>>> > RDX: 0000000020533000 RSI: 00000000400454ca RDI: 0000000000000004
>>> > RBP: 0000000000000005 R08: 0000000000000002 R09: 0000006f00003131
>>> > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000402500
>>> > R13: 0000000000402590 R14: 0000000000000000 R15: 0000000000000000
>>> >
>>> > Dumping ftrace buffer:
>>> >    (ftrace buffer empty)
>>> > Kernel Offset: disabled
>>> > Rebooting in 86400 seconds..
>>> >

^ permalink raw reply

* Re: WARNING in kobject_add_internal
From: Dmitry Vyukov @ 2018-04-12 10:04 UTC (permalink / raw)
  To: Yuan, Linyu (NSB - CN/Shanghai)
  Cc: bridge@lists.linux-foundation.org, David Miller,
	Greg Kroah-Hartman, LKML, netdev, stephen hemminger,
	syzkaller-bugs
In-Reply-To: <aba0ade61a234e8cac26b137963c7cc3@nokia-sbell.com>

On Thu, Apr 12, 2018 at 2:29 AM, Yuan, Linyu (NSB - CN/Shanghai)
<linyu.yuan@nokia-sbell.com> wrote:
> Hi,
>
> I have a question,
> "can syzbot auto test each tree with newest changeset" ?

Hi Yuan,

Please elaborate.
What trees? What newest changeset? Test against what criteria?

>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org]
>> On Behalf Of Dmitry Vyukov
>> Sent: Wednesday, April 11, 2018 10:58 PM
>> To: syzbot
>> Cc: bridge@lists.linux-foundation.org; David Miller; Greg Kroah-Hartman;
>> LKML; netdev; stephen hemminger; syzkaller-bugs
>> Subject: Re: WARNING in kobject_add_internal
>>
>> On Fri, Jan 5, 2018 at 10:41 PM, syzbot
>> <syzbot+e204ced820ef739d71ef5438f5e1976a874abc8d@syzkaller.appspotma
>> il.com>
>> wrote:
>> > syzkaller has found reproducer for the following crash on
>> > 89876f275e8d562912d9c238cd888b52065cf25c
>> > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/master
>> > compiler: gcc (GCC) 7.1.1 20170620
>> > .config is attached
>> > Raw console output is attached.
>> > C reproducer is attached
>> > syzkaller reproducer is attached. See https://goo.gl/kgGztJ
>> > for information about syzkaller reproducers
>> >
>> >
>> > IMPORTANT: if you fix the bug, please add the following tag to the commit:
>> > Reported-by:
>> >
>> syzbot+e204ced820ef739d71ef5438f5e1976a874abc8d@syzkaller.appspotmail
>> .com
>> > It will help syzbot understand when the bug is fixed.
>>
>> #syz dup: WARNING: kobject bug in device_add
>>
>> > ------------[ cut here ]------------
>> > kobject_add_internal failed for   (error: -12 parent: net)
>> > WARNING: CPU: 1 PID: 3494 at lib/kobject.c:244
>> > kobject_add_internal+0x3f6/0xbc0 lib/kobject.c:242
>> > Kernel panic - not syncing: panic_on_warn set ...
>> >
>> > CPU: 1 PID: 3494 Comm: syzkaller425998 Not tainted 4.15.0-rc6+ #249
>> > Hardware name: Google Google Compute Engine/Google Compute Engine,
>> BIOS
>> > Google 01/01/2011
>> > Call Trace:
>> >  __dump_stack lib/dump_stack.c:17 [inline]
>> >  dump_stack+0x194/0x257 lib/dump_stack.c:53
>> >  panic+0x1e4/0x41c kernel/panic.c:183
>> >  __warn+0x1dc/0x200 kernel/panic.c:547
>> >  report_bug+0x211/0x2d0 lib/bug.c:184
>> >  fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178
>> >  fixup_bug arch/x86/kernel/traps.c:247 [inline]
>> >  do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296
>> >  do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
>> >  invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:1079
>> > RIP: 0010:kobject_add_internal+0x3f6/0xbc0 lib/kobject.c:242
>> > RSP: 0018:ffff8801c53c76f0 EFLAGS: 00010286
>> > RAX: dffffc0000000008 RBX: ffff8801bf5a88d8 RCX: ffffffff8159da9e
>> > RDX: 0000000000000000 RSI: 1ffff10038a78e99 RDI: ffff8801c53c73f8
>> > RBP: ffff8801c53c77e8 R08: 1ffff10038a78e5b R09: 0000000000000000
>> > R10: ffff8801c53c74b0 R11: 0000000000000000 R12: 1ffff10038a78ee4
>> > R13: 00000000fffffff4 R14: ffff8801d8359a80 R15: ffffffff86201980
>> >  kobject_add_varg lib/kobject.c:366 [inline]
>> >  kobject_add+0x132/0x1f0 lib/kobject.c:411
>> >  device_add+0x35d/0x1650 drivers/base/core.c:1787
>> >  netdev_register_kobject+0x183/0x360 net/core/net-sysfs.c:1604
>> >  register_netdevice+0xb2b/0x1010 net/core/dev.c:7698
>> >  tun_set_iff drivers/net/tun.c:2319 [inline]
>> >  __tun_chr_ioctl+0x1d89/0x3dd0 drivers/net/tun.c:2524
>> >  tun_chr_ioctl+0x2a/0x40 drivers/net/tun.c:2773
>> >  vfs_ioctl fs/ioctl.c:46 [inline]
>> >  do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:686
>> >  SYSC_ioctl fs/ioctl.c:701 [inline]
>> >  SyS_ioctl+0x8f/0xc0 fs/ioctl.c:692
>> >  entry_SYSCALL_64_fastpath+0x23/0x9a
>> > RIP: 0033:0x444fc9
>> > RSP: 002b:00007fff53389dc8 EFLAGS: 00000246 ORIG_RAX:
>> 0000000000000010
>> > RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 0000000000444fc9
>> > RDX: 0000000020533000 RSI: 00000000400454ca RDI: 0000000000000004
>> > RBP: 0000000000000005 R08: 0000000000000002 R09: 0000006f00003131
>> > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000402500
>> > R13: 0000000000402590 R14: 0000000000000000 R15: 0000000000000000
>> >
>> > Dumping ftrace buffer:
>> >    (ftrace buffer empty)
>> > Kernel Offset: disabled
>> > Rebooting in 86400 seconds..
>> >

^ permalink raw reply

* Re: [RFC PATCH v2 03/14] xsk: add umem fill queue support and mmap
From: Jesper Dangaard Brouer @ 2018-04-12  8:54 UTC (permalink / raw)
  To: Karlsson, Magnus
  Cc: Michael S. Tsirkin, Björn Töpel, Duyck, Alexander H,
	alexander.duyck@gmail.com, john.fastabend@gmail.com, ast@fb.com,
	willemdebruijn.kernel@gmail.com, daniel@iogearbox.net,
	netdev@vger.kernel.org, michael.lundkvist@ericsson.com,
	Brandeburg, Jesse, Singhai, Anjali, Zhang, Qi Z,
	ravineet.singh@ericsson.com, brouer
In-Reply-To: <AFED4FBCE79F3548A8F74434195ACE39588D1AA4@IRSMSX107.ger.corp.intel.com>

On Thu, 12 Apr 2018 07:38:25 +0000
"Karlsson, Magnus" <magnus.karlsson@intel.com> wrote:

> > -----Original Message-----
> > From: Michael S. Tsirkin [mailto:mst@redhat.com]
> > Sent: Thursday, April 12, 2018 4:16 AM
> > To: Björn Töpel <bjorn.topel@gmail.com>
> > Cc: Karlsson, Magnus <magnus.karlsson@intel.com>; Duyck, Alexander H
> > <alexander.h.duyck@intel.com>; alexander.duyck@gmail.com;
> > john.fastabend@gmail.com; ast@fb.com; brouer@redhat.com;
> > willemdebruijn.kernel@gmail.com; daniel@iogearbox.net;
> > netdev@vger.kernel.org; michael.lundkvist@ericsson.com; Brandeburg,
> > Jesse <jesse.brandeburg@intel.com>; Singhai, Anjali
> > <anjali.singhai@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>;
> > ravineet.singh@ericsson.com
> > Subject: Re: [RFC PATCH v2 03/14] xsk: add umem fill queue support and
> > mmap
> > 
> > On Tue, Mar 27, 2018 at 06:59:08PM +0200, Björn Töpel wrote:  
> > > @@ -30,4 +31,18 @@ struct xdp_umem_reg {
> > >  	__u32 frame_headroom; /* Frame head room */  };
> > >
> > > +/* Pgoff for mmaping the rings */
> > > +#define XDP_UMEM_PGOFF_FILL_QUEUE	0x100000000
> > > +
> > > +struct xdp_queue {
> > > +	__u32 head_idx __attribute__((aligned(64)));
> > > +	__u32 tail_idx __attribute__((aligned(64))); };
> > > +
> > > +/* Used for the fill and completion queues for buffers */ struct
> > > +xdp_umem_queue {
> > > +	struct xdp_queue ptrs;
> > > +	__u32 desc[0] __attribute__((aligned(64))); };
> > > +
> > >  #endif /* _LINUX_IF_XDP_H */  
> > 
> > So IIUC it's a head/tail ring of 32 bit descriptors.
> > 
> > In my experience (from implementing ptr_ring) this implies that head/tail
> > cache lines bounce a lot between CPUs. Caching will help some. You are also
> > forced to use barriers to check validity which is slow on some architectures.
> > 
> > If instead you can use a special descriptor value (e.g. 0) as a valid signal,
> > things work much better:
> > 
> > - you read descriptor atomically, if it's not 0 it's fine
> > - same with write - write 0 to pass it to the other side
> > - there is a data dependency so no need for barriers (except on dec alpha)
> > - no need for power of 2 limitations, you can make it any size you like
> > - easy to resize too
> > 
> > architecture (if not implementation) would be shared with ptr_ring so some
> > of the optimization ideas like batched updates could be lifted from there.
> > 
> > When I was building ptr_ring, any head/tail design underperformed storing
> > valid flag with data itself. YMMV.

I fully agree with MST here. This is also my experience.  I even
dropped my own Array-based Lock-Free (ALF) queue implementation[1] in
favor of ptr_ring. (Where I try to amortize this cost by bulking, but
this cause the queue to become non-wait-free)

[1] https://github.com/netoptimizer/prototype-kernel/blob/master/kernel/include/linux/alf_queue.h

> I think you are definitely right in that there are ways in which
> we can improve performance here. That said, the current queue
> performs slightly better than the previous one we had that was
> more or less a copy of one of your first virtio 1.1 proposals
> from little over a year ago. It had bidirectional queues and a
> valid flag in the descriptor itself. The reason we abandoned this
> was not poor performance (it was good), but a need to go to
> unidirectional queues. Maybe I should have only changed that
> aspect and kept the valid flag.
> 
> Anyway, I will take a look at ptr_ring and run some experiments
> along the lines of what you propose to get some
> numbers. Considering your experience with these kind of
> structures, you are likely right. I just need to convince
> myself :-).

When benchmarking, be careful that you don't measure the "wrong"
queue situation.  When doing this kind of "overload" benchmarking, you
will likely create a situation where the queue is always full (which
hopefully isn't a production use-case).  In the almost/always full
queue situation, using the element values to sync-on (like MST propose)
will still cause the cache-line bouncing (that we want to avoid).

MST explain and have addressed this situation for ptr_ring in:
 commit fb9de9704775 ("ptr_ring: batch ring zeroing")
 https://git.kernel.org/torvalds/c/fb9de9704775

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply

* [PATCH v2 net] net: fix deadlock while clearing neighbor proxy table
From: Wolfgang Bumiller @ 2018-04-12  8:46 UTC (permalink / raw)
  To: netdev; +Cc: David S. Miller, Hideaki YOSHIFUJI

When coming from ndisc_netdev_event() in net/ipv6/ndisc.c,
neigh_ifdown() is called with &nd_tbl, locking this while
clearing the proxy neighbor entries when eg. deleting an
interface. Calling the table's pndisc_destructor() with the
lock still held, however, can cause a deadlock: When a
multicast listener is available an IGMP packet of type
ICMPV6_MGM_REDUCTION may be sent out. When reaching
ip6_finish_output2(), if no neighbor entry for the target
address is found, __neigh_create() is called with &nd_tbl,
which it'll want to lock.

Move the elements into their own list, then unlock the table
and perform the destruction.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199289
Fixes: 6fd6ce2056de ("ipv6: Do not depend on rt->n in ip6_finish_output2().")
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
---
Changes to v1:
  * Renamed 'pneigh_ifdown' to 'pneigh_ifdown_and_unlock'.

Btw. I'm not actually sure how much sense the Fixes tag makes as the
commit itself isn't wrong, it just happens to be the most easily
triggerable code path there (and I can't definitively rule out others,
given that the "sending something over the network with the lock held
will deadlock" comment at the top of the file is also from the initial
commit I'd expect other backtraces to be possible from out of that
function) and the other affected lines are mostly the initial git
commit...

 net/core/neighbour.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 7b7a14abba28..e22d2aefbd78 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -55,7 +55,8 @@ static void neigh_timer_handler(struct timer_list *t);
 static void __neigh_notify(struct neighbour *n, int type, int flags,
 			   u32 pid);
 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+				    struct net_device *dev);
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations neigh_stat_seq_fops;
@@ -291,8 +292,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 {
 	write_lock_bh(&tbl->lock);
 	neigh_flush_dev(tbl, dev);
-	pneigh_ifdown(tbl, dev);
-	write_unlock_bh(&tbl->lock);
+	pneigh_ifdown_and_unlock(tbl, dev);
 
 	del_timer_sync(&tbl->proxy_timer);
 	pneigh_queue_purge(&tbl->proxy_queue);
@@ -681,9 +681,10 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 	return -ENOENT;
 }
 
-static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+				    struct net_device *dev)
 {
-	struct pneigh_entry *n, **np;
+	struct pneigh_entry *n, **np, *freelist = NULL;
 	u32 h;
 
 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
@@ -691,16 +692,23 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 		while ((n = *np) != NULL) {
 			if (!dev || n->dev == dev) {
 				*np = n->next;
-				if (tbl->pdestructor)
-					tbl->pdestructor(n);
-				if (n->dev)
-					dev_put(n->dev);
-				kfree(n);
+				n->next = freelist;
+				freelist = n;
 				continue;
 			}
 			np = &n->next;
 		}
 	}
+	write_unlock_bh(&tbl->lock);
+	while ((n = freelist)) {
+		freelist = n->next;
+		n->next = NULL;
+		if (tbl->pdestructor)
+			tbl->pdestructor(n);
+		if (n->dev)
+			dev_put(n->dev);
+		kfree(n);
+	}
 	return -ENOENT;
 }
 
-- 
2.11.0

^ permalink raw reply related

* Re: net_dim() may use uninitialized data
From: Tal Gilboa @ 2018-04-12  8:32 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: netdev, Linux Kernel Mailing List
In-Reply-To: <CAMuHMdWovmqqgrOvtA=cyineJigwOtv-tGwz_PBVRFrgtryumw@mail.gmail.com>

On 4/5/2018 4:13 PM, Geert Uytterhoeven wrote:
> Hi Tal,
> 
> With gcc-4.1.2:
> 
>      drivers/net/ethernet/broadcom/bcmsysport.c: In function ‘bcm_sysport_poll’:
>      include/linux/net_dim.h:354: warning: ‘curr_stats.ppms’ may be
> used uninitialized in this function
>      include/linux/net_dim.h:354: warning: ‘curr_stats.bpms’ may be
> used uninitialized in this function
>      include/linux/net_dim.h:354: warning: ‘curr_stats.epms’ may be
> used uninitialized in this function
> 
> Indeed, ...
> 
> | static inline void net_dim_calc_stats(struct net_dim_sample *start,
> |                                       struct net_dim_sample *end,
> |                                       struct net_dim_stats *curr_stats)
> | {
> |         /* u32 holds up to 71 minutes, should be enough */
> |         u32 delta_us = ktime_us_delta(end->time, start->time);
> |         u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
> |         u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
> |                              start->byte_ctr);
> |
> |         if (!delta_us)
> |                 return;
> 
> ... if delta_us is zero, none of the below will be initialized ...
> 
> |         curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
> |         curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
> |         curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC,
> |                                         delta_us);
> | }
> |
> | static inline void net_dim(struct net_dim *dim,
> |                            struct net_dim_sample end_sample)
> | {
> |         struct net_dim_stats curr_stats;
> |         u16 nevents;
> |
> |         switch (dim->state) {
> |         case NET_DIM_MEASURE_IN_PROGRESS:
> |                 nevents = BIT_GAP(BITS_PER_TYPE(u16),
> |                                   end_sample.event_ctr,
> |                                   dim->start_sample.event_ctr);
> |                 if (nevents < NET_DIM_NEVENTS)
> |                         break;
> |                 net_dim_calc_stats(&dim->start_sample, &end_sample,
> |                                    &curr_stats);
> 
> ... in the output parameter curr_stats ...
> 
> |                 if (net_dim_decision(&curr_stats, dim)) {
> 
> ... and net_dim_decision will make some funky decisions based on
> uninitialized data.
> 
> What are the proper values to initialize curr_stats with?
> Alternatively, perhaps the call to net_dim_decision() should be made
> dependent on delta_us being non-zero?

First, thanks a lot for pointing this out. There are no valid values for 
initializing curr_stats. If we consider the most straightforward (all 
0s) this may result in a (big) negative delta between current and 
previous stats and a wrong decision. Any other value would make very 
little sense.
The case of !delta_us is an error flow (0 time passed or more probably 
issues when setting start and/or end times). I suggest adding a return 
value to net_dim_calc_stats() and abort the net_dim cycle if an error 
occurs.

> 
> |                         dim->state = NET_DIM_APPLY_NEW_PROFILE;
> |                         schedule_work(&dim->work);
> |                         break;
> |                 }
> |                 /* fall through */
> |         case NET_DIM_START_MEASURE:
> |                 dim->state = NET_DIM_MEASURE_IN_PROGRESS;
> |                 break;
> |         case NET_DIM_APPLY_NEW_PROFILE:
> |                 break;
> |         }
> | }
> 
> Gr{oetje,eeting}s,
> 
>                          Geert
> 

^ permalink raw reply

* Re: KMSAN: uninit-value in __netif_receive_skb_core
From: Dmitry Vyukov @ 2018-04-12  8:03 UTC (permalink / raw)
  To: syzbot
  Cc: bpoirier, David Miller, Eric Dumazet, Reshetova, Elena,
	Hans Liljestrand, Kees Cook, LKML, Mike Maloney, netdev,
	rami.rosen, syzkaller-bugs, Willem de Bruijn, makita.toshiaki
In-Reply-To: <94eb2c059ce01f643c0569a228ee@google.com>

On Thu, Apr 12, 2018 at 10:01 AM, syzbot
<syzbot+b202b7208664142954fa@syzkaller.appspotmail.com> wrote:
> Hello,
>
> syzbot hit the following crash on https://github.com/google/kmsan.git/master
> commit
> e2ab7e8abba47a2f2698216258e5d8727ae58717 (Fri Apr 6 16:24:31 2018 +0000)
> kmsan: temporarily disable visitAsmInstruction() to help syzbot
> syzbot dashboard link:
> https://syzkaller.appspot.com/bug?extid=b202b7208664142954fa
>
> Unfortunately, I don't have any reproducer for this crash yet.
> Raw console output:
> https://syzkaller.appspot.com/x/log.txt?id=5356516437655552
> Kernel config:
> https://syzkaller.appspot.com/x/.config?id=6627248707860932248
> compiler: clang version 7.0.0 (trunk 329391)

+Toshiaki as this seems to be related to the recent vlan tagging changes.
This also seems to be related to
https://groups.google.com/d/msg/syzkaller-bugs/VRH9NnUi2k0/90GYsAeRBgAJ

> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+b202b7208664142954fa@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> ==================================================================
> BUG: KMSAN: uninit-value in __read_once_size include/linux/compiler.h:197
> [inline]
> BUG: KMSAN: uninit-value in deliver_ptype_list_skb net/core/dev.c:1908
> [inline]
> BUG: KMSAN: uninit-value in __netif_receive_skb_core+0x4630/0x4a80
> net/core/dev.c:4545
> CPU: 0 PID: 5999 Comm: syz-executor3 Not tainted 4.16.0+ #82
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  <IRQ>
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x185/0x1d0 lib/dump_stack.c:53
>  kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
>  __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676
>  __read_once_size include/linux/compiler.h:197 [inline]
>  deliver_ptype_list_skb net/core/dev.c:1908 [inline]
>  __netif_receive_skb_core+0x4630/0x4a80 net/core/dev.c:4545
>  __netif_receive_skb net/core/dev.c:4627 [inline]
>  process_backlog+0x62d/0xe20 net/core/dev.c:5307
>  napi_poll net/core/dev.c:5705 [inline]
>  net_rx_action+0x7c1/0x1a70 net/core/dev.c:5771
>  __do_softirq+0x56d/0x93d kernel/softirq.c:285
>  do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1040
>  </IRQ>
>  do_softirq kernel/softirq.c:329 [inline]
>  __local_bh_enable_ip+0x114/0x140 kernel/softirq.c:182
>  local_bh_enable+0x36/0x40 include/linux/bottom_half.h:32
>  rcu_read_unlock_bh include/linux/rcupdate.h:726 [inline]
>  __dev_queue_xmit+0x2a31/0x2b60 net/core/dev.c:3584
>  dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590
>  packet_snd net/packet/af_packet.c:2944 [inline]
>  packet_sendmsg+0x7c57/0x8a10 net/packet/af_packet.c:2969
>  sock_sendmsg_nosec net/socket.c:630 [inline]
>  sock_sendmsg net/socket.c:640 [inline]
>  sock_write_iter+0x3b9/0x470 net/socket.c:909
>  do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
>  do_iter_write+0x30d/0xd40 fs/read_write.c:932
>  vfs_writev fs/read_write.c:977 [inline]
>  do_writev+0x3c9/0x830 fs/read_write.c:1012
>  SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
>  SyS_writev+0x56/0x80 fs/read_write.c:1082
>  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
>  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> RIP: 0033:0x455259
> RSP: 002b:00007fb53ede8c68 EFLAGS: 00000246 ORIG_RAX: 0000000000000014
> RAX: ffffffffffffffda RBX: 00007fb53ede96d4 RCX: 0000000000455259
> RDX: 0000000000000001 RSI: 00000000200010c0 RDI: 0000000000000013
> RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
> R13: 00000000000006cd R14: 00000000006fd3d8 R15: 0000000000000000
>
> Uninit was stored to memory at:
>  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
>  kmsan_save_stack mm/kmsan/kmsan.c:293 [inline]
>  kmsan_internal_chain_origin+0x12b/0x210 mm/kmsan/kmsan.c:684
>  __msan_chain_origin+0x69/0xc0 mm/kmsan/kmsan_instr.c:521
>  skb_vlan_untag+0x950/0xee0 include/linux/if_vlan.h:597
>  __netif_receive_skb_core+0x70a/0x4a80 net/core/dev.c:4460
>  __netif_receive_skb net/core/dev.c:4627 [inline]
>  process_backlog+0x62d/0xe20 net/core/dev.c:5307
>  napi_poll net/core/dev.c:5705 [inline]
>  net_rx_action+0x7c1/0x1a70 net/core/dev.c:5771
>  __do_softirq+0x56d/0x93d kernel/softirq.c:285
> Uninit was created at:
>  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
>  kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
>  kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
>  kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
>  slab_post_alloc_hook mm/slab.h:445 [inline]
>  slab_alloc_node mm/slub.c:2737 [inline]
>  __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
>  __kmalloc_reserve net/core/skbuff.c:138 [inline]
>  __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
>  alloc_skb include/linux/skbuff.h:984 [inline]
>  alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
>  sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
>  packet_alloc_skb net/packet/af_packet.c:2803 [inline]
>  packet_snd net/packet/af_packet.c:2894 [inline]
>  packet_sendmsg+0x6444/0x8a10 net/packet/af_packet.c:2969
>  sock_sendmsg_nosec net/socket.c:630 [inline]
>  sock_sendmsg net/socket.c:640 [inline]
>  sock_write_iter+0x3b9/0x470 net/socket.c:909
>  do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
>  do_iter_write+0x30d/0xd40 fs/read_write.c:932
>  vfs_writev fs/read_write.c:977 [inline]
>  do_writev+0x3c9/0x830 fs/read_write.c:1012
>  SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
>  SyS_writev+0x56/0x80 fs/read_write.c:1082
>  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
>  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> ==================================================================
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line in the email body.
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/syzkaller-bugs/94eb2c059ce01f643c0569a228ee%40google.com.
> For more options, visit https://groups.google.com/d/optout.

^ permalink raw reply

* Re: KMSAN: uninit-value in netif_skb_features
From: Dmitry Vyukov @ 2018-04-12  8:03 UTC (permalink / raw)
  To: syzbot
  Cc: bpoirier, David Miller, Eric Dumazet, Reshetova, Elena, Kees Cook,
	LKML, Mike Maloney, netdev, rami.rosen, syzkaller-bugs,
	Willem de Bruijn, makita.toshiaki
In-Reply-To: <089e082d0cb81b67d10569a2283f@google.com>

On Thu, Apr 12, 2018 at 10:01 AM, syzbot
<syzbot+0bbe42c764feafa82c5a@syzkaller.appspotmail.com> wrote:
> Hello,
>
> syzbot hit the following crash on https://github.com/google/kmsan.git/master
> commit
> e2ab7e8abba47a2f2698216258e5d8727ae58717 (Fri Apr 6 16:24:31 2018 +0000)
> kmsan: temporarily disable visitAsmInstruction() to help syzbot
> syzbot dashboard link:
> https://syzkaller.appspot.com/bug?extid=0bbe42c764feafa82c5a
>
> So far this crash happened 30 times on
> https://github.com/google/kmsan.git/master.
> C reproducer: https://syzkaller.appspot.com/x/repro.c?id=4850744041668608
> syzkaller reproducer:
> https://syzkaller.appspot.com/x/repro.syz?id=6289386287136768
> Raw console output:
> https://syzkaller.appspot.com/x/log.txt?id=4577411249209344
> Kernel config:
> https://syzkaller.appspot.com/x/.config?id=6627248707860932248
> compiler: clang version 7.0.0 (trunk 329391)

+Toshiaki as this seems to be related to the recent vlan tagging changes.
This also seems to be related to
https://groups.google.com/d/msg/syzkaller-bugs/FNEavkB4QaM/efXl2AeRBgAJ



> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+0bbe42c764feafa82c5a@syzkaller.appspotmail.com
> It will help syzbot understand when the bug is fixed. See footer for
> details.
> If you forward the report, please keep this part and the footer.
>
> ==================================================================
> BUG: KMSAN: uninit-value in eth_type_vlan include/linux/if_vlan.h:283
> [inline]
> BUG: KMSAN: uninit-value in skb_vlan_tagged_multi
> include/linux/if_vlan.h:656 [inline]
> BUG: KMSAN: uninit-value in vlan_features_check include/linux/if_vlan.h:672
> [inline]
> BUG: KMSAN: uninit-value in dflt_features_check net/core/dev.c:2949 [inline]
> BUG: KMSAN: uninit-value in netif_skb_features+0xd1b/0xdc0
> net/core/dev.c:3009
> CPU: 1 PID: 3582 Comm: syzkaller435149 Not tainted 4.16.0+ #82
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
> Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:17 [inline]
>  dump_stack+0x185/0x1d0 lib/dump_stack.c:53
>  kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067
>  __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:676
>  eth_type_vlan include/linux/if_vlan.h:283 [inline]
>  skb_vlan_tagged_multi include/linux/if_vlan.h:656 [inline]
>  vlan_features_check include/linux/if_vlan.h:672 [inline]
>  dflt_features_check net/core/dev.c:2949 [inline]
>  netif_skb_features+0xd1b/0xdc0 net/core/dev.c:3009
>  validate_xmit_skb+0x89/0x1320 net/core/dev.c:3084
>  __dev_queue_xmit+0x1cb2/0x2b60 net/core/dev.c:3549
>  dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590
>  packet_snd net/packet/af_packet.c:2944 [inline]
>  packet_sendmsg+0x7c57/0x8a10 net/packet/af_packet.c:2969
>  sock_sendmsg_nosec net/socket.c:630 [inline]
>  sock_sendmsg net/socket.c:640 [inline]
>  sock_write_iter+0x3b9/0x470 net/socket.c:909
>  do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
>  do_iter_write+0x30d/0xd40 fs/read_write.c:932
>  vfs_writev fs/read_write.c:977 [inline]
>  do_writev+0x3c9/0x830 fs/read_write.c:1012
>  SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
>  SyS_writev+0x56/0x80 fs/read_write.c:1082
>  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
>  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> RIP: 0033:0x43ffa9
> RSP: 002b:00007fff2cff3948 EFLAGS: 00000217 ORIG_RAX: 0000000000000014
> RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043ffa9
> RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003
> RBP: 00000000006cb018 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000217 R12: 00000000004018d0
> R13: 0000000000401960 R14: 0000000000000000 R15: 0000000000000000
>
> Uninit was created at:
>  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline]
>  kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188
>  kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314
>  kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321
>  slab_post_alloc_hook mm/slab.h:445 [inline]
>  slab_alloc_node mm/slub.c:2737 [inline]
>  __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369
>  __kmalloc_reserve net/core/skbuff.c:138 [inline]
>  __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206
>  alloc_skb include/linux/skbuff.h:984 [inline]
>  alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234
>  sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085
>  packet_alloc_skb net/packet/af_packet.c:2803 [inline]
>  packet_snd net/packet/af_packet.c:2894 [inline]
>  packet_sendmsg+0x6444/0x8a10 net/packet/af_packet.c:2969
>  sock_sendmsg_nosec net/socket.c:630 [inline]
>  sock_sendmsg net/socket.c:640 [inline]
>  sock_write_iter+0x3b9/0x470 net/socket.c:909
>  do_iter_readv_writev+0x7bb/0x970 include/linux/fs.h:1776
>  do_iter_write+0x30d/0xd40 fs/read_write.c:932
>  vfs_writev fs/read_write.c:977 [inline]
>  do_writev+0x3c9/0x830 fs/read_write.c:1012
>  SYSC_writev+0x9b/0xb0 fs/read_write.c:1085
>  SyS_writev+0x56/0x80 fs/read_write.c:1082
>  do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287
>  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
> ==================================================================
>
>
> ---
> This bug is generated by a dumb bot. It may contain errors.
> See https://goo.gl/tpsmEJ for details.
> Direct all questions to syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report.
> If you forgot to add the Reported-by tag, once the fix for this bug is
> merged
> into any tree, please reply to this email with:
> #syz fix: exact-commit-title
> If you want to test a patch for this bug, please reply with:
> #syz test: git://repo/address.git branch
> and provide the patch inline or as an attachment.
> To mark this as a duplicate of another syzbot report, please reply with:
> #syz dup: exact-subject-of-another-report
> If it's a one-off invalid bug report, please reply with:
> #syz invalid
> Note: if the crash happens again, it will cause creation of a new bug
> report.
> Note: all commands must start from beginning of the line in the email body.
>
> --
> You received this message because you are subscribed to the Google Groups
> "syzkaller-bugs" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to syzkaller-bugs+unsubscribe@googlegroups.com.
> To view this discussion on the web visit
> https://groups.google.com/d/msgid/syzkaller-bugs/089e082d0cb81b67d10569a2283f%40google.com.
> For more options, visit https://groups.google.com/d/optout.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox