Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 4/5] can: implement companion-can driver
From: Mark Jonas @ 2018-06-05 18:43 UTC (permalink / raw)
  To: Wolfgang Grandegger, Marc Kleine-Budde
  Cc: linux-can, netdev, linux-kernel, hs, yi.zhu5, Mark Jonas
In-Reply-To: <1528224240-30786-1-git-send-email-mark.jonas@de.bosch.com>

From: Zhu Yi <yi.zhu5@cn.bosch.com>

The upper level companion-can driver provides SocketCAN interface to
userspace for communicate CAN messages with the companion processor.

Signed-off-by: Zhu Yi <yi.zhu5@cn.bosch.com>
Signed-off-by: Mark Jonas <mark.jonas@de.bosch.com>
---
 drivers/net/can/Kconfig         |   8 +
 drivers/net/can/Makefile        |   1 +
 drivers/net/can/companion-can.c | 694 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 703 insertions(+)
 create mode 100644 drivers/net/can/companion-can.c

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index ac4ff39..e403a7e 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -155,6 +155,14 @@ config PCH_CAN
 	  is an IOH for x86 embedded processor (Intel Atom E6xx series).
 	  This driver can access CAN bus.
 
+config COMPANION_CAN
+	tristate "Network device for companion communication (Bosch)"
+	depends on COMPANION_SPI
+	---help---
+	  The network device allows the userspace to use SocketCAN interface
+	  to communicate with the Bosch companion processor via the companion
+	  SPI driver.
+
 source "drivers/net/can/c_can/Kconfig"
 source "drivers/net/can/cc770/Kconfig"
 source "drivers/net/can/ifi_canfd/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 02b8ed7..a66a1f9 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -34,5 +34,6 @@ obj-$(CONFIG_CAN_SUN4I)		+= sun4i_can.o
 obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
 obj-$(CONFIG_CAN_XILINXCAN)	+= xilinx_can.o
 obj-$(CONFIG_PCH_CAN)		+= pch_can.o
+obj-$(CONFIG_COMPANION_CAN)	+= companion-can.o
 
 subdir-ccflags-$(CONFIG_CAN_DEBUG_DEVICES) += -DDEBUG
diff --git a/drivers/net/can/companion-can.c b/drivers/net/can/companion-can.c
new file mode 100644
index 0000000..5078640
--- /dev/null
+++ b/drivers/net/can/companion-can.c
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Companion upper level can network device
+ *
+ * Copyright (C) 2015-2018 Bosch Sicherheitssysteme GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/can/dev.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/companion.h>
+
+#define TX_QUEUE_DEPTH  16
+#define NUM_TX_QUEUES   8
+#define NUM_RX_QUEUES   1
+#define TX_ECHO_SKB_MAX NUM_TX_QUEUES * TX_QUEUE_DEPTH
+#define DRIVER_NAME     "bosch,companion-can"
+
+/**
+ * struct companion_can_priv - companion-can private data structure
+ * @can:     standard common CAN private data, must be first member
+ * @parent:  address of the associated parent device
+ * @dev:     address of the associated network device
+ * @port:    the companion CAN port number
+ * @tx_head: array of all tx queue head
+ * @tx_tail: arrat of all tx queue tail
+ */
+struct companion_can_priv {
+	struct can_priv	   can;
+	struct device     *parent;
+	struct net_device *dev;
+	u8                 port;
+	u8                 tx_head[NUM_TX_QUEUES];
+	u8                 tx_tail[NUM_TX_QUEUES];
+};
+
+/**
+ * companion_can_put_echo_skb() - put echo skb into ring buffer
+ * @priv: address of companion-can private data
+ * @prio: which CAN queue to put
+ * @skb:  address of the packet to put
+ */
+static void companion_can_put_echo_skb(struct companion_can_priv *priv,
+                                       u8                         prio,
+                                       struct sk_buff            *skb)
+{
+	u8 offset = prio * TX_QUEUE_DEPTH;
+	u8 index  = priv->tx_head[prio] % TX_QUEUE_DEPTH;
+	can_put_echo_skb(skb, priv->dev, offset + index);
+	priv->tx_head[prio]++;
+}
+
+/**
+ * companion_can_get_echo_skb() - get echo skb from ring buffer
+ * @priv: address of companion-can private data
+ * @prio: which CAN queue to get
+ */
+static u8 companion_can_get_echo_skb(struct companion_can_priv *priv,
+                                     u8                         prio)
+{
+	u8 offset, index, result = 0;
+
+	if (priv->tx_head[prio] != priv->tx_tail[prio]) {
+		offset = prio * TX_QUEUE_DEPTH;
+		index  = priv->tx_tail[prio] % TX_QUEUE_DEPTH;
+		result = can_get_echo_skb(priv->dev, offset + index);
+		priv->tx_tail[prio]++;
+	}
+	return result;
+}
+
+/**
+ * companion_can_free_echo_skb() - free echo skb from ring buffer
+ * @priv: address of companion-can private data
+ * @prio: which CAN queue to free
+ */
+static void companion_can_free_echo_skb(struct companion_can_priv *priv,
+                                        u8                         prio)
+{
+	u8 offset, index;
+
+	if (priv->tx_head[prio] != priv->tx_tail[prio]) {
+		offset = prio * TX_QUEUE_DEPTH;
+		index  = priv->tx_tail[prio] % TX_QUEUE_DEPTH;
+		can_free_echo_skb(priv->dev, offset + index);
+		priv->tx_tail[prio]++;
+	}
+}
+
+/**
+ * companion_can_set_bittiming() - set CAN bittiming
+ * @dev: address of the associated network device
+ */
+static int companion_can_set_bittiming(struct net_device *dev)
+{
+	struct companion_can_priv  *priv = netdev_priv(dev);
+	const struct can_bittiming *bt   = &priv->can.bittiming;
+	u32                         ctrl = priv->can.ctrlmode;
+	int                         err;
+
+	err = companion_do_set_can_bittiming(priv->parent, priv->port, bt);
+	if (err)
+		return err;
+
+	if (ctrl & CAN_CTRLMODE_LISTENONLY) {
+		err = companion_do_set_can_ctrlmode(priv->parent,
+		                                    priv->port,
+		                                    ctrl);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/**
+ * companion_can_set_mode() - set CAN mode
+ * @dev:  address of the associated network device
+ * @mode: the CAN mode to set
+ */
+static int companion_can_set_mode(struct net_device *dev,
+                                  enum can_mode      mode)
+{
+	struct companion_can_priv *priv = netdev_priv(dev);
+	int                        err;
+
+	switch (mode) {
+	case CAN_MODE_START:
+		err = companion_can_set_bittiming(dev);
+		if (err)
+			return err;
+		/* fall through */
+
+	case CAN_MODE_STOP:
+		err = companion_do_set_can_mode(priv->parent,
+		                                priv->port,
+		                                mode);
+		if (err)
+			return err;
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/**
+ * companion_can_get_berr_counter() - get CAN error counter
+ * @dev: address of the associated network device
+ * @bec: address of the CAN error counter to store
+ */
+static int companion_can_get_berr_counter(const struct net_device *dev,
+                                          struct can_berr_counter *bec)
+{
+	struct companion_can_priv *priv = netdev_priv(dev);
+	return companion_do_get_can_status(priv->parent, priv->port, bec);
+}
+
+/**
+ * companion_can_handle_state() - handle CAN state transition
+ * @dev:   address of the associated network device
+ * @cf:    address of the CAN frame to store CAN state
+ * @bec:   address of the CAN error counter
+ * @state: the companion CAN state
+ */
+static void companion_can_handle_state(struct net_device       *dev,
+                                       struct can_frame        *cf,
+                                       struct can_berr_counter *bec,
+                                       u8                       state)
+{
+	struct companion_can_priv *priv      = netdev_priv(dev);
+	enum   can_state           new_state = CAN_STATE_ERROR_ACTIVE;
+	enum   can_state           rx_state  = CAN_STATE_ERROR_ACTIVE;
+	enum   can_state           tx_state  = CAN_STATE_ERROR_ACTIVE;
+
+	if (state & COMPANION_CAN_STATE_BUS_OFF) {
+		new_state = CAN_STATE_BUS_OFF;
+		rx_state  = bec->rxerr >= bec->txerr ? new_state : rx_state;
+		tx_state  = bec->txerr >= bec->rxerr ? new_state : tx_state;
+	} else if (state & COMPANION_CAN_STATE_PASSIVE) {
+		new_state = CAN_STATE_ERROR_PASSIVE;
+		rx_state  = bec->rxerr > 127 ? new_state : rx_state;
+		tx_state  = bec->txerr > 127 ? new_state : tx_state;
+	} else if (state & COMPANION_CAN_STATE_WARNING) {
+		new_state = CAN_STATE_ERROR_WARNING;
+		rx_state  = bec->rxerr >= bec->txerr ? new_state : rx_state;
+		tx_state  = bec->txerr >= bec->rxerr ? new_state : tx_state;
+	}
+
+	if (new_state != priv->can.state) {
+		can_change_state(dev, cf, tx_state, rx_state);
+
+		if (new_state == CAN_STATE_BUS_OFF)
+			can_bus_off(dev);
+	}
+}
+
+/**
+ * companion_can_handle_error() - handle CAN error
+ * @dev:  address of the associated network device
+ * @cf:   address of the CAN frame to store CAN error
+ * @code: the companion CAN error code
+ */
+static void companion_can_handle_error(struct net_device *dev,
+                                       struct can_frame  *cf,
+                                       u8                 code)
+{
+	struct companion_can_priv *priv = netdev_priv(dev);
+
+	if (code & COMPANION_CAN_ERROR_RXOV) {
+		cf->can_id  |= CAN_ERR_CRTL;
+		cf->data[1] |= CAN_ERR_CRTL_RX_OVERFLOW;
+		dev->stats.rx_over_errors++;
+		dev->stats.rx_errors++;
+	}
+
+	if (code & (COMPANION_CAN_ERROR_STUFF |
+	            COMPANION_CAN_ERROR_FORM  |
+	            COMPANION_CAN_ERROR_ACK   |
+	            COMPANION_CAN_ERROR_BIT1  |
+	            COMPANION_CAN_ERROR_BIT0  |
+	            COMPANION_CAN_ERROR_CRC))
+	{
+		cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR;
+
+		if (code & COMPANION_CAN_ERROR_STUFF) {
+			cf->data[2] |= CAN_ERR_PROT_STUFF;
+			dev->stats.rx_errors++;
+		}
+
+		if (code & COMPANION_CAN_ERROR_FORM) {
+			cf->data[2] |= CAN_ERR_PROT_FORM;
+			dev->stats.rx_errors++;
+		}
+
+		if (code & COMPANION_CAN_ERROR_ACK) {
+			cf->can_id  |= CAN_ERR_ACK;
+			cf->data[3]  = CAN_ERR_PROT_LOC_ACK;
+			dev->stats.tx_errors++;
+		}
+
+		if (code & COMPANION_CAN_ERROR_BIT1) {
+			cf->data[2] |= CAN_ERR_PROT_BIT1;
+			dev->stats.tx_errors++;
+		}
+
+		if (code & COMPANION_CAN_ERROR_BIT0) {
+			cf->data[2] |= CAN_ERR_PROT_BIT0;
+			dev->stats.tx_errors++;
+		}
+
+		if (code & COMPANION_CAN_ERROR_CRC) {
+			cf->data[2] |= CAN_ERR_PROT_BIT;
+			cf->data[3]  = CAN_ERR_PROT_LOC_CRC_SEQ;
+			dev->stats.rx_errors++;
+		}
+
+		priv->can.can_stats.bus_error++;
+	}
+}
+
+/**
+ * companion_can_poll_err() - poll CAN error packet from companion
+ * @dev: address of the associated network device
+ */
+static bool companion_can_poll_err(struct net_device *dev)
+{
+	struct companion_can_priv *priv = netdev_priv(dev);
+	struct can_berr_counter    bec;
+	u8                         state;
+	u8                         code;
+	struct sk_buff            *skb;
+	struct can_frame          *cf;
+
+	if (companion_do_can_err(priv->parent,
+	                         priv->port,
+	                         &bec,
+	                         &state,
+	                         &code) != 0)
+		return false;
+
+	skb = alloc_can_err_skb(dev, &cf);
+	if (!skb) {
+		dev_err(&dev->dev, "cannot alloc err skb\n");
+		return false;
+	}
+
+	companion_can_handle_state(dev, cf, &bec, state);
+	companion_can_handle_error(dev, cf, code);
+
+	dev->stats.rx_bytes += cf->can_dlc;
+	dev->stats.rx_packets++;
+	netif_rx(skb);
+	return true;
+}
+
+/**
+ * companion_can_poll_data() - poll CAN data packet from companion
+ * @dev: address of the associated network device
+ */
+static bool companion_can_poll_data(struct net_device *dev)
+{
+	struct companion_can_priv *priv = netdev_priv(dev);
+	struct sk_buff            *skb;
+	struct can_frame          *cf;
+
+	skb = alloc_can_skb(dev, &cf);
+	if (!skb) {
+		dev_err(&dev->dev, "cannot alloc rx skb\n");
+		dev->stats.rx_dropped++;
+		return false;
+	}
+
+	if (companion_do_can_rx(priv->parent, priv->port, cf) != 0) {
+		dev_kfree_skb_any(skb);
+		return false;
+	}
+
+	dev->stats.rx_bytes += cf->can_dlc;
+	dev->stats.rx_packets++;
+	netif_rx(skb);
+	can_led_event(dev, CAN_LED_EVENT_RX);
+	return true;
+}
+
+/**
+ * companion_can_on_tx_done() - CAN tx done callback
+ * @data:          address of user supplied callback data
+ * @prio:          which CAN queue is done
+ * @lost_seq_sync: flag indicate lost sequence happened
+ * @success:       flag indicate last send is succeed or not
+ */
+static void companion_can_on_tx_done(void *data,
+                                     u8    prio,
+                                     bool  lost_seq_sync,
+                                     bool  success)
+{
+	struct companion_can_priv *priv  = data;
+	struct net_device         *dev   = priv->dev;
+	struct net_device_stats   *stats = &dev->stats;
+	int                        err;
+
+	if (success) {
+		stats->tx_bytes += companion_can_get_echo_skb(priv, prio);
+		stats->tx_packets++;
+		can_led_event(dev, CAN_LED_EVENT_TX);
+	} else {
+		companion_can_free_echo_skb(priv, prio);
+		dev_err(&dev->dev, "on_tx_done(%d) failed\n", prio);
+	}
+
+	/*TODO: what else action should take in case lost sequence?*/
+	if (lost_seq_sync)
+		dev_err(&dev->dev, "txq[%d] lost sequence sync\n", prio);
+
+	err = companion_do_can_stop_tx_timer(priv->parent, priv->port, prio);
+	if (err)
+		dev_err(&dev->dev,
+		        "stop txq[%d] tx timer failed: %d\n",
+		        prio, err);
+
+	netif_wake_subqueue(dev, prio);
+}
+
+/**
+ * companion_can_on_rx_done() - CAN rx done callback
+ * @data: address of user supplied callback data
+ */
+static void companion_can_on_rx_done(void *data)
+{
+	struct companion_can_priv *priv = data;
+	while (companion_can_poll_data(priv->dev));
+}
+
+/**
+ * companion_can_on_error() - CAN error callback
+ * @data: address of user supplied callback data
+ */
+static void companion_can_on_error(void *data)
+{
+	struct companion_can_priv *priv = data;
+	while (companion_can_poll_err(priv->dev));
+}
+
+/**
+ * companion_can_on_tx_timeout() - CAN tx timeout callback
+ * @data: address of user supplied callback data
+ * @prio: which CAN queue tx timed out
+ */
+static void companion_can_on_tx_timeout(void *data, u8 prio)
+{
+	struct companion_can_priv *priv          = data;
+	bool                       lost_txq_sync = false;
+	int                        err;
+
+	err = companion_do_get_can_txq_status(priv->parent,
+	                                      priv->port,
+	                                      prio,
+	                                      &lost_txq_sync);
+	if (err) {
+		dev_err(&priv->dev->dev,
+		        "get can txq[%d] status failed: %d\n", prio, err);
+
+		if (err != -EINVAL)
+			companion_do_can_start_tx_timer(priv->parent,
+			                                priv->port,
+			                                prio);
+		return;
+	}
+
+	if (lost_txq_sync) {
+		dev_err(&priv->dev->dev,
+		        "txq[%d] out of sync, restart data flow\n", prio);
+		companion_can_free_echo_skb(priv, prio);
+		netif_wake_subqueue(priv->dev, prio);
+	} else {
+		dev_err(&priv->dev->dev,
+		        "txq[%d] is sync'd, but no ack, wait again\n", prio);
+		companion_do_can_start_tx_timer(priv->parent, priv->port, prio);
+	}
+}
+
+static struct companion_can_ops companion_can_can_ops = {
+	.on_tx_done    = companion_can_on_tx_done,
+	.on_rx_done    = companion_can_on_rx_done,
+	.on_error      = companion_can_on_error,
+	.on_tx_timeout = companion_can_on_tx_timeout,
+};
+
+/**
+ * companion_can_open() - ndo_open callback
+ * @dev: address of the associated network device
+ */
+static int companion_can_open(struct net_device *dev)
+{
+	struct companion_can_priv *priv      = netdev_priv(dev);
+	bool                       has_space = false;
+	int                        err, i;
+
+	err = companion_can_ops_register(priv->parent,
+	                                 priv->port,
+	                                 &companion_can_can_ops,
+	                                 priv);
+	if (err) {
+		dev_err(&dev->dev,
+			"companion_can_ops_register() failed: %d\n", err);
+		goto out;
+	}
+
+	err = companion_can_set_mode(dev, CAN_MODE_START);
+	if (err) {
+		dev_err(&dev->dev,
+		        "companion_can_set_mode() failed: %d\n", err);
+		goto out_register;
+	}
+
+	err = companion_do_get_can_txq_status_all(priv->parent, priv->port);
+	if (err) {
+		dev_err(&dev->dev,
+		        "companion_do_get_can_txq_status_all() failed: %d\n",
+		        err);
+		goto out_mode;
+	}
+
+	err = open_candev(dev);
+	if (err) {
+		dev_err(&dev->dev, "open_candev() failed: %d\n", err);
+		goto out_mode;
+	}
+
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+	can_led_event(dev, CAN_LED_EVENT_OPEN);
+
+	/*TODO: start all here or start depends on queue space?*/
+	for (i = 0; i < NUM_TX_QUEUES; ++i) {
+		err = companion_do_can_txq_has_space(priv->parent,
+		                                     priv->port,
+		                                     i,
+		                                     &has_space);
+
+		if (!err && has_space) {
+			netif_tx_start_queue(netdev_get_tx_queue(dev, i));
+		} else {
+			netif_tx_stop_queue(netdev_get_tx_queue(dev, i));
+			dev_err(&dev->dev, "txq[%d] is not started\n", i);
+		}
+	}
+
+	return 0;
+
+out_mode:
+	companion_can_set_mode(dev, CAN_MODE_STOP);
+out_register:
+	companion_can_ops_unregister(priv->parent, priv->port);
+out:
+	return err;
+}
+
+/**
+ * companion_can_release() - ndo_close callback
+ * @dev: address of the associated network device
+ */
+static int companion_can_release(struct net_device *dev)
+{
+	struct companion_can_priv *priv = netdev_priv(dev);
+	int                        result;
+
+	netif_tx_stop_all_queues(dev);
+	can_led_event(dev, CAN_LED_EVENT_STOP);
+	priv->can.state = CAN_STATE_STOPPED;
+	close_candev(dev);
+	result = companion_can_set_mode(dev, CAN_MODE_STOP);
+	companion_can_ops_unregister(priv->parent, priv->port);
+	return result;
+}
+
+/**
+ * companion_can_start_xmit() - ndo_start_xmit callback
+ * @skb: address of the packet to send
+ * @dev: address of the associated network device
+ */
+static int companion_can_start_xmit(struct sk_buff    *skb,
+                                    struct net_device *dev)
+{
+	struct companion_can_priv *priv    = netdev_priv(dev);
+	struct can_frame          *cf      = (struct can_frame*)skb->data;
+	u16                        prio    = skb_get_queue_mapping(skb);
+	bool                       is_full = false;
+	int                        err;
+
+	if (can_dropped_invalid_skb(dev, skb)) {
+		dev_err(&dev->dev, "dropped invalid skb on txq[%d]\n", prio);
+		return NETDEV_TX_OK;
+	}
+
+	err = companion_do_can_tx(priv->parent, priv->port, prio, cf);
+	if (err) {
+		dev_err(&dev->dev, "dropped packet on txq[%d]\n", prio);
+		dev_kfree_skb_any(skb);
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
+
+	err = companion_do_can_txq_is_full(priv->parent,
+	                                   priv->port,
+	                                   prio,
+	                                   &is_full);
+	if (!err && is_full) {
+		netif_stop_subqueue(dev, prio);
+		err = companion_do_can_start_tx_timer(priv->parent,
+		                                      priv->port,
+		                                      prio);
+		if (err)
+			dev_err(&dev->dev,
+			        "start txq[%d] tx timer failed: %d\n",
+			        prio, err);
+	}
+
+	companion_can_put_echo_skb(priv, prio, skb);
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops companion_can_netdev_ops = {
+	.ndo_open       = companion_can_open,
+	.ndo_stop       = companion_can_release,
+	.ndo_start_xmit = companion_can_start_xmit,
+};
+
+static const struct of_device_id companion_can_of_match[] = {
+	{ .compatible = DRIVER_NAME, .data = NULL, },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, companion_can_of_match);
+
+static const struct can_bittiming_const companion_can_bittiming_const = {
+	.name      = "bosch,companion",
+	.tseg1_min = 2,
+	.tseg1_max = 16,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max   = 4,
+	.brp_min   = 1,
+	.brp_max   = 1024,
+	.brp_inc   = 1,
+};
+
+/**
+ * companion_can_probe() - probe callback
+ * @pdev: address of the platform device
+ */
+static int companion_can_probe(struct platform_device *pdev)
+{
+	struct device_node        *node = pdev->dev.of_node;
+	struct net_device         *dev;
+	struct companion_can_priv *priv;
+	u32                        port, freq;
+	int                        err;
+
+	if (!node) {
+		dev_err(&pdev->dev, "no device tree data\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(node, "port", &port)) {
+		dev_err(&pdev->dev, "no port property\n");
+		return -ENODEV;
+	}
+
+	if ((port != 0) && (port != 1)) {
+		dev_err(&pdev->dev,
+		        "invalid port %d, valid range is [0,1]\n", port);
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(node, "clock-frequency", &freq)) {
+		dev_err(&pdev->dev, "no clock-frequency property\n");
+		return -ENODEV;
+	}
+
+	if (!pdev->dev.parent) {
+		dev_err(&pdev->dev, "no parent device\n");
+		return -ENODEV;
+	}
+
+	dev = alloc_candev_mqs(sizeof(*priv),
+	                       TX_ECHO_SKB_MAX,
+	                       NUM_TX_QUEUES,
+	                       NUM_RX_QUEUES);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->netdev_ops               = &companion_can_netdev_ops;
+	dev->flags                   |= IFF_ECHO;
+	dev->real_num_tx_queues       = NUM_TX_QUEUES;
+
+	priv                          = netdev_priv(dev);
+	priv->can.clock.freq          = freq;
+	priv->can.bittiming_const     = &companion_can_bittiming_const;
+	priv->can.do_set_mode         = companion_can_set_mode;
+	priv->can.do_get_berr_counter = companion_can_get_berr_counter;
+	priv->can.ctrlmode_supported  = CAN_CTRLMODE_LISTENONLY |
+	                                CAN_CTRLMODE_BERR_REPORTING;
+	priv->parent                  = pdev->dev.parent;
+	priv->dev                     = dev;
+	priv->port                    = port;
+
+	platform_set_drvdata(pdev, dev);
+	SET_NETDEV_DEV(dev, &pdev->dev);
+
+	err = register_candev(dev);
+	if (err) {
+		dev_err(&pdev->dev, "register_candev() failed: %d\n", err);
+		free_candev(dev);
+		return err;
+	}
+
+	devm_can_led_init(dev);
+	return 0;
+}
+
+/**
+ * companion_can_remove() - remove callback
+ * @pdev: address of the platform device
+ */
+static int companion_can_remove(struct platform_device *pdev)
+{
+	struct net_device *dev = platform_get_drvdata(pdev);
+
+	unregister_candev(dev);
+	free_candev(dev);
+	return 0;
+}
+
+static struct platform_driver companion_can_driver = {
+	.driver = {
+		.name           = DRIVER_NAME,
+		.owner          = THIS_MODULE,
+		.of_match_table = of_match_ptr(companion_can_of_match),
+	},
+	.probe  = companion_can_probe,
+	.remove = companion_can_remove,
+};
+module_platform_driver(companion_can_driver);
+
+MODULE_AUTHOR("Zhu Yi <yi.zhu5@cn.bosch.com>");
+MODULE_DESCRIPTION("Companion upper level can network device");
+MODULE_LICENSE("GPL v2");
-- 
2.7.4

^ permalink raw reply related

* [PATCH 5/5] spi,can,char: add companion DT binding documentation
From: Mark Jonas @ 2018-06-05 18:44 UTC (permalink / raw)
  To: Wolfgang Grandegger, Marc Kleine-Budde
  Cc: linux-can, netdev, linux-kernel, hs, yi.zhu5, Mark Jonas
In-Reply-To: <1528224240-30786-1-git-send-email-mark.jonas@de.bosch.com>

From: Zhu Yi <yi.zhu5@cn.bosch.com>

Signed-off-by: Zhu Yi <yi.zhu5@cn.bosch.com>
Signed-off-by: Mark Jonas <mark.jonas@de.bosch.com>
---
 .../devicetree/bindings/spi/bosch,companion.txt    | 82 ++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/spi/bosch,companion.txt

diff --git a/Documentation/devicetree/bindings/spi/bosch,companion.txt b/Documentation/devicetree/bindings/spi/bosch,companion.txt
new file mode 100644
index 0000000..5ded325
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/bosch,companion.txt
@@ -0,0 +1,82 @@
+Bosch Companion SPI slave device
+
+The functionality bases on an external peripheral chip named Companion.
+It offers two CAN interfaces, each has 8 prioritized transmit FIFOs as
+well as one receive FIFO. Besides CAN, undisclosed additional functions
+can be accessed through the char device.
+
+A standard SPI interface with two additional lines for flow control is
+used. The Companion chip is the SPI slave.
+
+The driver suite consists of three separate drivers. The following
+diagram illustrates the dependencies in layers.
+
+           /dev/companion       SocketCAN                User Space
+-------------------------------------------------------------------
+         +----------------+ +---------------+
+         | companion-char | | companion-can |
+         +----------------+ +---------------+
+         +----------------------------------+
+         |          companion-spi           |
+         +----------------------------------+
+         +----------------------------------+
+         |     standard SPI subsystem       |
+         +----------------------------------+          Linux Kernel
+-------------------------------------------------------------------
+               | | | |      | |                            Hardware
+            CS-+ | | |      | +-BUSY
+            CLK--+ | |      +---REQUEST
+            MOSI---+ |
+            MISO-----+
+
+Required properties:
+
+- compatible       : must be "bosch,companion-spi"
+- interrupt-parent : the phandle of the GPIO controller
+- interrupts       : (GPIO) interrupt to which 'request-gpios' is
+                     connected to
+- request-gpios    : GPIO pin to request SPI master to receive data
+- busy-gpios       : GPIO pin to indicate SPI slave is busy
+- cs-gpios         : GPIO pin to select SPI slave
+
+Optional properties:
+
+The controller supports at most 2 CAN and 1 char device subnodes. When
+optionally specify the subnodes, the following properties are required:
+
+- CAN subnode
+  - compatible     : must be "bosch,companion-can"
+  - clock-frequency: CAN device clock in Hz
+  - port           : must be 0 or 1
+
+- Char device subnode
+  - compatible     : must be "bosch,companion-char"
+
+Example:
+
+&ecspi1 {
+	companion-spi@0 {
+		compatible = "bosch,companion-spi";
+		interrupt-parent = <&gpio1>;
+		interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+		request-gpios = <&gpio1 26 GPIO_ACTIVE_LOW>;
+		busy-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
+		cs-gpios = <&gpio4 9 GPIO_ACTIVE_LOW>;
+
+		companion-can0 {
+			compatible = "bosch,companion-can";
+			clock-frequency = <28000000>;
+			port = <0>;
+		};
+
+		companion-can1 {
+			compatible = "bosch,companion-can";
+			clock-frequency = <28000000>;
+			port = <1>;
+		};
+
+		companion-char {
+			compatible = "bosch,companion-char";
+		};
+	};
+};
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH V4] mlx4_core: allocate ICM memory in page size chunks
From: Qing Huang @ 2018-06-05 18:51 UTC (permalink / raw)
  To: Vlastimil Babka, Michal Hocko
  Cc: Eric Dumazet, David Miller, tariqt, haakon.bugge, yanjun.zhu,
	netdev, linux-rdma, linux-kernel, gi-oh.kim,
	santosh.shilimkar@oracle.com, rama nichanamatlu
In-Reply-To: <c67934a5-8858-0de7-a7b5-532d14b5c881@suse.cz>



On 6/4/2018 5:40 AM, Vlastimil Babka wrote:
> On 06/04/2018 08:27 AM, Michal Hocko wrote:
>> On Fri 01-06-18 15:05:26, Qing Huang wrote:
>>>
>>> On 6/1/2018 12:31 AM, Michal Hocko wrote:
>>>> On Thu 31-05-18 19:04:46, Qing Huang wrote:
>>>>> On 5/31/2018 2:10 AM, Michal Hocko wrote:
>>>>>> On Thu 31-05-18 10:55:32, Michal Hocko wrote:
>>>>>>> On Thu 31-05-18 04:35:31, Eric Dumazet wrote:
>>>>>> [...]
>>>>>>>> I merely copied/pasted from alloc_skb_with_frags() :/
>>>>>>> I will have a look at it. Thanks!
>>>>>> OK, so this is an example of an incremental development ;).
>>>>>>
>>>>>> __GFP_NORETRY was added by ed98df3361f0 ("net: use __GFP_NORETRY for
>>>>>> high order allocations") to prevent from OOM killer. Yet this was
>>>>>> not enough because fb05e7a89f50 ("net: don't wait for order-3 page
>>>>>> allocation") didn't want an excessive reclaim for non-costly orders
>>>>>> so it made it completely NOWAIT while it preserved __GFP_NORETRY in
>>>>>> place which is now redundant. Should I send a patch?
>>>>>>
>>>>> Just curious, how about GFP_ATOMIC flag? Would it work in a similar fashion?
>>>>> We experimented
>>>>> with it a bit in the past but it seemed to cause other issue in our tests.
>>>>> :-)
>>>> GFP_ATOMIC is a non-sleeping (aka no reclaim) context with an access to
>>>> memory reserves. So the risk is that you deplete those reserves and
>>>> cause issues to other subsystems which need them as well.
>>>>
>>>>> By the way, we didn't encounter any OOM killer events. It seemed that the
>>>>> mlx4_alloc_icm() triggered slowpath.
>>>>> We still had about 2GB free memory while it was highly fragmented.
>>>> The compaction was able to make a reasonable forward progress for you.
>>>> But considering mlx4_alloc_icm is called with GFP_KERNEL resp. GFP_HIGHUSER
>>>> then the OOM killer is clearly possible as long as the order is lower
>>>> than 4.
>>> The allocation was 256KB so the order was much higher than 4. The compaction
>>> seemed to be the root
>>> cause for our problem. It took too long to finish its work while putting
>>> mlx4_alloc_icm to sleep in a heavily
>>> fragmented memory situation . Will NORETRY flag avoid the compaction ops and
>>> fail the 256KB allocation
>>> immediately so mlx4_alloc_icm can enter adjustable lower order allocation
>>> code path quickly?
>> Costly orders should only perform a light compaction attempt unless
>> __GFP_RETRY_MAY_FAIL is used IIRC. CCing Vlastimil. So __GFP_NORETRY
>> shouldn't make any difference.
> It's a bit more complicated. Costly allocations will try the light
> compaction attempt first, even before reclaim. This is followed by
> reclaim and a more costly compaction attempt. With __GFP_NORETRY, the
> second compaction attempt is also only the light one, so the flag does
> make a difference here.

Thanks for the clarification!

Looks like our production kernel is kinda old, neither 
__GFP_DIRECT_RECLAIM nor __GFP_NORETRY
has been used in __alloc_pages_slowpath() in our kernel.

^ permalink raw reply

* Re: [PATCH net] failover: eliminate callback hell
From: Stephen Hemminger @ 2018-06-05 18:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: kys, haiyangz, davem, sridhar.samudrala, netdev,
	Stephen Hemminger
In-Reply-To: <20180605211927-mutt-send-email-mst@kernel.org>

On Tue, 5 Jun 2018 21:35:26 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> Thanks, I think this is nice patch but I wonder whether it can be split
> up somewhat. Not all of it is uncontroversial.

I started that way, but then I was fixing code that was later deleted.
The big change was eliminating the callbacks.

> 
> On Mon, Jun 04, 2018 at 08:42:31PM -0700, Stephen Hemminger wrote:
> >   * The matching of secondary device to primary device policy
> >     is up to the network device. Both net_failover and netvsc
> >     will use MAC for now but can change separately.  
> 
> I actually suspect both will change to a serial number
> down the road.
> 
> >   * The match policy is only used during initial discovery; after
> >     that the secondary device knows what the upper device is because
> >     of the parent/child relationship; no searching is required.  
> 
> That would obviously be an improvement - does it have to be tied with
> rest of changes?

This was not possible with the version of the common code that
is in net now.

> 
> >   * Now, netvsc and net_failover use the same delayed work type
> >     mechanism for setup. Previously, net_failover code was triggering off
> >     name change but a similar policy was rejected for netvsc.
> >     "what is good for the goose is good for the gander"  
> 
> I don't really understand what you are saying here.  I think the delayed
> hack is kind of ugly and seems racy.  Current failover code was rejected
> by whom?  Why is new one good and for whom?  Did you want to do a name
> change in netvsc but it was rejected? Could you clarify please?

See:
   https://patchwork.ozlabs.org/patch/851711/

> 
> >   * The net_failover private device info 'struct net_failover_info'
> >     should have been private to the driver file, not a visible
> >     API.
> > 
> >   * The net_failover device should use SET_NETDEV_DEV
> >     that is intended only for physical devices not virtual devices.  
> 
> You mean should not.

Yes. Virtual device should not set device parent.

> 
> >   * No point in having DocBook style comments on a driver file.
> >     They only make sense on an external exposed API.
> > 
> >   * net_failover only supports Ethernet, so use ether_addr_copy.  
> 
> It is since you need to know about all the things you need to copy, and
> because of mac matching.  But it isn't too much effort to add more
> transports and I don't see value in going in the reverse direction and
> making it more ethernet specific that it already is.

Sure, then do memcpy base on addr_len

> 
> >   * Set permanent and current address of net_failover device
> >     to match the primary.
> > 
> >   * Carrier should be marked off before registering device
> >     the net_failover device.  
> 
> Are above two bugfixes?

Yes.

> 
> >   * Use netdev_XXX for log messages, in net_failover (not dev_xxx)
> > 
> >   * Since failover infrastructure is about linking devices just
> >     use RTNL no need for other locking in init and teardown.
> > 
> >   * Don't bother with ERR_PTR() style return if only possible
> >     return is success or no memory.
> > 
> >   * As much as possible, the terms master and slave should be avoided
> >     because of their cultural connotations.  
> 
> Also for consistency, failover is calling these primary and standby now.

Good, let's standardize on that. 

> 
> > Note; this code has been tested on Hyper-V
> > but is compile tested only on virtio.
> > 
> > Fixes: 30c8bd5aa8b2 ("net: Introduce generic failover module")
> > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> > ---
> > 
> > Although this patch needs to go into 4.18 (linux-net),  
> 
> I'd rather we focused on fixing bugs in 4.18, and left refactoring to
> 4.19.
>

Either we fix or revert the current code in 4.18.
Sorry, I am not having callback hell code in any vendor or upstream kernel.

^ permalink raw reply

* Re: [PATCH net] net: sched: cls: Fix offloading when ingress dev is vxlan
From: Jakub Kicinski @ 2018-06-05 18:57 UTC (permalink / raw)
  To: Paul Blakey
  Cc: Jiri Pirko, Cong Wang, Jamal Hadi Salim, David Miller, netdev,
	Yevgeny Kliteynik, Roi Dayan, Shahar Klein, Mark Bloch,
	Or Gerlitz
In-Reply-To: <1528185843-18645-1-git-send-email-paulb@mellanox.com>

On Tue,  5 Jun 2018 11:04:03 +0300, Paul Blakey wrote:
> When using a vxlan device as the ingress dev, we count it as a
> "no offload dev", so when such a rule comes and err stop is true,
> we fail early and don't try the egdev route which can offload it
> through the egress device.
> 
> Fix that by not calling the block offload if one of the devices
> attached to it is not offload capable, but make sure egress on such case
> is capable instead.
> 
> Fixes: caa7260156eb ("net: sched: keep track of offloaded filters [..]")
> Reviewed-by: Roi Dayan <roid@mellanox.com>
> Acked-by: Jiri Pirko <jiri@mellanox.com>
> Signed-off-by: Paul Blakey <paulb@mellanox.com>

Very poor commit message.  What you're doing is re-enabling skip_sw
filters on tunnel devices which semantically make no sense and
shouldn't have been allowed in the first place.

This will breaks block sharing between tunnels and HW netdevs (because
you skip the tcf_block_cb_call() completely).  The entire egdev idea
remains badly broken accepting filters like this:

# tc filter add dev vxlan0 ingress \
	matchall action skip_sw \
		mirred egress redirect dev lo \
		mirred egress redirect dev sw1np0

Do we still care about correctness and not breaking backward
compatibility?

^ permalink raw reply

* Re: [PATCH net] net: sched: cls: Fix offloading when ingress dev is vxlan
From: Jakub Kicinski @ 2018-06-05 18:59 UTC (permalink / raw)
  To: Paul Blakey
  Cc: Jiri Pirko, Cong Wang, Jamal Hadi Salim, David Miller, netdev,
	Yevgeny Kliteynik, Roi Dayan, Shahar Klein, Mark Bloch,
	Or Gerlitz
In-Reply-To: <20180605115747.0e939ac4@cakuba.netronome.com>

On Tue, 5 Jun 2018 11:57:47 -0700, Jakub Kicinski wrote:
> On Tue,  5 Jun 2018 11:04:03 +0300, Paul Blakey wrote:
> > When using a vxlan device as the ingress dev, we count it as a
> > "no offload dev", so when such a rule comes and err stop is true,
> > we fail early and don't try the egdev route which can offload it
> > through the egress device.
> > 
> > Fix that by not calling the block offload if one of the devices
> > attached to it is not offload capable, but make sure egress on such case
> > is capable instead.
> > 
> > Fixes: caa7260156eb ("net: sched: keep track of offloaded filters [..]")
> > Reviewed-by: Roi Dayan <roid@mellanox.com>
> > Acked-by: Jiri Pirko <jiri@mellanox.com>
> > Signed-off-by: Paul Blakey <paulb@mellanox.com>  
> 
> Very poor commit message.  What you're doing is re-enabling skip_sw
> filters on tunnel devices which semantically make no sense and
> shouldn't have been allowed in the first place.
> 
> This will breaks block sharing between tunnels and HW netdevs (because
> you skip the tcf_block_cb_call() completely).  The entire egdev idea
> remains badly broken accepting filters like this:
> 
> # tc filter add dev vxlan0 ingress \
> 	matchall action skip_sw \
> 		mirred egress redirect dev lo \
> 		mirred egress redirect dev sw1np0

For above we probably need something like this (untested):

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3f4cf930f809..71e5eebec31a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1511,7 +1511,7 @@ int tc_setup_cb_egdev_call(const struct net_device *dev,
        struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
 
        if (!egdev)
-               return 0;
+               return err_stop ? -EOPNOTSUPP : 0;
        return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
 }
 EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);

But the correct fix is to remove egdev crutch completely IMO.

^ permalink raw reply related

* Re: [PATCH net] net: sched: cls: Fix offloading when ingress dev is vxlan
From: David Miller @ 2018-06-05 19:06 UTC (permalink / raw)
  To: kubakici
  Cc: paulb, jiri, xiyou.wangcong, jhs, netdev, kliteyn, roid, shahark,
	markb, ogerlitz
In-Reply-To: <20180605115747.0e939ac4@cakuba.netronome.com>

From: Jakub Kicinski <kubakici@wp.pl>
Date: Tue, 5 Jun 2018 11:57:47 -0700

> Do we still care about correctness and not breaking backward
> compatibility?

Jakub let me know if you want me to revert this change.

^ permalink raw reply

* Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
From: Bjorn Helgaas @ 2018-06-05 19:11 UTC (permalink / raw)
  To: Ryankao
  Cc: Kai Heng Feng, jrg.otte@gmail.com, David Miller, Hayes Wang,
	hkallweit1@gmail.com, romieu@fr.zoreil.com, Linux Netdev List,
	Linux Kernel Mailing List, Hau, linux-pci
In-Reply-To: <20180605172805.GD30381@bhelgaas-glaptop.roam.corp.google.com>

[+cc linux-pci]

On Tue, Jun 05, 2018 at 12:28:05PM -0500, Bjorn Helgaas wrote:
> On Tue, Jun 05, 2018 at 06:34:09AM +0000, Ryankao wrote:
> > Add realtek folk Hau
> > 
> > -----Original Message-----
> > From: Kai Heng Feng [mailto:kai.heng.feng@canonical.com] 
> > Sent: Tuesday, June 05, 2018 1:02 PM
> > To: jrg.otte@gmail.com
> > Cc: David Miller <davem@davemloft.net>; Hayes Wang <hayeswang@realtek.com>; hkallweit1@gmail.com; romieu@fr.zoreil.com; Linux Netdev List <netdev@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Ryankao <ryankao@realtek.com>
> > Subject: Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
> > 
> > Hi Jörg Otte,
> > 
> > Can you give this patch a try?
> > 
> > Since you are the only one that reported ALDPS/ASPM regression,
> > 
> > And I think this patch should solve the issue you had [1].
> > 
> > Hopefully we don't need to go down the rabbit hole of blacklist/whitelist...
> > 
> > Kai-Heng
> > 
> > [1] https://lkml.org/lkml/2013/1/5/36
> 
> I have no idea what ALDPS is.  It's not mentioned in the PCIe spec, so
> presumably it's some Realtek-specific thing.  ASPM is a generic PCIe
> thing.  Changes to these two things should be in separate patches so
> they don't get tangled up.
> 
> > > On Jun 5, 2018, at 12:58 PM, Kai-Heng Feng 
> > > <kai.heng.feng@canonical.com>
> > > wrote:
> > >
> > > This patch reinstate ALDPS and ASPM support on r8169.
> > >
> > > On some Intel platforms, ASPM support on r8169 is the key factor to 
> > > let Package C-State achieve PC8. Without ASPM support, the deepest 
> > > Package C-State can hit is PC3. PC8 can save additional ~3W in 
> > > comparison with PC3.
> > >
> > > This patch is from Realtek.
> > >
> > > Fixes: e0c075577965 ("r8169: enable ALDPS for power saving")
> > > Fixes: d64ec841517a ("r8169: enable internal ASPM and clock request
> > > settings")
> 
> > > +3507,15 @@ static void rtl8168e_1_hw_phy_config(struct 
> > > rtl8169_private *tp)
> > >  	rtl_writephy(tp, 0x0d, 0x4007);
> > >  	rtl_writephy(tp, 0x0e, 0x0000);
> > >  	rtl_writephy(tp, 0x0d, 0x0000);
> > > +
> > > +	/* Check ALDPS bit, disable it if enabled */
> > > +	rtl_writephy(tp, 0x1f, 0x0000);
> > > +	if (enable_aldps)
> > > +		rtl_w0w1_phy(tp, 0x15, 0x1000, 0x0000);
> > > +	else if (rtl_readphy(tp, 0x15) & 0x1000)
> > > +		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x1000);
> 
> There's a lot of repetition of this code with minor variations.  You
> could probably factor it out and make it more concise and more
> readable.
> 
> > > +static void rtl8169_check_link_status(struct net_device *dev,
> > > +				      struct rtl8169_private *tp) {
> > > +	struct device *d = tp_to_dev(tp);
> > > +
> > > +	if (tp->link_ok(tp)) {
> > > +		rtl_link_chg_patch(tp);
> > > +		/* This is to cancel a scheduled suspend if there's one. */
> > > +		if (pm_request_resume(d))
> > > +			_rtl_reset_work(tp);
> > > +		netif_carrier_on(dev);
> > > +		if (net_ratelimit())
> > > +			netif_info(tp, ifup, dev, "link up\n");
> > > +	} else {
> > > +		netif_carrier_off(dev);
> > > +		netif_info(tp, ifdown, dev, "link down\n");
> > > +		pm_runtime_idle(d);
> > > +	}
> > > +}
> 
> This function apparently just got moved around without changing
> anything.  That's fine, but the move should be in a separate patch to
> make the real changes easier to review.
> 
> > > @@ -7649,8 +7757,12 @@ static int rtl_init_one(struct pci_dev *pdev, 
> > > const struct pci_device_id *ent)
> > >
> > >  	/* disable ASPM completely as that cause random device stop working
> > >  	 * problems as well as full system hangs for some PCIe devices users */
> > > -	pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
> > > -				     PCIE_LINK_STATE_CLKPM);
> > > +	if (!enable_aspm) {
> > > +		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
> > > +					     PCIE_LINK_STATE_L1 |
> > > +					     PCIE_LINK_STATE_CLKPM);
> > > +		netif_info(tp, probe, dev, "ASPM disabled\n");
> > > +	}
> 
> ASPM is a generic PCIe feature that should be configured by the PCI
> core without any help from the device driver.
> 
> If code in the driver is needed, that means either the PCI core is
> doing it wrong and we should fix it there, or the device is broken and
> the driver is working around the erratum.
> 
> If this is an erratum, you should include details about exactly what's
> broken and (ideally) a URL to the published erratum.  Otherwise this
> is just unmaintainable black magic and likely to be broken by future
> ASPM changes in the PCI core.
> 
> ASPM configuration is done by the PCI core before drivers are bound to
> the device.  If you need device-specific workarounds, they should
> probably be in quirks so they're done before the core does that ASPM
> configuration.
> 
> > >  	/* enable device (incl. PCI PM wakeup and hotplug setup) */
> > >  	rc = pcim_enable_device(pdev);
> > > --
> > > 2.17.0
> > 
> > ------Please consider the environment before printing this e-mail.

^ permalink raw reply

* Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
From: Heiner Kallweit @ 2018-06-05 19:13 UTC (permalink / raw)
  To: Kai-Heng Feng, davem; +Cc: hayeswang, romieu, netdev, linux-kernel, Ryankao
In-Reply-To: <20180605045812.17977-1-kai.heng.feng@canonical.com>

On 05.06.2018 06:58, Kai-Heng Feng wrote:
> This patch reinstate ALDPS and ASPM support on r8169.
> 
> On some Intel platforms, ASPM support on r8169 is the key factor to let
> Package C-State achieve PC8. Without ASPM support, the deepest Package
> C-State can hit is PC3. PC8 can save additional ~3W in comparison with
> PC3.
> 
> This patch is from Realtek.
> 
> Fixes: e0c075577965 ("r8169: enable ALDPS for power saving")
> Fixes: d64ec841517a ("r8169: enable internal ASPM and clock request settings")
> 
> Cc: Ryankao <ryankao@realtek.com>
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> ---
>  drivers/net/ethernet/realtek/r8169.c | 190 +++++++++++++++++++++------
>  1 file changed, 151 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index 75dfac0248f4..a28ef20be221 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -319,6 +319,8 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
>  
>  MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
>  
> +static int enable_aspm = 1;
> +static int enable_aldps = 1;
>  static int use_dac = -1;
>  static struct {
>  	u32 msg_enable;
> @@ -817,6 +819,10 @@ struct rtl8169_private {
>  
>  MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
>  MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
> +module_param(enable_aspm, int, 0);
> +MODULE_PARM_DESC(enable_aspm, "Enable ASPM");
> +module_param(enable_aldps, int, 0);
> +MODULE_PARM_DESC(enable_aldps, "Enable ALDPS");
>  module_param(use_dac, int, 0);
>  MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot.");
>  module_param_named(debug, debug.msg_enable, int, 0);
> @@ -1567,25 +1573,6 @@ static void rtl_link_chg_patch(struct rtl8169_private *tp)
>  	}
>  }
>  
> -static void rtl8169_check_link_status(struct net_device *dev,
> -				      struct rtl8169_private *tp)
> -{
> -	struct device *d = tp_to_dev(tp);
> -
> -	if (tp->link_ok(tp)) {
> -		rtl_link_chg_patch(tp);
> -		/* This is to cancel a scheduled suspend if there's one. */
> -		pm_request_resume(d);
> -		netif_carrier_on(dev);
> -		if (net_ratelimit())
> -			netif_info(tp, ifup, dev, "link up\n");
> -	} else {
> -		netif_carrier_off(dev);
> -		netif_info(tp, ifdown, dev, "link down\n");
> -		pm_runtime_idle(d);
> -	}
> -}
> -
>  #define WAKE_ANY (WAKE_PHY | WAKE_MAGIC | WAKE_UCAST | WAKE_BCAST | WAKE_MCAST)
>  
>  static u32 __rtl8169_get_wol(struct rtl8169_private *tp)
> @@ -3520,6 +3507,15 @@ static void rtl8168e_1_hw_phy_config(struct rtl8169_private *tp)
>  	rtl_writephy(tp, 0x0d, 0x4007);
>  	rtl_writephy(tp, 0x0e, 0x0000);
>  	rtl_writephy(tp, 0x0d, 0x0000);
> +
> +	/* Check ALDPS bit, disable it if enabled */
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x15, 0x1000, 0x0000);
> +	else if (rtl_readphy(tp, 0x15) & 0x1000)
> +		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x1000);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);

Few remarks:
- The comment isn't applicable any longer.
- The second rtl_writephy(tp, 0x1f, 0x0000) isn't needed because you don't
  switch the page in between.
- The code is a little hard to read, instead you could use the following
  and create a helper, ideally with register and bit number as
  parameters so that you can use it for all affected chip types.

val = rtl_readphy(tp, 0x15);
val &= ~BIT(12);
if (enable_aldps)
	val |= BIT(12);
rtl_writephy(tp, 0x15, val);

>  }
>  
>  static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr)
> @@ -3627,6 +3623,15 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
>  
>  	/* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
>  	rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
> +
> +	/* Check ALDPS bit, disable it if enabled */
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x15, 0x1000, 0x0000);
> +	else if (rtl_readphy(tp, 0x15) & 0x1000)
> +		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x1000);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
>  }
>  
>  static void rtl8168f_hw_phy_config(struct rtl8169_private *tp)
> @@ -3649,6 +3654,15 @@ static void rtl8168f_hw_phy_config(struct rtl8169_private *tp)
>  	rtl_writephy(tp, 0x05, 0x8b86);
>  	rtl_w0w1_phy(tp, 0x06, 0x0001, 0x0000);
>  	rtl_writephy(tp, 0x1f, 0x0000);
> +
> +	/* Check ALDPS bit, disable it if enabled */
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x15, 0x1000, 0x0000);
> +	else if (rtl_readphy(tp, 0x15) & 0x1000)
> +		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x1000);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
>  }
>  
>  static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp)
> @@ -3865,7 +3879,9 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
>  
>  	/* Check ALDPS bit, disable it if enabled */
>  	rtl_writephy(tp, 0x1f, 0x0a43);
> -	if (rtl_readphy(tp, 0x10) & 0x0004)
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x10, 0x0004, 0x0000);
> +	else if (rtl_readphy(tp, 0x10) & 0x0004)
>  		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
>  
>  	rtl_writephy(tp, 0x1f, 0x0000);
> @@ -3874,6 +3890,14 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
>  static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp)
>  {
>  	rtl_apply_firmware(tp);
> +
> +	rtl_writephy(tp, 0x1f, 0x0a43);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x10, 0x0004, 0x0000);
> +	else if (rtl_readphy(tp, 0x10) & 0x0004)
> +		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
>  }
>  
>  static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp)
> @@ -3980,7 +4004,9 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp)
>  
>  	/* Check ALDPS bit, disable it if enabled */
>  	rtl_writephy(tp, 0x1f, 0x0a43);
> -	if (rtl_readphy(tp, 0x10) & 0x0004)
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x10, 0x0004, 0x0000);
> +	else if (rtl_readphy(tp, 0x10) & 0x0004)
>  		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
>  
>  	rtl_writephy(tp, 0x1f, 0x0000);
> @@ -4053,7 +4079,9 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp)
>  
>  	/* Check ALDPS bit, disable it if enabled */
>  	rtl_writephy(tp, 0x1f, 0x0a43);
> -	if (rtl_readphy(tp, 0x10) & 0x0004)
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x10, 0x0004, 0x0000);
> +	else if (rtl_readphy(tp, 0x10) & 0x0004)
>  		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
>  
>  	rtl_writephy(tp, 0x1f, 0x0000);
> @@ -4095,7 +4123,9 @@ static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp)
>  
>  	/* Check ALDPS bit, disable it if enabled */
>  	rtl_writephy(tp, 0x1f, 0x0a43);
> -	if (rtl_readphy(tp, 0x10) & 0x0004)
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x10, 0x0004, 0x0000);
> +	else if (rtl_readphy(tp, 0x10) & 0x0004)
>  		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
>  
>  	rtl_writephy(tp, 0x1f, 0x0000);
> @@ -4186,7 +4216,9 @@ static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp)
>  
>  	/* Check ALDPS bit, disable it if enabled */
>  	rtl_writephy(tp, 0x1f, 0x0a43);
> -	if (rtl_readphy(tp, 0x10) & 0x0004)
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x10, 0x0004, 0x0000);
> +	else if (rtl_readphy(tp, 0x10) & 0x0004)
>  		rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0004);
>  
>  	rtl_writephy(tp, 0x1f, 0x0000);
> @@ -4233,6 +4265,15 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
>  	rtl_apply_firmware(tp);
>  
>  	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
> +
> +	/* Check ALDPS bit, disable it if enabled */
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x18, 0x1000, 0x0000);
> +	else if (rtl_readphy(tp, 0x18) & 0x1000)
> +		rtl_w0w1_phy(tp, 0x18, 0x0000, 0x1000);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
>  }
>  
>  static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
> @@ -4250,6 +4291,15 @@ static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
>  	rtl_writephy(tp, 0x10, 0x401f);
>  	rtl_writephy(tp, 0x19, 0x7030);
>  	rtl_writephy(tp, 0x1f, 0x0000);
> +
> +	/* Check ALDPS bit, disable it if enabled */
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x18, 0x1000, 0x0000);
> +	else if (rtl_readphy(tp, 0x18) & 0x1000)
> +		rtl_w0w1_phy(tp, 0x18, 0x0000, 0x1000);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
>  }
>  
>  static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
> @@ -4272,6 +4322,15 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
>  	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
>  
>  	rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC);
> +
> +	/* Check ALDPS bit, disable it if enabled */
> +	rtl_writephy(tp, 0x1f, 0x0000);
> +	if (enable_aldps)
> +		rtl_w0w1_phy(tp, 0x18, 0x1000, 0x0000);
> +	else if (rtl_readphy(tp, 0x18) & 0x1000)
> +		rtl_w0w1_phy(tp, 0x18, 0x0000, 0x1000);
> +
> +	rtl_writephy(tp, 0x1f, 0x0000);
>  }
>  
>  static void rtl_hw_phy_config(struct net_device *dev)
> @@ -5290,6 +5349,18 @@ static void rtl_pcie_state_l2l3_enable(struct rtl8169_private *tp, bool enable)
>  	RTL_W8(tp, Config3, data);
>  }
>  
> +static void rtl_hw_internal_aspm_clkreq_enable(struct rtl8169_private *tp,
> +					       bool enable)
> +{
> +	if (enable) {
> +		RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn);
> +		RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en);
> +	} else {
> +		RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> +		RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	}
> +}
> +
>  static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
>  {
>  	RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
> @@ -5646,9 +5717,10 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp)
>  	rtl_hw_start_8168g(tp);
>  
>  	/* disable aspm and clock request before access ephy */
> -	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> -	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	rtl_hw_internal_aspm_clkreq_enable(tp, false);
>  	rtl_ephy_init(tp, e_info_8168g_1, ARRAY_SIZE(e_info_8168g_1));
> +	if (enable_aspm)
> +		rtl_hw_internal_aspm_clkreq_enable(tp, true);
>  }
>  
>  static void rtl_hw_start_8168g_2(struct rtl8169_private *tp)
> @@ -5681,9 +5753,10 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
>  	rtl_hw_start_8168g(tp);
>  
>  	/* disable aspm and clock request before access ephy */
> -	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> -	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	rtl_hw_internal_aspm_clkreq_enable(tp, false);
>  	rtl_ephy_init(tp, e_info_8411_2, ARRAY_SIZE(e_info_8411_2));
> +	if (enable_aspm)
> +		rtl_hw_internal_aspm_clkreq_enable(tp, true);
>  }
>  
>  static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
> @@ -5700,8 +5773,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
>  	};
>  
>  	/* disable aspm and clock request before access ephy */
> -	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> -	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	rtl_hw_internal_aspm_clkreq_enable(tp, false);
>  	rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1));
>  
>  	RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO);
> @@ -5780,6 +5852,9 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
>  	r8168_mac_ocp_write(tp, 0xe63e, 0x0000);
>  	r8168_mac_ocp_write(tp, 0xc094, 0x0000);
>  	r8168_mac_ocp_write(tp, 0xc09e, 0x0000);
> +
> +	if (enable_aspm)
> +		rtl_hw_internal_aspm_clkreq_enable(tp, true);
>  }
>  
>  static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
> @@ -5831,11 +5906,13 @@ static void rtl_hw_start_8168ep_1(struct rtl8169_private *tp)
>  	};
>  
>  	/* disable aspm and clock request before access ephy */
> -	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> -	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	rtl_hw_internal_aspm_clkreq_enable(tp, false);
>  	rtl_ephy_init(tp, e_info_8168ep_1, ARRAY_SIZE(e_info_8168ep_1));
>  
>  	rtl_hw_start_8168ep(tp);
> +
> +	if (enable_aspm)
> +		rtl_hw_internal_aspm_clkreq_enable(tp, true);
>  }
>  
>  static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
> @@ -5847,14 +5924,16 @@ static void rtl_hw_start_8168ep_2(struct rtl8169_private *tp)
>  	};
>  
>  	/* disable aspm and clock request before access ephy */
> -	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> -	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	rtl_hw_internal_aspm_clkreq_enable(tp, false);
>  	rtl_ephy_init(tp, e_info_8168ep_2, ARRAY_SIZE(e_info_8168ep_2));
>  
>  	rtl_hw_start_8168ep(tp);
>  
>  	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
>  	RTL_W8(tp, MISC_1, RTL_R8(tp, MISC_1) & ~PFM_D3COLD_EN);
> +
> +	if (enable_aspm)
> +		rtl_hw_internal_aspm_clkreq_enable(tp, true);
>  }
>  
>  static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
> @@ -5868,8 +5947,7 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
>  	};
>  
>  	/* disable aspm and clock request before access ephy */
> -	RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~ClkReqEn);
> -	RTL_W8(tp, Config5, RTL_R8(tp, Config5) & ~ASPM_en);
> +	rtl_hw_internal_aspm_clkreq_enable(tp, false);
>  	rtl_ephy_init(tp, e_info_8168ep_3, ARRAY_SIZE(e_info_8168ep_3));
>  
>  	rtl_hw_start_8168ep(tp);
> @@ -5889,6 +5967,9 @@ static void rtl_hw_start_8168ep_3(struct rtl8169_private *tp)
>  	data = r8168_mac_ocp_read(tp, 0xe860);
>  	data |= 0x0080;
>  	r8168_mac_ocp_write(tp, 0xe860, data);
> +
> +	if (enable_aspm)
> +		rtl_hw_internal_aspm_clkreq_enable(tp, true);
>  }
>  
>  static void rtl_hw_start_8168(struct rtl8169_private *tp)
> @@ -6364,7 +6445,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
>  	tp->cur_tx = tp->dirty_tx = 0;
>  }
>  
> -static void rtl_reset_work(struct rtl8169_private *tp)
> +static void _rtl_reset_work(struct rtl8169_private *tp)
>  {
>  	struct net_device *dev = tp->dev;
>  	int i;
> @@ -6384,6 +6465,33 @@ static void rtl_reset_work(struct rtl8169_private *tp)
>  	napi_enable(&tp->napi);
>  	rtl_hw_start(tp);
>  	netif_wake_queue(dev);
> +}
> +
> +static void rtl8169_check_link_status(struct net_device *dev,
> +				      struct rtl8169_private *tp)
> +{
> +	struct device *d = tp_to_dev(tp);
> +
> +	if (tp->link_ok(tp)) {
> +		rtl_link_chg_patch(tp);
> +		/* This is to cancel a scheduled suspend if there's one. */
> +		if (pm_request_resume(d))
> +			_rtl_reset_work(tp);

This reset was added, what is it good for and how is it related to
ASPM/ALDPS? It looks a little bogus, especially considering that
pm_request_resume() can return also positive values.

> +		netif_carrier_on(dev);
> +		if (net_ratelimit())
> +			netif_info(tp, ifup, dev, "link up\n");
> +	} else {
> +		netif_carrier_off(dev);
> +		netif_info(tp, ifdown, dev, "link down\n");
> +		pm_runtime_idle(d);
> +	}
> +}
> +
> +static void rtl_reset_work(struct rtl8169_private *tp)
> +{
> +	struct net_device *dev = tp->dev;
> +
> +	_rtl_reset_work(tp);
>  	rtl8169_check_link_status(dev, tp);
>  }
>  
> @@ -7649,8 +7757,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	/* disable ASPM completely as that cause random device stop working
>  	 * problems as well as full system hangs for some PCIe devices users */
> -	pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
> -				     PCIE_LINK_STATE_CLKPM);
> +	if (!enable_aspm) {
> +		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
> +					     PCIE_LINK_STATE_L1 |
> +					     PCIE_LINK_STATE_CLKPM);
> +		netif_info(tp, probe, dev, "ASPM disabled\n");

You should use dev_info() here because the net_device isn't registered yet.

> +	}
>  
>  	/* enable device (incl. PCI PM wakeup and hotplug setup) */
>  	rc = pcim_enable_device(pdev);
> 

^ permalink raw reply

* Re: general protection fault in sockfs_setattr
From: Cong Wang @ 2018-06-05 19:14 UTC (permalink / raw)
  To: shankarapailoor
  Cc: David Miller, LKML, syzkaller, Linux Kernel Network Developers
In-Reply-To: <CAB+yDabFuBpT5UU1Hy0s4kY5UKJzA84=6fNieNcdTjjZNq5SHQ@mail.gmail.com>

On Mon, Jun 4, 2018 at 9:53 PM, shankarapailoor
<shankarapailoor@gmail.com> wrote:
> Hi,
>
> I have been fuzzing Linux 4.17-rc7 with Syzkaller and found the
> following crash: https://pastebin.com/ixX3RB9j
>
> Syzkaller isolated the cause of the bug to the following program:
>
> socketpair$unix(0x1, 0x1, 0x0,
> &(0x7f0000000000)={<r0=>0xffffffffffffffff, <r1=>0xffffffffffffffff})
> getresuid(&(0x7f0000000080)=<r2=>0x0, &(0x7f00000000c0),
> &(0x7f0000000700))r3 = getegid()
> fchownat(r0, &(0x7f0000000040)='\x00', r2, r3, 0x1000)
> dup3(r1, r0, 0x80000)
>
>
> The problematic area appears to be here:
>
> static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
> {
>     int err = simple_setattr(dentry, iattr);
>
>     if (!err && (iattr->ia_valid & ATTR_UID)) {
>          struct socket *sock = SOCKET_I(d_inode(dentry));
>
>          sock->sk->sk_uid = iattr->ia_uid; //KASAN GPF
>     }
>     return err;
> }
>
> If dup3 is called concurrently with fchownat then can sock->sk be NULL?

Although dup3() implies a close(), fd is refcnt'ted, if dup3() runs
concurrently with fchownat() it should not be closed until whoever
the last closes it.

Or maybe fchownat() doesn't even hold refcnt of fd, since it aims
to change the file backed.


Not sure if the following is sufficient, inode might need to be protected
with some lock...

diff --git a/net/socket.c b/net/socket.c
index f10f1d947c78..6294b4b3132e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -537,7 +537,10 @@ static int sockfs_setattr(struct dentry *dentry,
struct iattr *iattr)
        if (!err && (iattr->ia_valid & ATTR_UID)) {
                struct socket *sock = SOCKET_I(d_inode(dentry));

-               sock->sk->sk_uid = iattr->ia_uid;
+               if (sock->sk)
+                       sock->sk->sk_uid = iattr->ia_uid;
+               else
+                       err = -ENOENT;
        }

        return err;

^ permalink raw reply related

* Re: suspicius csum initialization in vmxnet3_rx_csum
From: Ronak Doshi @ 2018-06-05 19:15 UTC (permalink / raw)
  To: Paolo Abeni; +Cc: Neil Horman, Guolin Yang, Boon Ang, Louis Luo, netdev
In-Reply-To: <9296894c5f1ffb300ec18d7902751f68d914615b.camel@redhat.com>



On Tue, 5 Jun 2018, Paolo Abeni wrote:

> Hi,
> 
> I'm sorry for the long delay in my answer, I've been travelling.
> 
> On Fri, 2018-06-01 at 11:10 -0700, Ronak Doshi wrote:
> > On Thu, 31 May 2018, Neil Horman wrote:
> > > What packet types will rcd.csum be set for?
> > > Neil 
> > 
> > I looked thorugh the emulation code and found that rcd.csum is not set. 
> > For valid v4/v6, TCP/UDP packets the code block above the mentioend "if" 
> > block will be executed or else it will go through checksum none.
> > 
> > That's why I wanted to know (in previous emails) which ESX build is being 
> > used while this was tested. The code block under "if (gdesc->rcd.csum)" 
> > block might seem incorrect but it shouldn't be hit as rcd.csum is not set. 
> 
> I'm unsure if I read the above correctly. Do you mean that the relevant
> code-path is never hit? If so, can we simply drop it, as we agreed that
> such code is uncorrect? Elsewhere, could you plese specify under which
> circumstances gdesc->rcd.csum is filled by the hypervisor?
>
I do not see hypervisor populating rcd.csum field or may be the code has 
changed over the years. So, the codepath should not be hit as it is not 
populated. I will check and fix it or remove the block if not required. 
But as far as your issue is concerned, that code block is not hit.
 
Thanks,
Ronak

^ permalink raw reply

* Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
From: Heiner Kallweit @ 2018-06-05 19:17 UTC (permalink / raw)
  To: Bjorn Helgaas, Ryankao
  Cc: Kai Heng Feng, jrg.otte@gmail.com, David Miller, Hayes Wang,
	romieu@fr.zoreil.com, Linux Netdev List,
	Linux Kernel Mailing List, Hau, linux-pci
In-Reply-To: <20180605191142.GA214338@bhelgaas-glaptop.roam.corp.google.com>

On 05.06.2018 21:11, Bjorn Helgaas wrote:
> [+cc linux-pci]
> 
> On Tue, Jun 05, 2018 at 12:28:05PM -0500, Bjorn Helgaas wrote:
>> On Tue, Jun 05, 2018 at 06:34:09AM +0000, Ryankao wrote:
>>> Add realtek folk Hau
>>>
>>> -----Original Message-----
>>> From: Kai Heng Feng [mailto:kai.heng.feng@canonical.com] 
>>> Sent: Tuesday, June 05, 2018 1:02 PM
>>> To: jrg.otte@gmail.com
>>> Cc: David Miller <davem@davemloft.net>; Hayes Wang <hayeswang@realtek.com>; hkallweit1@gmail.com; romieu@fr.zoreil.com; Linux Netdev List <netdev@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Ryankao <ryankao@realtek.com>
>>> Subject: Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
>>>
>>> Hi Jörg Otte,
>>>
>>> Can you give this patch a try?
>>>
>>> Since you are the only one that reported ALDPS/ASPM regression,
>>>
>>> And I think this patch should solve the issue you had [1].
>>>
>>> Hopefully we don't need to go down the rabbit hole of blacklist/whitelist...
>>>
>>> Kai-Heng
>>>
>>> [1] https://lkml.org/lkml/2013/1/5/36
>>
>> I have no idea what ALDPS is.  It's not mentioned in the PCIe spec, so
>> presumably it's some Realtek-specific thing.  ASPM is a generic PCIe
>> thing.  Changes to these two things should be in separate patches so
>> they don't get tangled up.
>>
ALDPS = Advanced Link Down Power Saving
And yes, it's a Realtek feature.

>>>> On Jun 5, 2018, at 12:58 PM, Kai-Heng Feng 
>>>> <kai.heng.feng@canonical.com>
>>>> wrote:
>>>>
>>>> This patch reinstate ALDPS and ASPM support on r8169.
>>>>
>>>> On some Intel platforms, ASPM support on r8169 is the key factor to 
>>>> let Package C-State achieve PC8. Without ASPM support, the deepest 
>>>> Package C-State can hit is PC3. PC8 can save additional ~3W in 
>>>> comparison with PC3.
>>>>
>>>> This patch is from Realtek.
>>>>
>>>> Fixes: e0c075577965 ("r8169: enable ALDPS for power saving")
>>>> Fixes: d64ec841517a ("r8169: enable internal ASPM and clock request
>>>> settings")
>>
>>>> +3507,15 @@ static void rtl8168e_1_hw_phy_config(struct 
>>>> rtl8169_private *tp)
>>>>  	rtl_writephy(tp, 0x0d, 0x4007);
>>>>  	rtl_writephy(tp, 0x0e, 0x0000);
>>>>  	rtl_writephy(tp, 0x0d, 0x0000);
>>>> +
>>>> +	/* Check ALDPS bit, disable it if enabled */
>>>> +	rtl_writephy(tp, 0x1f, 0x0000);
>>>> +	if (enable_aldps)
>>>> +		rtl_w0w1_phy(tp, 0x15, 0x1000, 0x0000);
>>>> +	else if (rtl_readphy(tp, 0x15) & 0x1000)
>>>> +		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x1000);
>>
>> There's a lot of repetition of this code with minor variations.  You
>> could probably factor it out and make it more concise and more
>> readable.
>>
>>>> +static void rtl8169_check_link_status(struct net_device *dev,
>>>> +				      struct rtl8169_private *tp) {
>>>> +	struct device *d = tp_to_dev(tp);
>>>> +
>>>> +	if (tp->link_ok(tp)) {
>>>> +		rtl_link_chg_patch(tp);
>>>> +		/* This is to cancel a scheduled suspend if there's one. */
>>>> +		if (pm_request_resume(d))
>>>> +			_rtl_reset_work(tp);
>>>> +		netif_carrier_on(dev);
>>>> +		if (net_ratelimit())
>>>> +			netif_info(tp, ifup, dev, "link up\n");
>>>> +	} else {
>>>> +		netif_carrier_off(dev);
>>>> +		netif_info(tp, ifdown, dev, "link down\n");
>>>> +		pm_runtime_idle(d);
>>>> +	}
>>>> +}
>>
>> This function apparently just got moved around without changing
>> anything.  That's fine, but the move should be in a separate patch to
>> make the real changes easier to review.
>>
>>>> @@ -7649,8 +7757,12 @@ static int rtl_init_one(struct pci_dev *pdev, 
>>>> const struct pci_device_id *ent)
>>>>
>>>>  	/* disable ASPM completely as that cause random device stop working
>>>>  	 * problems as well as full system hangs for some PCIe devices users */
>>>> -	pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
>>>> -				     PCIE_LINK_STATE_CLKPM);
>>>> +	if (!enable_aspm) {
>>>> +		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
>>>> +					     PCIE_LINK_STATE_L1 |
>>>> +					     PCIE_LINK_STATE_CLKPM);
>>>> +		netif_info(tp, probe, dev, "ASPM disabled\n");
>>>> +	}
>>
>> ASPM is a generic PCIe feature that should be configured by the PCI
>> core without any help from the device driver.
>>
>> If code in the driver is needed, that means either the PCI core is
>> doing it wrong and we should fix it there, or the device is broken and
>> the driver is working around the erratum.
>>
>> If this is an erratum, you should include details about exactly what's
>> broken and (ideally) a URL to the published erratum.  Otherwise this
>> is just unmaintainable black magic and likely to be broken by future
>> ASPM changes in the PCI core.
>>
>> ASPM configuration is done by the PCI core before drivers are bound to
>> the device.  If you need device-specific workarounds, they should
>> probably be in quirks so they're done before the core does that ASPM
>> configuration.
>>
>>>>  	/* enable device (incl. PCI PM wakeup and hotplug setup) */
>>>>  	rc = pcim_enable_device(pdev);
>>>> --
>>>> 2.17.0
>>>
>>> ------Please consider the environment before printing this e-mail.
> 

^ permalink raw reply

* Re: pull-request: bpf-next 2018-06-05
From: David Miller @ 2018-06-05 19:22 UTC (permalink / raw)
  To: daniel; +Cc: ast, netdev
In-Reply-To: <20180605163916.2922-1-daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue,  5 Jun 2018 18:39:16 +0200

> The following pull-request contains BPF updates for your *net-next* tree.
> 
> The main changes are:
 ...
> Please consider pulling these changes from:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git

I've pulled this in and will push back out after some build testing.

Thanks!

^ permalink raw reply

* Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
From: Heiner Kallweit @ 2018-06-05 19:24 UTC (permalink / raw)
  To: Bjorn Helgaas, Ryankao
  Cc: Kai Heng Feng, jrg.otte@gmail.com, David Miller, Hayes Wang,
	romieu@fr.zoreil.com, Linux Netdev List,
	Linux Kernel Mailing List, Hau
In-Reply-To: <20180605172805.GD30381@bhelgaas-glaptop.roam.corp.google.com>

On 05.06.2018 19:28, Bjorn Helgaas wrote:
> On Tue, Jun 05, 2018 at 06:34:09AM +0000, Ryankao wrote:
>> Add realtek folk Hau
>>
>> -----Original Message-----
>> From: Kai Heng Feng [mailto:kai.heng.feng@canonical.com] 
>> Sent: Tuesday, June 05, 2018 1:02 PM
>> To: jrg.otte@gmail.com
>> Cc: David Miller <davem@davemloft.net>; Hayes Wang <hayeswang@realtek.com>; hkallweit1@gmail.com; romieu@fr.zoreil.com; Linux Netdev List <netdev@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Ryankao <ryankao@realtek.com>
>> Subject: Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
>>
>> Hi Jörg Otte,
>>
>> Can you give this patch a try?
>>
>> Since you are the only one that reported ALDPS/ASPM regression,
>>
>> And I think this patch should solve the issue you had [1].
>>
>> Hopefully we don't need to go down the rabbit hole of blacklist/whitelist...
>>
>> Kai-Heng
>>
>> [1] https://lkml.org/lkml/2013/1/5/36
> 
> I have no idea what ALDPS is.  It's not mentioned in the PCIe spec, so
> presumably it's some Realtek-specific thing.  ASPM is a generic PCIe
> thing.  Changes to these two things should be in separate patches so
> they don't get tangled up.
> 
>>> On Jun 5, 2018, at 12:58 PM, Kai-Heng Feng 
>>> <kai.heng.feng@canonical.com>
>>> wrote:
>>>
>>> This patch reinstate ALDPS and ASPM support on r8169.
>>>
>>> On some Intel platforms, ASPM support on r8169 is the key factor to 
>>> let Package C-State achieve PC8. Without ASPM support, the deepest 
>>> Package C-State can hit is PC3. PC8 can save additional ~3W in 
>>> comparison with PC3.
>>>
>>> This patch is from Realtek.
>>>
>>> Fixes: e0c075577965 ("r8169: enable ALDPS for power saving")
>>> Fixes: d64ec841517a ("r8169: enable internal ASPM and clock request
>>> settings")
> 
>>> +3507,15 @@ static void rtl8168e_1_hw_phy_config(struct 
>>> rtl8169_private *tp)
>>>  	rtl_writephy(tp, 0x0d, 0x4007);
>>>  	rtl_writephy(tp, 0x0e, 0x0000);
>>>  	rtl_writephy(tp, 0x0d, 0x0000);
>>> +
>>> +	/* Check ALDPS bit, disable it if enabled */
>>> +	rtl_writephy(tp, 0x1f, 0x0000);
>>> +	if (enable_aldps)
>>> +		rtl_w0w1_phy(tp, 0x15, 0x1000, 0x0000);
>>> +	else if (rtl_readphy(tp, 0x15) & 0x1000)
>>> +		rtl_w0w1_phy(tp, 0x15, 0x0000, 0x1000);
> 
> There's a lot of repetition of this code with minor variations.  You
> could probably factor it out and make it more concise and more
> readable.
> 
>>> +static void rtl8169_check_link_status(struct net_device *dev,
>>> +				      struct rtl8169_private *tp) {
>>> +	struct device *d = tp_to_dev(tp);
>>> +
>>> +	if (tp->link_ok(tp)) {
>>> +		rtl_link_chg_patch(tp);
>>> +		/* This is to cancel a scheduled suspend if there's one. */
>>> +		if (pm_request_resume(d))
>>> +			_rtl_reset_work(tp);
>>> +		netif_carrier_on(dev);
>>> +		if (net_ratelimit())
>>> +			netif_info(tp, ifup, dev, "link up\n");
>>> +	} else {
>>> +		netif_carrier_off(dev);
>>> +		netif_info(tp, ifdown, dev, "link down\n");
>>> +		pm_runtime_idle(d);
>>> +	}
>>> +}
> 
> This function apparently just got moved around without changing
> anything.  That's fine, but the move should be in a separate patch to
> make the real changes easier to review.
> 
>>> @@ -7649,8 +7757,12 @@ static int rtl_init_one(struct pci_dev *pdev, 
>>> const struct pci_device_id *ent)
>>>
>>>  	/* disable ASPM completely as that cause random device stop working
>>>  	 * problems as well as full system hangs for some PCIe devices users */
>>> -	pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
>>> -				     PCIE_LINK_STATE_CLKPM);
>>> +	if (!enable_aspm) {
>>> +		pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S |
>>> +					     PCIE_LINK_STATE_L1 |
>>> +					     PCIE_LINK_STATE_CLKPM);
>>> +		netif_info(tp, probe, dev, "ASPM disabled\n");
>>> +	}
> 
> ASPM is a generic PCIe feature that should be configured by the PCI
> core without any help from the device driver.
> 
> If code in the driver is needed, that means either the PCI core is
> doing it wrong and we should fix it there, or the device is broken and
> the driver is working around the erratum.
> 
> If this is an erratum, you should include details about exactly what's
> broken and (ideally) a URL to the published erratum.  Otherwise this
> is just unmaintainable black magic and likely to be broken by future
> ASPM changes in the PCI core.
> 
Fully agree, but: There are no publicly available datasheets and only
source for such magic is the r8168 vendor driver and trial&error.

In addition the driver supports ~ 50 chip variants and not all variants
(unfortunately nobody except Realtek knows which) are affected by the
problem. Maybe the involved Realtek guys can shed some light on this.

> ASPM configuration is done by the PCI core before drivers are bound to
> the device.  If you need device-specific workarounds, they should
> probably be in quirks so they're done before the core does that ASPM
> configuration.
> 
>>>  	/* enable device (incl. PCI PM wakeup and hotplug setup) */
>>>  	rc = pcim_enable_device(pdev);
>>> --
>>> 2.17.0
>>
>> ------Please consider the environment before printing this e-mail.
> 

^ permalink raw reply

* Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
From: Florian Fainelli @ 2018-06-05 19:27 UTC (permalink / raw)
  To: Heiner Kallweit, Bjorn Helgaas, Ryankao
  Cc: Kai Heng Feng, jrg.otte@gmail.com, David Miller, Hayes Wang,
	romieu@fr.zoreil.com, Linux Netdev List,
	Linux Kernel Mailing List, Hau, linux-pci
In-Reply-To: <6c6b579a-5be4-ccc0-f64c-9998e51145f5@gmail.com>

On 06/05/2018 12:17 PM, Heiner Kallweit wrote:
> On 05.06.2018 21:11, Bjorn Helgaas wrote:
>> [+cc linux-pci]
>>
>> On Tue, Jun 05, 2018 at 12:28:05PM -0500, Bjorn Helgaas wrote:
>>> On Tue, Jun 05, 2018 at 06:34:09AM +0000, Ryankao wrote:
>>>> Add realtek folk Hau
>>>>
>>>> -----Original Message-----
>>>> From: Kai Heng Feng [mailto:kai.heng.feng@canonical.com] 
>>>> Sent: Tuesday, June 05, 2018 1:02 PM
>>>> To: jrg.otte@gmail.com
>>>> Cc: David Miller <davem@davemloft.net>; Hayes Wang <hayeswang@realtek.com>; hkallweit1@gmail.com; romieu@fr.zoreil.com; Linux Netdev List <netdev@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Ryankao <ryankao@realtek.com>
>>>> Subject: Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
>>>>
>>>> Hi Jörg Otte,
>>>>
>>>> Can you give this patch a try?
>>>>
>>>> Since you are the only one that reported ALDPS/ASPM regression,
>>>>
>>>> And I think this patch should solve the issue you had [1].
>>>>
>>>> Hopefully we don't need to go down the rabbit hole of blacklist/whitelist...
>>>>
>>>> Kai-Heng
>>>>
>>>> [1] https://lkml.org/lkml/2013/1/5/36
>>>
>>> I have no idea what ALDPS is.  It's not mentioned in the PCIe spec, so
>>> presumably it's some Realtek-specific thing.  ASPM is a generic PCIe
>>> thing.  Changes to these two things should be in separate patches so
>>> they don't get tangled up.
>>>
> ALDPS = Advanced Link Down Power Saving
> And yes, it's a Realtek feature.

Link as in Ethernet link or PCI(e) link? Sorry too lazy to let me google
that for myself :)
-- 
Florian

^ permalink raw reply

* Re: [PATCH net] failover: eliminate callback hell
From: Michael S. Tsirkin @ 2018-06-05 19:38 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: kys, haiyangz, davem, sridhar.samudrala, netdev,
	Stephen Hemminger
In-Reply-To: <20180605115305.502a7ebb@xeon-e3>

On Tue, Jun 05, 2018 at 11:53:05AM -0700, Stephen Hemminger wrote:
> > >   * Now, netvsc and net_failover use the same delayed work type
> > >     mechanism for setup. Previously, net_failover code was triggering off
> > >     name change but a similar policy was rejected for netvsc.
> > >     "what is good for the goose is good for the gander"  
> > 
> > I don't really understand what you are saying here.  I think the delayed
> > hack is kind of ugly and seems racy.  Current failover code was rejected
> > by whom?  Why is new one good and for whom?  Did you want to do a name
> > change in netvsc but it was rejected? Could you clarify please?
> 
> See:
>    https://patchwork.ozlabs.org/patch/851711/

Let me try to summarize that:

	You wanted to speed up the delayed link up.  You had an idea to
	additionally take link up when userspace renames the interface (standby
	one which is also the failover for netvsc).

	But userspace might not do any renames, in which case there will
	still be the delay, and so this never got applied.

	Is this a good summary?

Davem said delay should go away completely as it's not robust, and I
think I agree.  So I don't think we should make all failover users use
delay. IIUC failover kept a delay option especially for netvsc to
minimize the surprise factor. Hopefully we can come up with
something more robust and drop that option completely.


> > >   * Set permanent and current address of net_failover device
> > >     to match the primary.
> > > 
> > >   * Carrier should be marked off before registering device
> > >     the net_failover device.  
> > 
> > Are above two bugfixes?
> 
> Yes.

Maybe fix these two as first patches in the set?

> > > Although this patch needs to go into 4.18 (linux-net),  
> > 
> > I'd rather we focused on fixing bugs in 4.18, and left refactoring to
> > 4.19.
> >
> 
> Either we fix or revert the current code in 4.18.
> Sorry, I am not having callback hell code in any vendor or upstream kernel.

I agree callbacks add complexity which often isn't necessary, so
removing them where possible is a good cleanup.  But maybe a patch
shouldn't mix bugfixes, cleanups and behaviour changes all together.  If
nothing else it makes review harder.  Splitting patches up might make it
more likely they can go into 4.18 which seems to be what you want.

HTH,
-- 
MST

^ permalink raw reply

* Re: [PATCH net-next v2 0/2] net: phy: improve PHY suspend/resume
From: Heiner Kallweit @ 2018-06-05 19:39 UTC (permalink / raw)
  To: Andrew Lunn; +Cc: Florian Fainelli, David Miller, netdev@vger.kernel.org
In-Reply-To: <28a8846a-f7a7-3266-3aec-7f58fe8dac72@gmail.com>

On 02.06.2018 22:27, Heiner Kallweit wrote:
> On 01.06.2018 02:10, Andrew Lunn wrote:
>>> Configuring the different WoL options isn't handled by writing to
>>> the PHY registers but by writing to chip / MAC registers.
>>> Therefore phy_suspend() isn't able to figure out whether WoL is
>>> enabled or not. Only the parent has the full picture.
>>
>> Hi Heiner
>>
>> I think you need to look at your different runtime PM domains.  If i
>> understand the code right, you runtime suspend if there is no
>> link. But for this to work correctly, your PHY needs to keep working.
>> You also cannot assume all accesses to the PHY go via the MAC. Some
>> calls will go direct to the PHY, and they can trigger MDIO bus
>> accesses.  So i think you need two runtime PM domains. MAC and MDIO
>> bus.  Maybe just the pll? An MDIO bus is a device, so it can have its
>> on PM callbacks. It is not clear what you need to resume in order to
>> make MDIO work.
>>
> Thanks for your comments!
> The actual problem is quite small: I get an error at MDIO suspend,
> the PHY however is suspended later by the driver's suspend callback
> anyway. Because the problem is small I'm somewhat reluctant to
> consider bigger changes like introducing different PM domains.
> 
> Primary reason for the error is that the network chip is in PCI D3hot
> at that moment. In addition to that for some of the chips supported by
> the driver also MDIO-relevant PLL's might be disabled.
> 
> By the way:
> When checking PM handling for PHY/MDIO I stumbled across something
> that can be improved IMO, I'll send a patch for your review.
> 
I experimented a little and with adding Runtime PM to MDIO bus and
PHY device I can make it work:
PHY runtime-resumes before entering suspend and resumes its parent
(MDIO bus) which then resumes its parent (PCI device).
However this needs quite some code and is hard to read / understand
w/o reading through this mail thread.

And in general I still have doubts this is the right way. Let's
consider the following scenario:

A network driver configures WoL in its suspend callback
(incl. setting the device to wakeup-enabled).
The suspend callback of the PHY is called before this and therefore
has no clue that WoL will be configured a little later, with the
consequence that it will do an unsolicited power-down.
The network driver then has to detect this and power-up the PHY again.
This doesn't seem to make much sense and still my best idea is to
establish a mechanism that a device can state: I orchestrate PM
of my children.

Heiner

>> It might also help if you do the phy_connect in .ndo_open and
>> disconnect in .ndo_stop. This is a common pattern in drivers. But some
>> also do it is probe and remove.
>>
> Thanks for the hint. I will move phy_connect_direct accordingly.
> 
>>      Andrew
>>
> Heiner
> 

^ permalink raw reply

* [PATCH iproute2-next 1/1] tc: add json support in csum action
From: Keara Leibovitz @ 2018-06-05 19:30 UTC (permalink / raw)
  To: dsahern; +Cc: stephen, netdev, kernel, Keara Leibovitz

Signed-off-by: Keara Leibovitz <kleib@mojatatu.com>
---
 tc/m_csum.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tc/m_csum.c b/tc/m_csum.c
index 8391071d73f2..67481667d9d2 100644
--- a/tc/m_csum.c
+++ b/tc/m_csum.c
@@ -162,6 +162,7 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
 	char *uflag_5 = "";
 	char *uflag_6 = "";
 	char *uflag_7 = "";
+	char buf[64] = {0};
 
 	int uflag_count = 0;
 
@@ -198,12 +199,15 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
 		uflag_1 = "?empty";
 	}
 
-	fprintf(f, "csum (%s%s%s%s%s%s%s) ",
-		uflag_1, uflag_2, uflag_3,
-		uflag_4, uflag_5, uflag_6, uflag_7);
+	snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s",
+		 uflag_1, uflag_2, uflag_3,
+		 uflag_4, uflag_5, uflag_6, uflag_7);
+	print_string(PRINT_ANY, "csum", "csum (%s) ", buf);
+
 	print_action_control(f, "action ", sel->action, "\n");
-	fprintf(f, "\tindex %u ref %d bind %d", sel->index, sel->refcnt,
-		sel->bindcnt);
+	print_uint(PRINT_ANY, "index", "\tindex %u", sel->index);
+	print_int(PRINT_ANY, "ref", " ref %d", sel->refcnt);
+	print_int(PRINT_ANY, "bind", " bind %d", sel->bindcnt);
 
 	if (show_stats) {
 		if (tb[TCA_CSUM_TM]) {
@@ -212,7 +216,7 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
 			print_tm(f, tm);
 		}
 	}
-	fprintf(f, "\n");
+	print_string(PRINT_FP, NULL, "%s", "\n");
 
 	return 0;
 }
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
From: Heiner Kallweit @ 2018-06-05 19:41 UTC (permalink / raw)
  To: Florian Fainelli, Bjorn Helgaas, Ryankao
  Cc: Kai Heng Feng, jrg.otte@gmail.com, David Miller, Hayes Wang,
	romieu@fr.zoreil.com, Linux Netdev List,
	Linux Kernel Mailing List, Hau, linux-pci
In-Reply-To: <d50e04b3-87be-f300-b0ea-2da7e3660ebb@gmail.com>

On 05.06.2018 21:27, Florian Fainelli wrote:
> On 06/05/2018 12:17 PM, Heiner Kallweit wrote:
>> On 05.06.2018 21:11, Bjorn Helgaas wrote:
>>> [+cc linux-pci]
>>>
>>> On Tue, Jun 05, 2018 at 12:28:05PM -0500, Bjorn Helgaas wrote:
>>>> On Tue, Jun 05, 2018 at 06:34:09AM +0000, Ryankao wrote:
>>>>> Add realtek folk Hau
>>>>>
>>>>> -----Original Message-----
>>>>> From: Kai Heng Feng [mailto:kai.heng.feng@canonical.com] 
>>>>> Sent: Tuesday, June 05, 2018 1:02 PM
>>>>> To: jrg.otte@gmail.com
>>>>> Cc: David Miller <davem@davemloft.net>; Hayes Wang <hayeswang@realtek.com>; hkallweit1@gmail.com; romieu@fr.zoreil.com; Linux Netdev List <netdev@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Ryankao <ryankao@realtek.com>
>>>>> Subject: Re: [PATCH] r8169: Reinstate ALDPS and ASPM support
>>>>>
>>>>> Hi Jörg Otte,
>>>>>
>>>>> Can you give this patch a try?
>>>>>
>>>>> Since you are the only one that reported ALDPS/ASPM regression,
>>>>>
>>>>> And I think this patch should solve the issue you had [1].
>>>>>
>>>>> Hopefully we don't need to go down the rabbit hole of blacklist/whitelist...
>>>>>
>>>>> Kai-Heng
>>>>>
>>>>> [1] https://lkml.org/lkml/2013/1/5/36
>>>>
>>>> I have no idea what ALDPS is.  It's not mentioned in the PCIe spec, so
>>>> presumably it's some Realtek-specific thing.  ASPM is a generic PCIe
>>>> thing.  Changes to these two things should be in separate patches so
>>>> they don't get tangled up.
>>>>
>> ALDPS = Advanced Link Down Power Saving
>> And yes, it's a Realtek feature.
> 
> Link as in Ethernet link or PCI(e) link? Sorry too lazy to let me google
> that for myself :)
> 
Sorry .. Link here refers to the Ethernet PHY.

^ permalink raw reply

* Re: [PATCH iproute2-next 1/1] tc: add json support in csum action
From: David Ahern @ 2018-06-05 19:56 UTC (permalink / raw)
  To: Keara Leibovitz, dsahern; +Cc: stephen, netdev, kernel
In-Reply-To: <1528227039-21831-1-git-send-email-kleib@mojatatu.com>

On 6/5/18 12:30 PM, Keara Leibovitz wrote:

please add some words here. e.g., add example output

> Signed-off-by: Keara Leibovitz <kleib@mojatatu.com>
> ---
>  tc/m_csum.c | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/tc/m_csum.c b/tc/m_csum.c
> index 8391071d73f2..67481667d9d2 100644
> --- a/tc/m_csum.c
> +++ b/tc/m_csum.c
> @@ -162,6 +162,7 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
>  	char *uflag_5 = "";
>  	char *uflag_6 = "";
>  	char *uflag_7 = "";
> +	char buf[64] = {0};

initialization is not needed. It is set before use.

^ permalink raw reply

* [PATCH 1/1] net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
From: Arun Parameswaran @ 2018-06-05 20:38 UTC (permalink / raw)
  To: Florian Fainelli, Vivien Didelot, Andrew Lunn, David S. Miller
  Cc: netdev, linux-kernel, bcm-kernel-feedback-list, Arun Parameswaran

In the Broadcom Cygnus SoC, the brcm tag needs to be inserted
in between the mac address and the ether type (should use
'DSA_PROTO_TAG_BRCM') for the packets sent to the internal
b53 switch.

Since the Cygnus was added with the BCM58XX device id and the
BCM58XX uses 'DSA_PROTO_TAG_BRCM_PREPEND', the data path is
broken, due to the incorrect brcm tag location.

Add a new b53 device id (BCM583XX) for Cygnus family to fix the
issue. Add the new device id to the BCM58XX family as Cygnus
is similar to the BCM58XX in most other functionalities.

Fixes: 11606039604c ("net: dsa: b53: Support prepended Broadcom tags")

Signed-off-by: Arun Parameswaran <arun.parameswaran@broadcom.com>
---
 drivers/net/dsa/b53/b53_common.c | 15 ++++++++++++++-
 drivers/net/dsa/b53/b53_priv.h   |  2 ++
 drivers/net/dsa/b53/b53_srab.c   |  4 ++--
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 3da5fca..bbc6cc6 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -684,7 +684,8 @@ static int b53_switch_reset(struct b53_device *dev)
 	 * still use this driver as a library and need to perform the reset
 	 * earlier.
 	 */
-	if (dev->chip_id == BCM58XX_DEVICE_ID) {
+	if (dev->chip_id == BCM58XX_DEVICE_ID ||
+	    dev->chip_id == BCM583XX_DEVICE_ID) {
 		b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
 		reg |= SW_RST | EN_SW_RST | EN_CH_RST;
 		b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg);
@@ -1880,6 +1881,18 @@ struct b53_chip_data {
 		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
 	},
 	{
+		.chip_id = BCM583XX_DEVICE_ID,
+		.dev_name = "BCM583xx/11360",
+		.vlans = 4096,
+		.enabled_ports = 0x103,
+		.arl_entries = 4,
+		.cpu_port = B53_CPU_PORT,
+		.vta_regs = B53_VTA_REGS,
+		.duplex_reg = B53_DUPLEX_STAT_GE,
+		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+	},
+	{
 		.chip_id = BCM7445_DEVICE_ID,
 		.dev_name = "BCM7445",
 		.vlans	= 4096,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 3b57f47..b232aaa 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -62,6 +62,7 @@ enum {
 	BCM53018_DEVICE_ID = 0x53018,
 	BCM53019_DEVICE_ID = 0x53019,
 	BCM58XX_DEVICE_ID = 0x5800,
+	BCM583XX_DEVICE_ID = 0x58300,
 	BCM7445_DEVICE_ID = 0x7445,
 	BCM7278_DEVICE_ID = 0x7278,
 };
@@ -181,6 +182,7 @@ static inline int is5301x(struct b53_device *dev)
 static inline int is58xx(struct b53_device *dev)
 {
 	return dev->chip_id == BCM58XX_DEVICE_ID ||
+		dev->chip_id == BCM583XX_DEVICE_ID ||
 		dev->chip_id == BCM7445_DEVICE_ID ||
 		dev->chip_id == BCM7278_DEVICE_ID;
 }
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index c37ffd1..8247481 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -364,7 +364,7 @@ static int b53_srab_write64(struct b53_device *dev, u8 page, u8 reg,
 	{ .compatible = "brcm,bcm53018-srab" },
 	{ .compatible = "brcm,bcm53019-srab" },
 	{ .compatible = "brcm,bcm5301x-srab" },
-	{ .compatible = "brcm,bcm11360-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,bcm11360-srab", .data = (void *)BCM583XX_DEVICE_ID },
 	{ .compatible = "brcm,bcm58522-srab", .data = (void *)BCM58XX_DEVICE_ID },
 	{ .compatible = "brcm,bcm58525-srab", .data = (void *)BCM58XX_DEVICE_ID },
 	{ .compatible = "brcm,bcm58535-srab", .data = (void *)BCM58XX_DEVICE_ID },
@@ -372,7 +372,7 @@ static int b53_srab_write64(struct b53_device *dev, u8 page, u8 reg,
 	{ .compatible = "brcm,bcm58623-srab", .data = (void *)BCM58XX_DEVICE_ID },
 	{ .compatible = "brcm,bcm58625-srab", .data = (void *)BCM58XX_DEVICE_ID },
 	{ .compatible = "brcm,bcm88312-srab", .data = (void *)BCM58XX_DEVICE_ID },
-	{ .compatible = "brcm,cygnus-srab", .data = (void *)BCM58XX_DEVICE_ID },
+	{ .compatible = "brcm,cygnus-srab", .data = (void *)BCM583XX_DEVICE_ID },
 	{ .compatible = "brcm,nsp-srab", .data = (void *)BCM58XX_DEVICE_ID },
 	{ /* sentinel */ },
 };
-- 
1.9.1

^ permalink raw reply related

* [PATCH iproute2-next v2 1/1] tc: add json support in csum action
From: Keara Leibovitz @ 2018-06-05 20:44 UTC (permalink / raw)
  To: dsahern; +Cc: stephen, netdev, kernel, Keara Leibovitz

Add json output support for checksum action.

Example output:

~$ $TC actions add action csum udp continue index 7
~$ $TC actions add action csum icmp iph igmp pipe index 200 cookie 112233
~$ $TC -j actions ls action csum

[{
    "total acts":2
}, {
    "actions": [{
        "order":0,
        "csum":"udp",
        "control_action": {
            "type":"continue"
        },
        "index":7,
        "ref":1,
        "bind":0
    }, {
        "order":1,
        "csum":"iph, icmp, igmp",
        "control_action": {
            "type":"pipe"
        },
        "index":200,
        "ref":1,
        "bind":0,
        "cookie":"112233"
    }]
}]

v2:
    Don't initialized char buf[64];
    Add output example
    
Signed-off-by: Keara Leibovitz <kleib@mojatatu.com>
---
 tc/m_csum.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tc/m_csum.c b/tc/m_csum.c
index 8391071d73f2..0bdbcf361a28 100644
--- a/tc/m_csum.c
+++ b/tc/m_csum.c
@@ -162,6 +162,7 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
 	char *uflag_5 = "";
 	char *uflag_6 = "";
 	char *uflag_7 = "";
+	char buf[64];
 
 	int uflag_count = 0;
 
@@ -198,12 +199,15 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
 		uflag_1 = "?empty";
 	}
 
-	fprintf(f, "csum (%s%s%s%s%s%s%s) ",
-		uflag_1, uflag_2, uflag_3,
-		uflag_4, uflag_5, uflag_6, uflag_7);
+	snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s",
+		 uflag_1, uflag_2, uflag_3,
+		 uflag_4, uflag_5, uflag_6, uflag_7);
+	print_string(PRINT_ANY, "csum", "csum (%s) ", buf);
+
 	print_action_control(f, "action ", sel->action, "\n");
-	fprintf(f, "\tindex %u ref %d bind %d", sel->index, sel->refcnt,
-		sel->bindcnt);
+	print_uint(PRINT_ANY, "index", "\tindex %u", sel->index);
+	print_int(PRINT_ANY, "ref", " ref %d", sel->refcnt);
+	print_int(PRINT_ANY, "bind", " bind %d", sel->bindcnt);
 
 	if (show_stats) {
 		if (tb[TCA_CSUM_TM]) {
@@ -212,7 +216,7 @@ print_csum(struct action_util *au, FILE *f, struct rtattr *arg)
 			print_tm(f, tm);
 		}
 	}
-	fprintf(f, "\n");
+	print_string(PRINT_FP, NULL, "%s", "\n");
 
 	return 0;
 }
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH 1/1] net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
From: Scott Branden @ 2018-06-05 20:55 UTC (permalink / raw)
  To: Arun Parameswaran, Florian Fainelli, Vivien Didelot, Andrew Lunn,
	David S. Miller, Clément Péron
  Cc: netdev, linux-kernel, bcm-kernel-feedback-list
In-Reply-To: <1528231092-31472-1-git-send-email-arun.parameswaran@broadcom.com>

Clement,

Please test this works for your issue.


On 18-06-05 01:38 PM, Arun Parameswaran wrote:
> In the Broadcom Cygnus SoC, the brcm tag needs to be inserted
> in between the mac address and the ether type (should use
> 'DSA_PROTO_TAG_BRCM') for the packets sent to the internal
> b53 switch.
>
> Since the Cygnus was added with the BCM58XX device id and the
> BCM58XX uses 'DSA_PROTO_TAG_BRCM_PREPEND', the data path is
> broken, due to the incorrect brcm tag location.
>
> Add a new b53 device id (BCM583XX) for Cygnus family to fix the
> issue. Add the new device id to the BCM58XX family as Cygnus
> is similar to the BCM58XX in most other functionalities.
>
> Fixes: 11606039604c ("net: dsa: b53: Support prepended Broadcom tags")
>
> Signed-off-by: Arun Parameswaran <arun.parameswaran@broadcom.com>
Acked-by: Scott Branden <scott.branden@broadcom.com>
> ---
>   drivers/net/dsa/b53/b53_common.c | 15 ++++++++++++++-
>   drivers/net/dsa/b53/b53_priv.h   |  2 ++
>   drivers/net/dsa/b53/b53_srab.c   |  4 ++--
>   3 files changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
> index 3da5fca..bbc6cc6 100644
> --- a/drivers/net/dsa/b53/b53_common.c
> +++ b/drivers/net/dsa/b53/b53_common.c
> @@ -684,7 +684,8 @@ static int b53_switch_reset(struct b53_device *dev)
>   	 * still use this driver as a library and need to perform the reset
>   	 * earlier.
>   	 */
> -	if (dev->chip_id == BCM58XX_DEVICE_ID) {
> +	if (dev->chip_id == BCM58XX_DEVICE_ID ||
> +	    dev->chip_id == BCM583XX_DEVICE_ID) {
>   		b53_read8(dev, B53_CTRL_PAGE, B53_SOFTRESET, &reg);
>   		reg |= SW_RST | EN_SW_RST | EN_CH_RST;
>   		b53_write8(dev, B53_CTRL_PAGE, B53_SOFTRESET, reg);
> @@ -1880,6 +1881,18 @@ struct b53_chip_data {
>   		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
>   	},
>   	{
> +		.chip_id = BCM583XX_DEVICE_ID,
> +		.dev_name = "BCM583xx/11360",
> +		.vlans = 4096,
> +		.enabled_ports = 0x103,
> +		.arl_entries = 4,
> +		.cpu_port = B53_CPU_PORT,
> +		.vta_regs = B53_VTA_REGS,
> +		.duplex_reg = B53_DUPLEX_STAT_GE,
> +		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
> +		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
> +	},
> +	{
>   		.chip_id = BCM7445_DEVICE_ID,
>   		.dev_name = "BCM7445",
>   		.vlans	= 4096,
> diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
> index 3b57f47..b232aaa 100644
> --- a/drivers/net/dsa/b53/b53_priv.h
> +++ b/drivers/net/dsa/b53/b53_priv.h
> @@ -62,6 +62,7 @@ enum {
>   	BCM53018_DEVICE_ID = 0x53018,
>   	BCM53019_DEVICE_ID = 0x53019,
>   	BCM58XX_DEVICE_ID = 0x5800,
> +	BCM583XX_DEVICE_ID = 0x58300,
>   	BCM7445_DEVICE_ID = 0x7445,
>   	BCM7278_DEVICE_ID = 0x7278,
>   };
> @@ -181,6 +182,7 @@ static inline int is5301x(struct b53_device *dev)
>   static inline int is58xx(struct b53_device *dev)
>   {
>   	return dev->chip_id == BCM58XX_DEVICE_ID ||
> +		dev->chip_id == BCM583XX_DEVICE_ID ||
>   		dev->chip_id == BCM7445_DEVICE_ID ||
>   		dev->chip_id == BCM7278_DEVICE_ID;
>   }
> diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
> index c37ffd1..8247481 100644
> --- a/drivers/net/dsa/b53/b53_srab.c
> +++ b/drivers/net/dsa/b53/b53_srab.c
> @@ -364,7 +364,7 @@ static int b53_srab_write64(struct b53_device *dev, u8 page, u8 reg,
>   	{ .compatible = "brcm,bcm53018-srab" },
>   	{ .compatible = "brcm,bcm53019-srab" },
>   	{ .compatible = "brcm,bcm5301x-srab" },
> -	{ .compatible = "brcm,bcm11360-srab", .data = (void *)BCM58XX_DEVICE_ID },
> +	{ .compatible = "brcm,bcm11360-srab", .data = (void *)BCM583XX_DEVICE_ID },
>   	{ .compatible = "brcm,bcm58522-srab", .data = (void *)BCM58XX_DEVICE_ID },
>   	{ .compatible = "brcm,bcm58525-srab", .data = (void *)BCM58XX_DEVICE_ID },
>   	{ .compatible = "brcm,bcm58535-srab", .data = (void *)BCM58XX_DEVICE_ID },
> @@ -372,7 +372,7 @@ static int b53_srab_write64(struct b53_device *dev, u8 page, u8 reg,
>   	{ .compatible = "brcm,bcm58623-srab", .data = (void *)BCM58XX_DEVICE_ID },
>   	{ .compatible = "brcm,bcm58625-srab", .data = (void *)BCM58XX_DEVICE_ID },
>   	{ .compatible = "brcm,bcm88312-srab", .data = (void *)BCM58XX_DEVICE_ID },
> -	{ .compatible = "brcm,cygnus-srab", .data = (void *)BCM58XX_DEVICE_ID },
> +	{ .compatible = "brcm,cygnus-srab", .data = (void *)BCM583XX_DEVICE_ID },
>   	{ .compatible = "brcm,nsp-srab", .data = (void *)BCM58XX_DEVICE_ID },
>   	{ /* sentinel */ },
>   };

^ permalink raw reply

* Re: [PATCH 4/4] cpsw: add switchdev support
From: Grygorii Strashko @ 2018-06-05 21:03 UTC (permalink / raw)
  To: Ilias Apalodimas, Florian Fainelli
  Cc: Andrew Lunn, netdev, ivan.khoronzhuk, nsekhar, jiri, ivecera,
	francois.ozog, yogeshs, spatton
In-Reply-To: <20180602103432.GA948@apalos>



On 06/02/2018 05:34 AM, Ilias Apalodimas wrote:
> Hi Florian,
> 
> Thanks for taking time to look into this
> 
> On Fri, Jun 01, 2018 at 02:48:48PM -0700, Florian Fainelli wrote:
>>
>>
>> On 05/24/2018 09:56 PM, Ilias Apalodimas wrote:
>>> On Thu, May 24, 2018 at 06:39:04PM +0200, Andrew Lunn wrote:
>>>> On Thu, May 24, 2018 at 04:32:34PM +0300, Ilias Apalodimas wrote:
>>>>> On Thu, May 24, 2018 at 03:12:29PM +0200, Andrew Lunn wrote:
>>>>>> Device tree is supposed to describe the hardware. Using that hardware
>>>>>> in different ways is not something you should describe in DT.
>>>>>>
>>>>> The new switchdev mode is applied with a .config option in the kernel. What you
>>>>> see is pre-existing code, so i am not sure if i should change it in this
>>>>> patchset.
>>>>
>>>> If you break the code up into a library and two drivers, it becomes a
>>>> moot point.
>>> Agree
>>>
>>>>
>>>> But what i don't like here is that the device tree says to do dual
>>>> mac. But you ignore that and do sometime else. I would prefer that if
>>>> DT says dual mac, and switchdev is compiled in, the probe fails with
>>>> EINVAL. Rather than ignore something, make it clear it is invalid.
>>> The switch has 3 modes of operation as is.
>>> 1. switch mode, to enable that you don't need to add anything on
>>> the DTS and linux registers a single netdev interface.
>>> 2. dual mac mode, this is when you need to add dual_emac; on the DTS.
>>> 3. switchdev mode which is controlled by a .config option, since as you
>>> pointed out DTS was not made for controlling config options.
>>>
>>> I agree that this is far from beautiful. If the driver remains as in though,
>>> i'd prefer either keeping what's there or making "switchdev" a DTS option,
>>> following the pre-existing erroneous usage rather than making the device
>>> unusable.  If we end up returning some error and refuse to initialize, users
>>> that remote upgrade their equipment, without taking a good look at changelog,
>>> will loose access to their devices with no means of remotely fixing that.
>>
>> It seems to me that the mistake here is seeing multiple modes of
>> operations for the cpsw. There are not actually many, there is one
>> usage, and then there is what you can and cannot offload.

> CPSW has in fact 2 modes of operation, different FIFO usage/lookup entry(it's
> called ALE in the current driver) by-pass(which is used in dual emac for
> example) and other features. Again Grygorii is better suited to answer the
> exact differences.

dual_mac is HW enabled mode of operation, so having DT option is pretty
reasonable as for me. 
1) when enabled it configures internal FIFOs in special way so both
external Ports became equal in the direction toward to Host port 0.

TRM "The intention of this mode is to allow packets from both ethernet ports
to be written into the FIFO without one port starving the other port."

2) ALE, out of the box, does not support this mode and, as result, two
default vlan have to be created to direct traffic P1->P0 (vlan1) and
P2-P0 (vlan2), but any packets P0->P1 and P0->P2 have to be directed
(and will bypass ALE). This way traffic separated on cpsw egress towards to P0, 

>> The basic
>> premise with switchdev and DSA (which uses switchdev) is that each
>> user-facing port of your switch needs to work as if it were a normal
>> Ethernet NIC, that is what you call dual-MAC I believe. Then, when you
>> create a bridge and you enslave those ports into the bridge, you need to
>> have forwarding done in hardware between these two ports when the
>> SMAC/DMAC are not for the host/CPU/management interface and you must
>> simultaneously still have the host have the ability to send/receive
>> traffic through the bridge device.

TRM
"When operating in dual mac mode the intention is to transfer packets between ports 0 and 1 and ports 0
and 2, but not between ports 1 and 2. Each CPGMAC_SL appears as a single MAC with no bridging
between MAC’s. Each CPGMAC_SL has at least one unique (not the same) mac address."

So, we cant't talk about forwarding in dual_mac mode. Interfaces expected to be 
completely independent without any packet leaking between interfaces.

!! BUT !! CPSW ALE (switch core) still expected to be used, but for packet filtering
- only registered vlans
- only registered mcast/bcast
- ingress mcast/bcast rate limiting (it's actually more like coalescing -
 limits number of mcast/bcast packets per sec. 

And all offloading ALE (val/mdb) entries should always contain two ports in masks:
p1&p0 or p2&p0. Never ever all three ports. 

> Yes dual emac does that. But dual emac configures the port facing VLAN to the
> CPU port as well. So dual emac splits and uses 2 interfaces. VLAN 1 is
> configured on port1 + CPU port and VLAN 2 is confired on port 2 + CPU port
> That's exactly what the current RFC does as well, with the addition of
> registering a sw0p0 (i'll explain why later on this mail)
> A little more detail on the issue we are having. On my description
> sw0p0 -> CPU port, sw0p1 -> port 1 sw0p2 -> port 2. sw0p1/sw0p2 are the ports
> that have PHYs attached.
> 
> When we start in the new switchdev mode all interfaces are added to VLAN 0
> so CPU port + port1 + port2 are all in the same VLAN group. In that case sw0p1
> and sw0p2 are working as you describe. So those 2 interfaces can send/receive
> traffic normally which matches the switchdev case.
> 
> When we add them on a bridge(let's say br0), VLAN1(or any default bridge VLAN)
> is now configured on sw0p1 and sw0p2 but *not* on the CPU port.
>  From this point on the whole fuunctionality just collapses. The switch will
> work and offload traffic between sw0p1/sw0p2 but the bridge won't be able to
> get an ip address (since VLAN1 is not a member of the CPU port and the packet
> gets dropped).
> IGMPv2/V3 messages will never reach the br_multicast.c code to trigger
> switchdev and configure the MDBs on the ports.  i am prety sure there are other
> fail scenarios which i haven't discovered already, but those 2 are the most
> basic ones.  If we add VLAN1 on the CPU port, everything works as intended of
> course.
> 
> That's the reason we registered sw0p0 as the CPU port. It can't do any "real"
> traffic, but you can configure the CPU port independantly and not be forced to
> do an OR on every VLAN add/delete grouping the CPU port with your port command.
> The TL;DR version of this is that the switch is working exactly as switchdev is
> expecting offloading traffic to the hardware when possible as long as the CPU
> port is member of the proper VLANs
> 
> Petr's patch solves this for us (9c86ce2c1ae337fc10568a12aea812ed03de8319).
> We can now do "bridge vlan add dev br0 vid 100 pvid untagged self" and decide
> when to add the CPU port or not.
> 
> There are still a couple of cases that are not covered though, if we don't
> register the CPU port. We cant decide when to forward multicast
> traffic on the CPU port if a join hasn't been sent from br0.
> So let's say you got 2 hosts doing multicast and for whatever reason the host
> wants to see that traffic.
> With the CPU port present you can do a
> "bridge mdb add dev br0 port sw0p0 grp 239.1.1.1 permanent" which will offload
> the traffic to the CPU port and thus the host. If this goes away we are forced
> to send a join.
>   
>> It seems to me like this is entirely doable given that the dual MAC use
>> case is supported already.
>>
>> switchdev is just a stateless framework to get notified from the
>> networking stack about what you can possibly offload in hardware, so
>> having a DTS option gate that is unfortunately wrong because it is
>> really implementing a SW policy in DTS which is not what it is meant for.
> The DTS option for configuration pre-existed, i don't like that either switchdev
> mode is activated by a .config option not DTS(it just overrides whatever config
> you have on the DTS). Far from pretty though fair point, i am open to
> suggestions.

Again this is option describing HW mode which not expected to be changed on the fly.
I'm honestly, propose descope dual_mac and concentrate on switch mode (switchdev) - 
right now I don't see how dual_mac can be supported with switchdev as per above.
The same way as I do not see how we can re-use switchdev with 50% of devices which 
have "only one" user-facing external port (P1 or P2).


-- 
regards,
-grygorii

^ permalink raw reply

* umh build...
From: David Miller @ 2018-06-05 21:03 UTC (permalink / raw)
  To: alexei.starovoitov; +Cc: netdev

Alexei, I tried to build bpfilter on sparc64 and it shows that
CONFIG_OUTPUT_FORMAT is an x86 specific Kconfig value.

And, even if I added it, it's not clear what it should even be set to.

Right now, for example, my userland builds default to 32-bit sparc
even though I'm building a 64-bit kernel.

I think what ends up happening has to be in some way in response to
what kind of "native" binaries HOSTCC is actually building.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox