Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 3/6] staging: fsl-dpaa2/ethsw: Add ethtool support
From: Razvan Stefanescu @ 2017-10-05  9:16 UTC (permalink / raw)
  To: gregkh
  Cc: devel, arnd, netdev, alexandru.marginean, linux-kernel, agraf,
	stuyoder, bogdan.purcareata, laurentiu.tudor
In-Reply-To: <1507194974-12050-1-git-send-email-razvan.stefanescu@nxp.com>

Add driver information, link details and hardware statistics to be
reported via ethtool -S.

Signed-off-by: Razvan Stefanescu <razvan.stefanescu@nxp.com>
---
Changelog:
 v2:
    - no changes
 v3:
    - removed driver version

 drivers/staging/fsl-dpaa2/ethsw/Makefile        |   2 +-
 drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h      |  13 ++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.c          |  32 ++++
 drivers/staging/fsl-dpaa2/ethsw/dpsw.h          |  32 ++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c | 206 ++++++++++++++++++++++++
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c         |   1 +
 drivers/staging/fsl-dpaa2/ethsw/ethsw.h         |   2 +
 7 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c

diff --git a/drivers/staging/fsl-dpaa2/ethsw/Makefile b/drivers/staging/fsl-dpaa2/ethsw/Makefile
index a6d72d1..de92cd9 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/Makefile
+++ b/drivers/staging/fsl-dpaa2/ethsw/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_FSL_DPAA2_ETHSW) += dpaa2-ethsw.o
 
-dpaa2-ethsw-objs := ethsw.o dpsw.o
+dpaa2-ethsw-objs := ethsw.o ethsw-ethtool.o dpsw.o
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
index ddfd820..06b71122 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw-cmd.h
@@ -74,6 +74,8 @@
 #define DPSW_CMDID_IF_SET_FLOODING          DPSW_CMD_ID(0x047)
 #define DPSW_CMDID_IF_SET_BROADCAST         DPSW_CMD_ID(0x048)
 
+#define DPSW_CMDID_IF_SET_LINK_CFG          DPSW_CMD_ID(0x04C)
+
 #define DPSW_CMDID_VLAN_ADD                 DPSW_CMD_ID(0x060)
 #define DPSW_CMDID_VLAN_ADD_IF              DPSW_CMD_ID(0x061)
 #define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED     DPSW_CMD_ID(0x062)
@@ -262,6 +264,17 @@ struct dpsw_cmd_if_set_max_frame_length {
 	__le16 frame_length;
 };
 
+struct dpsw_cmd_if_set_link_cfg {
+	/* cmd word 0 */
+	__le16 if_id;
+	u8 pad[6];
+	/* cmd word 1 */
+	__le32 rate;
+	__le32 pad1;
+	/* cmd word 2 */
+	__le64 options;
+};
+
 struct dpsw_cmd_if_get_link_state {
 	__le16 if_id;
 };
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
index e65b6f5..f1a1fac 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.c
@@ -383,6 +383,38 @@ int dpsw_get_attributes(struct fsl_mc_io *mc_io,
 }
 
 /**
+ * dpsw_if_set_link_cfg() - Set the link configuration.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPSW object
+ * @if_id:	Interface id
+ * @cfg:	Link configuration
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u16 if_id,
+			 struct dpsw_link_cfg *cfg)
+{
+	struct mc_command cmd = { 0 };
+	struct dpsw_cmd_if_set_link_cfg *cmd_params;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LINK_CFG,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_if_set_link_cfg *)cmd.params;
+	cmd_params->if_id = cpu_to_le16(if_id);
+	cmd_params->rate = cpu_to_le32(cfg->rate);
+	cmd_params->options = cpu_to_le64(cfg->options);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
  * dpsw_if_get_link_state - Return the link state
  * @mc_io:	Pointer to MC portal's I/O object
  * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
diff --git a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
index 7fa8a61..87369a5 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/dpsw.h
@@ -245,6 +245,38 @@ enum dpsw_action {
 };
 
 /**
+ * Enable auto-negotiation
+ */
+#define DPSW_LINK_OPT_AUTONEG		0x0000000000000001ULL
+/**
+ * Enable half-duplex mode
+ */
+#define DPSW_LINK_OPT_HALF_DUPLEX	0x0000000000000002ULL
+/**
+ * Enable pause frames
+ */
+#define DPSW_LINK_OPT_PAUSE		0x0000000000000004ULL
+/**
+ * Enable a-symmetric pause frames
+ */
+#define DPSW_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
+
+/**
+ * struct dpsw_link_cfg - Structure representing DPSW link configuration
+ * @rate: Rate
+ * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
+ */
+struct dpsw_link_cfg {
+	u32 rate;
+	u64 options;
+};
+
+int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u16 if_id,
+			 struct dpsw_link_cfg *cfg);
+/**
  * struct dpsw_link_state - Structure representing DPSW link state
  * @rate: Rate
  * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c
new file mode 100644
index 0000000..659e66e
--- /dev/null
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw-ethtool.c
@@ -0,0 +1,206 @@
+/* Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017 NXP
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the above-listed copyright holders nor the
+ *	 names of any contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ethsw.h"
+
+static struct {
+	enum dpsw_counter id;
+	char name[ETH_GSTRING_LEN];
+} ethsw_ethtool_counters[] =  {
+	{DPSW_CNT_ING_FRAME,		"rx frames"},
+	{DPSW_CNT_ING_BYTE,		"rx bytes"},
+	{DPSW_CNT_ING_FLTR_FRAME,	"rx filtered frames"},
+	{DPSW_CNT_ING_FRAME_DISCARD,	"rx discarded frames"},
+	{DPSW_CNT_ING_BCAST_FRAME,	"rx b-cast frames"},
+	{DPSW_CNT_ING_BCAST_BYTES,	"rx b-cast bytes"},
+	{DPSW_CNT_ING_MCAST_FRAME,	"rx m-cast frames"},
+	{DPSW_CNT_ING_MCAST_BYTE,	"rx m-cast bytes"},
+	{DPSW_CNT_EGR_FRAME,		"tx frames"},
+	{DPSW_CNT_EGR_BYTE,		"tx bytes"},
+	{DPSW_CNT_EGR_FRAME_DISCARD,	"tx discarded frames"},
+
+};
+
+#define ETHSW_NUM_COUNTERS	ARRAY_SIZE(ethsw_ethtool_counters)
+
+static void ethsw_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	u16 version_major, version_minor;
+	int err;
+
+	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+
+	err = dpsw_get_api_version(port_priv->ethsw_data->mc_io, 0,
+				   &version_major,
+				   &version_minor);
+	if (err)
+		strlcpy(drvinfo->fw_version, "N/A",
+			sizeof(drvinfo->fw_version));
+	else
+		snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+			 "%u.%u", version_major, version_minor);
+
+	strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent->parent),
+		sizeof(drvinfo->bus_info));
+}
+
+static int
+ethsw_get_link_ksettings(struct net_device *netdev,
+			 struct ethtool_link_ksettings *link_ksettings)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpsw_link_state state = {0};
+	int err = 0;
+
+	err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
+				     port_priv->ethsw_data->dpsw_handle,
+				     port_priv->idx,
+				     &state);
+	if (err) {
+		netdev_err(netdev, "ERROR %d getting link state", err);
+		goto out;
+	}
+
+	/* At the moment, we have no way of interrogating the DPMAC
+	 * from the DPSW side or there may not exist a DPMAC at all.
+	 * Report only autoneg state, duplexity and speed.
+	 */
+	if (state.options & DPSW_LINK_OPT_AUTONEG)
+		link_ksettings->base.autoneg = AUTONEG_ENABLE;
+	if (!(state.options & DPSW_LINK_OPT_HALF_DUPLEX))
+		link_ksettings->base.duplex = DUPLEX_FULL;
+	link_ksettings->base.speed = state.rate;
+
+out:
+	return err;
+}
+
+static int
+ethsw_set_link_ksettings(struct net_device *netdev,
+			 const struct ethtool_link_ksettings *link_ksettings)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct dpsw_link_cfg cfg = {0};
+	int err = 0;
+
+	netdev_dbg(netdev, "Setting link parameters...");
+
+	/* Due to a temporary MC limitation, the DPSW port must be down
+	 * in order to be able to change link settings. Taking steps to let
+	 * the user know that.
+	 */
+	if (netif_running(netdev)) {
+		netdev_info(netdev, "Sorry, interface must be brought down first.\n");
+		return -EACCES;
+	}
+
+	cfg.rate = link_ksettings->base.speed;
+	if (link_ksettings->base.autoneg == AUTONEG_ENABLE)
+		cfg.options |= DPSW_LINK_OPT_AUTONEG;
+	else
+		cfg.options &= ~DPSW_LINK_OPT_AUTONEG;
+	if (link_ksettings->base.duplex  == DUPLEX_HALF)
+		cfg.options |= DPSW_LINK_OPT_HALF_DUPLEX;
+	else
+		cfg.options &= ~DPSW_LINK_OPT_HALF_DUPLEX;
+
+	err = dpsw_if_set_link_cfg(port_priv->ethsw_data->mc_io, 0,
+				   port_priv->ethsw_data->dpsw_handle,
+				   port_priv->idx,
+				   &cfg);
+	if (err)
+		/* ethtool will be loud enough if we return an error; no point
+		 * in putting our own error message on the console by default
+		 */
+		netdev_dbg(netdev, "ERROR %d setting link cfg", err);
+
+	return err;
+}
+
+static int ethsw_ethtool_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ETHSW_NUM_COUNTERS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void ethsw_ethtool_get_strings(struct net_device *netdev,
+				      u32 stringset, u8 *data)
+{
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ETHSW_NUM_COUNTERS; i++)
+			memcpy(data + i * ETH_GSTRING_LEN,
+			       ethsw_ethtool_counters[i].name, ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static void ethsw_ethtool_get_stats(struct net_device *netdev,
+				    struct ethtool_stats *stats,
+				    u64 *data)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int i, err;
+
+	memset(data, 0,
+	       sizeof(u64) * ETHSW_NUM_COUNTERS);
+
+	for (i = 0; i < ETHSW_NUM_COUNTERS; i++) {
+		err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0,
+					  port_priv->ethsw_data->dpsw_handle,
+					  port_priv->idx,
+					  ethsw_ethtool_counters[i].id,
+					  &data[i]);
+		if (err)
+			netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
+				   ethsw_ethtool_counters[i].name, err);
+	}
+}
+
+const struct ethtool_ops ethsw_port_ethtool_ops = {
+	.get_drvinfo		= ethsw_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= ethsw_get_link_ksettings,
+	.set_link_ksettings	= ethsw_set_link_ksettings,
+	.get_strings		= ethsw_ethtool_get_strings,
+	.get_ethtool_stats	= ethsw_ethtool_get_stats,
+	.get_sset_count		= ethsw_ethtool_get_sset_count,
+};
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index f45e5bb..7276ae37 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -1402,6 +1402,7 @@ static int ethsw_probe_port(struct ethsw_core *ethsw, u16 port_idx)
 
 	SET_NETDEV_DEV(port_netdev, dev);
 	port_netdev->netdev_ops = &ethsw_port_ops;
+	port_netdev->ethtool_ops = &ethsw_port_ethtool_ops;
 	port_netdev->switchdev_ops = &ethsw_port_switchdev_ops;
 
 	/* Set MTU limits */
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
index 8c1d645..413275b 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.h
@@ -57,6 +57,8 @@
 #define ETHSW_MAX_FRAME_LENGTH	(DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN)
 #define ETHSW_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN)
 
+extern const struct ethtool_ops ethsw_port_ethtool_ops;
+
 struct ethsw_core;
 
 /* Per port private data */
-- 
1.9.1

^ permalink raw reply related

* [PATCH v3 4/6] staging: fsl-dpaa2/ethsw: Add maintainer for Ethernet Switch driver
From: Razvan Stefanescu @ 2017-10-05  9:16 UTC (permalink / raw)
  To: gregkh
  Cc: devel, arnd, netdev, alexandru.marginean, linux-kernel, agraf,
	stuyoder, bogdan.purcareata, laurentiu.tudor
In-Reply-To: <1507194974-12050-1-git-send-email-razvan.stefanescu@nxp.com>

Signed-off-by: Razvan Stefanescu <razvan.stefanescu@nxp.com>
---
Changelog:
 v2:
    - no changes
 v3:
    - no changes

 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2281af4..cfd4f74 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4297,6 +4297,12 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	drivers/staging/fsl-dpaa2/ethernet
 
+DPAA2 ETHERNET SWITCH DRIVER
+M:	Razvan Stefanescu <razvan.stefanescu@nxp.com>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/staging/fsl-dpaa2/ethsw
+
 DPT_I2O SCSI RAID DRIVER
 M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
 L:	linux-scsi@vger.kernel.org
-- 
1.9.1

^ permalink raw reply related

* [PATCH v3 5/6] staging: fsl-dpaa2/ethsw: Add README
From: Razvan Stefanescu @ 2017-10-05  9:16 UTC (permalink / raw)
  To: gregkh
  Cc: devel, arnd, netdev, alexandru.marginean, linux-kernel, agraf,
	stuyoder, bogdan.purcareata, laurentiu.tudor
In-Reply-To: <1507194974-12050-1-git-send-email-razvan.stefanescu@nxp.com>

Add a README file describing the driver architecture, components and
interfaces.

Signed-off-by: Razvan Stefanescu <razvan.stefanescu@nxp.com>
---
Changelog:
 v2:
    - no changes
 v3:
    - no changes

 drivers/staging/fsl-dpaa2/ethsw/README | 106 +++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 drivers/staging/fsl-dpaa2/ethsw/README

diff --git a/drivers/staging/fsl-dpaa2/ethsw/README b/drivers/staging/fsl-dpaa2/ethsw/README
new file mode 100644
index 0000000..f6fc07f
--- /dev/null
+++ b/drivers/staging/fsl-dpaa2/ethsw/README
@@ -0,0 +1,106 @@
+DPAA2 Ethernet Switch driver
+============================
+
+This file provides documentation for the DPAA2 Ethernet Switch driver
+
+
+Contents
+========
+	Supported Platforms
+	Architecture Overview
+	Creating an Ethernet Switch
+	Features
+
+
+	Supported Platforms
+===================
+This driver provides networking support for Freescale LS2085A, LS2088A
+DPAA2 SoCs.
+
+
+Architecture Overview
+=====================
+The Ethernet Switch in the DPAA2 architecture consists of several hardware
+resources that provide the functionality. These are allocated and
+configured via the Management Complex (MC) portals. MC abstracts most of
+these resources as DPAA2 objects and exposes ABIs through which they can
+be configured and controlled.
+
+For a more detailed description of the DPAA2 architecture and its object
+abstractions see:
+	drivers/staging/fsl-mc/README.txt
+
+The Ethernet Switch is built on top of a Datapath Switch (DPSW) object.
+
+Configuration interface:
+
+          ---------------------
+         | DPAA2 Switch driver |
+          ---------------------
+                   .
+                   .
+              ----------
+             | DPSW API |
+              ----------
+                   .           software
+ ================= . ==============
+                   .           hardware
+          ---------------------
+         | MC hardware portals |
+          ---------------------
+                   .
+                   .
+                 ------
+                | DPSW |
+                 ------
+
+Driver uses the switch device driver model and exposes each switch port as
+a network interface, which can be included in a bridge. Traffic switched
+between ports is offloaded into the hardware. Exposed network interfaces
+are not used for I/O, they are used just for configuration. This
+limitation is going to be addressed in the future.
+
+The DPSW can have ports connected to DPNIs or to PHYs via DPMACs.
+
+
+ [ethA]     [ethB]     [ethC]     [ethD]     [ethE]     [ethF]
+    :          :          :          :          :          :
+    :          :          :          :          :          :
+[eth drv]  [eth drv]  [                ethsw drv              ]
+    :          :          :          :          :          :        kernel
+========================================================================
+    :          :          :          :          :          :        hardware
+ [DPNI]      [DPNI]     [============= DPSW =================]
+    |          |          |          |          |          |
+    |           ----------           |       [DPMAC]    [DPMAC]
+     -------------------------------            |          |
+                                                |          |
+                                              [PHY]      [PHY]
+
+For a more detailed description of the Ethernet switch device driver model
+see:
+	Documentation/networking/switchdev.txt
+
+Creating an Ethernet Switch
+===========================
+A device is created for the switch objects probed on the MC bus. Each DPSW
+has a number of properties which determine the configuration options and
+associated hardware resources.
+
+A DPSW object (and the other DPAA2 objects needed for a DPAA2 switch) can
+be added to a container on the MC bus in one of two ways: statically,
+through a Datapath Layout Binary file (DPL) that is parsed by MC at boot
+time; or created dynamically at runtime, via the DPAA2 objects APIs.
+
+Features
+========
+Driver configures DPSW to perform hardware switching offload of
+unicast/multicast/broadcast (VLAN tagged or untagged) traffic between its
+ports.
+
+It allows configuration of hardware learning, flooding, multicast groups,
+port VLAN configuration and STP state.
+
+Static entries can be added/removed from the FDB.
+
+Hardware statistics for each port are provided through ethtool -S option.
-- 
1.9.1

^ permalink raw reply related

* [PATCH v3 6/6] staging: fsl-dpaa2/ethsw: Add TODO
From: Razvan Stefanescu @ 2017-10-05  9:16 UTC (permalink / raw)
  To: gregkh
  Cc: devel, arnd, netdev, alexandru.marginean, linux-kernel, agraf,
	stuyoder, bogdan.purcareata, laurentiu.tudor
In-Reply-To: <1507194974-12050-1-git-send-email-razvan.stefanescu@nxp.com>

Add a TODO file describing what needs to be added/changed before the driver
can be moved out of staging.

Signed-off-by: Razvan Stefanescu <razvan.stefanescu@nxp.com>
---
Changelog:
 v2:
    - no changes
 v3:
    - no changes

 drivers/staging/fsl-dpaa2/ethsw/TODO | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 drivers/staging/fsl-dpaa2/ethsw/TODO

diff --git a/drivers/staging/fsl-dpaa2/ethsw/TODO b/drivers/staging/fsl-dpaa2/ethsw/TODO
new file mode 100644
index 0000000..d3f12c3
--- /dev/null
+++ b/drivers/staging/fsl-dpaa2/ethsw/TODO
@@ -0,0 +1,14 @@
+* Add I/O capabilities on switch port netdevices. This will allow control
+traffic to reach the CPU.
+* Add ACL to redirect control traffic to CPU.
+* Add support for displaying learned FDB entries
+* MC firmware uprev; the DPAA2 objects used by the Ethernet Switch driver
+need to   be kept in sync with binary interface changes in MC
+* refine README file
+* cleanup
+
+NOTE: At least first three of the above are required before getting the
+DPAA2 Ethernet Switch driver out of staging. Another requirement is that
+the fsl-mc bus driver is moved to drivers/bus and dpio driver is moved to
+drivers/soc (this is required for I/O).
+
-- 
1.9.1

^ permalink raw reply related

* [PATCH v3 1/2] net: phonet: mark header_ops as const
From: Lin Zhang @ 2017-10-05 17:37 UTC (permalink / raw)
  To: courmisch, davem; +Cc: netdev, Lin Zhang

Signed-off-by: Lin Zhang <xiaolou4617@gmail.com>
---
changelog:

v2 -> v3:
	* fix phonet_header_ops type mismatch error
---
 include/linux/if_phonet.h | 2 +-
 net/phonet/af_phonet.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h
index bbcdb0a..a118ee4 100644
--- a/include/linux/if_phonet.h
+++ b/include/linux/if_phonet.h
@@ -10,5 +10,5 @@
 
 #include <uapi/linux/if_phonet.h>
 
-extern struct header_ops phonet_header_ops;
+extern const struct header_ops phonet_header_ops;
 #endif
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753..b12142e 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 	return 1;
 }
 
-struct header_ops phonet_header_ops = {
+const struct header_ops phonet_header_ops = {
 	.create = pn_header_create,
 	.parse = pn_header_parse,
 };
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH] net/mlx4_core: Convert timers to use timer_setup()
From: Leon Romanovsky @ 2017-10-05  9:38 UTC (permalink / raw)
  To: Kees Cook; +Cc: linux-kernel, Tariq Toukan, netdev, linux-rdma, Thomas Gleixner
In-Reply-To: <20171005005154.GA23500@beast>

[-- Attachment #1: Type: text/plain, Size: 2583 bytes --]

On Wed, Oct 04, 2017 at 05:51:54PM -0700, Kees Cook wrote:
> In preparation for unconditionally passing the struct timer_list pointer to
> all timer callbacks, switch to using the new timer_setup() and from_timer()
> to pass the timer pointer explicitly.
>
> Cc: Tariq Toukan <tariqt@mellanox.com>
> Cc: netdev@vger.kernel.org
> Cc: linux-rdma@vger.kernel.org
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Kees Cook <keescook@chromium.org>
> ---
> This requires commit 686fef928bba ("timer: Prepare to change timer
> callback argument type") in v4.14-rc3, but should be otherwise
> stand-alone.
> ---
>  drivers/net/ethernet/mellanox/mlx4/catas.c | 10 ++++------
>  1 file changed, 4 insertions(+), 6 deletions(-)
>

Hi Kees,

In RDMA, we had very similar patch [1] to your patch series, but it converts to
setup_timer, while you are converting to timer_setup.

Which conversion is the right one?

[1] https://patchwork.kernel.org/patch/9980701/

Thanks

> diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
> index 53daa6ca5d83..e2b6b0cac1ac 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/catas.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
> @@ -231,10 +231,10 @@ static void dump_err_buf(struct mlx4_dev *dev)
>  			 i, swab32(readl(priv->catas_err.map + i)));
>  }
>
> -static void poll_catas(unsigned long dev_ptr)
> +static void poll_catas(struct timer_list *t)
>  {
> -	struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
> -	struct mlx4_priv *priv = mlx4_priv(dev);
> +	struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer);
> +	struct mlx4_dev *dev = &priv->dev;
>  	u32 slave_read;
>
>  	if (mlx4_is_slave(dev)) {
> @@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
>  	phys_addr_t addr;
>
>  	INIT_LIST_HEAD(&priv->catas_err.list);
> -	init_timer(&priv->catas_err.timer);
> +	timer_setup(&priv->catas_err.timer, poll_catas, 0);
>  	priv->catas_err.map = NULL;
>
>  	if (!mlx4_is_slave(dev)) {
> @@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
>  		}
>  	}
>
> -	priv->catas_err.timer.data     = (unsigned long) dev;
> -	priv->catas_err.timer.function = poll_catas;
>  	priv->catas_err.timer.expires  =
>  		round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
>  	add_timer(&priv->catas_err.timer);
> --
> 2.7.4
>
>
> --
> Kees Cook
> Pixel Security
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH 3/3] ARM: dts: gr-peach: Add ETHER pin group
From: jacopo mondi @ 2017-10-05  9:39 UTC (permalink / raw)
  To: Geert Uytterhoeven; +Cc: Chris Brandt, andrew, f.fainelli, netdev
In-Reply-To: <CAMuHMdWLt9EYp21YiMp7uEGWgjEO_VztbDOyvhr+RxkrAKRYYA@mail.gmail.com>

Hi Geert

On Thu, Oct 05, 2017 at 11:09:40AM +0200, Geert Uytterhoeven wrote:
> Hi Jacopo,
>
> On Thu, Oct 5, 2017 at 10:58 AM, Jacopo Mondi <jacopo+renesas@jmondi.org> wrote:
> > Add pin configuration subnode for ETHER pin group and enable the interface.
> >
> > Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
>
> Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
>
> > --- a/arch/arm/boot/dts/r7s72100-gr-peach.dts
> > +++ b/arch/arm/boot/dts/r7s72100-gr-peach.dts
>
> > @@ -88,3 +110,19 @@
> >
> >         status = "okay";
> >  };
> > +
> > +&ether {
> > +       pinctrl-names = "default";
> > +       pinctrl-0 = <&ether_pins>;
> > +
> > +       status = "okay";
> > +
> > +       reset-gpios = <&port4 2 GPIO_ACTIVE_LOW>;
> > +       reset-delay-us = <5>;
>
> I'm afraid the PHY people (not CCed ;-) will want you to move these reset
> properties to the phy subnode these days, despite
> Documentation/devicetree/bindings/net/mdio.txt...

Extending to:
    +andrew@lunn.ch
    +f.fainelli@gmail.com
    +netdev@vger.kernel.org

To hear from them when and how they like to move those properties and
if we can apply this in this shape or not.

Otherwise, if you think it's the case, I can move them nonetheless.

Thanks
   j
>
> > +
> > +       renesas,no-ether-link;
> > +       phy-handle = <&phy0>;
> > +       phy0: ethernet-phy@0 {
> > +               reg = <0>;
> > +       };
> > +};
>
> Gr{oetje,eeting}s,
>
>                         Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>                                 -- Linus Torvalds

^ permalink raw reply

* Re: [net-next V4 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP
From: Daniel Borkmann @ 2017-10-05  9:40 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, netdev
  Cc: jakub.kicinski, Michael S. Tsirkin, pavel.odintsov, Jason Wang,
	mchan, John Fastabend, peter.waskiewicz.jr, Daniel Borkmann,
	Alexei Starovoitov, Andy Gospodarek
In-Reply-To: <150711862505.9499.15042356194495111353.stgit@firesoul>

On 10/04/2017 02:03 PM, Jesper Dangaard Brouer wrote:
[...]
> +#define CPU_MAP_BULK_SIZE 8  /* 8 == one cacheline on 64-bit archs */
> +struct xdp_bulk_queue {
> +	void *q[CPU_MAP_BULK_SIZE];
> +	unsigned int count;
> +};
> +
> +/* Struct for every remote "destination" CPU in map */
> +struct bpf_cpu_map_entry {
> +	u32 cpu;    /* kthread CPU and map index */
> +	int map_id; /* Back reference to map */

map_id is not used here if I read it correctly? We should
then remove it.

> +	u32 qsize;  /* Redundant queue size for map lookup */
> +
> +	/* XDP can run multiple RX-ring queues, need __percpu enqueue store */
> +	struct xdp_bulk_queue __percpu *bulkq;
> +
> +	/* Queue with potential multi-producers, and single-consumer kthread */
> +	struct ptr_ring *queue;
> +	struct task_struct *kthread;
> +	struct work_struct kthread_stop_wq;
> +
> +	atomic_t refcnt; /* Control when this struct can be free'ed */
> +	struct rcu_head rcu;
> +};
> +
> +struct bpf_cpu_map {
> +	struct bpf_map map;
> +	/* Below members specific for map type */
> +	struct bpf_cpu_map_entry **cpu_map;
> +	unsigned long __percpu *flush_needed;
> +};
> +
> +static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
> +			     struct xdp_bulk_queue *bq);

Could we avoid forward declaration?

> +static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
> +{
> +	return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
> +}
> +
> +static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
> +{
> +	struct bpf_cpu_map *cmap;
> +	u64 cost;
> +	int err;
> +
> +	/* check sanity of attributes */
> +	if (attr->max_entries == 0 || attr->key_size != 4 ||
> +	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
> +		return ERR_PTR(-EINVAL);
> +
> +	cmap = kzalloc(sizeof(*cmap), GFP_USER);
> +	if (!cmap)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* mandatory map attributes */
> +	cmap->map.map_type = attr->map_type;
> +	cmap->map.key_size = attr->key_size;
> +	cmap->map.value_size = attr->value_size;
> +	cmap->map.max_entries = attr->max_entries;
> +	cmap->map.map_flags = attr->map_flags;
> +	cmap->map.numa_node = bpf_map_attr_numa_node(attr);
> +
> +	/* Pre-limit array size based on NR_CPUS, not final CPU check */
> +	if (cmap->map.max_entries > NR_CPUS)
> +		return ERR_PTR(-E2BIG);
> +
> +	/* make sure page count doesn't overflow */
> +	cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
> +	cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
> +	if (cost >= U32_MAX - PAGE_SIZE)
> +		goto free_cmap;
> +	cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
> +
> +	/* if map size is larger than memlock limit, reject it early */
> +	err = bpf_map_precharge_memlock(cmap->map.pages);
> +	if (err)
> +		goto free_cmap;

Given this is almost the same as devmap and touches user land reporting,
we should probably go and do the same as in 582db7e0c4c2 ("bpf: devmap:
pass on return value of bpf_map_precharge_memlock").

> +	/* A per cpu bitfield with a bit per possible CPU in map  */
> +	cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
> +					    __alignof__(unsigned long));
> +	if (!cmap->flush_needed)
> +		goto free_cmap;
> +
> +	/* Alloc array for possible remote "destination" CPUs */
> +	cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
> +					   sizeof(struct bpf_cpu_map_entry *),
> +					   cmap->map.numa_node);
> +	if (!cmap->cpu_map)
> +		goto free_cmap;
> +
> +	return &cmap->map;
> +free_cmap:
> +	free_percpu(cmap->flush_needed);
> +	kfree(cmap);
> +	return ERR_PTR(-ENOMEM);
> +}
> +
> +void __cpu_map_queue_destructor(void *ptr)
> +{
> +	/* For now, just catch this as an error */
> +	if (!ptr)
> +		return;
> +	pr_err("ERROR: %s() cpu_map queue was not empty\n", __func__);

Can you elaborate on this "for now" condition? Is this a race
when kthread doesn't consume queue on thread exit, or should it
be impossible to trigger (should it then be replaced with a
'if (WARN_ON_ONCE(ptr)) page_frag_free(ptr)' and a more elaborate
comment)?

> +	page_frag_free(ptr);
> +}
> +
> +static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
> +{
> +	if (atomic_dec_and_test(&rcpu->refcnt)) {
> +		/* The queue should be empty at this point */
> +		ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
> +		kfree(rcpu->queue);
> +		kfree(rcpu);
> +	}
> +}
> +
> +static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
> +{
> +	atomic_inc(&rcpu->refcnt);
> +}
> +
> +/* called from workqueue, to workaround syscall using preempt_disable */
> +static void cpu_map_kthread_stop(struct work_struct *work)
> +{
> +	struct bpf_cpu_map_entry *rcpu;
> +
> +	rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
> +	synchronize_rcu(); /* wait for flush in __cpu_map_entry_free() */
> +	kthread_stop(rcpu->kthread); /* calls put_cpu_map_entry */
> +}
> +
> +static int cpu_map_kthread_run(void *data)
> +{
> +	struct bpf_cpu_map_entry *rcpu = data;
> +
> +	set_current_state(TASK_INTERRUPTIBLE);
> +	while (!kthread_should_stop()) {
> +		struct xdp_pkt *xdp_pkt;
> +
> +		schedule();
> +		/* Do work */
> +		while ((xdp_pkt = ptr_ring_consume(rcpu->queue))) {
> +			/* For now just "refcnt-free" */
> +			page_frag_free(xdp_pkt);
> +		}
> +		__set_current_state(TASK_INTERRUPTIBLE);
> +	}
> +	put_cpu_map_entry(rcpu);
> +
> +	__set_current_state(TASK_RUNNING);
> +	return 0;
> +}
> +
> +struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
> +{
> +	gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
> +	struct bpf_cpu_map_entry *rcpu;
> +	int numa, err;
> +
> +	/* Have map->numa_node, but choose node of redirect target CPU */
> +	numa = cpu_to_node(cpu);
> +
> +	rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa);
> +	if (!rcpu)
> +		return NULL;
> +
> +	/* Alloc percpu bulkq */
> +	rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq),
> +					 sizeof(void *), gfp);
> +	if (!rcpu->bulkq)
> +		goto fail;
> +
> +	/* Alloc queue */
> +	rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
> +	if (!rcpu->queue)
> +		goto fail;
> +
> +	err = ptr_ring_init(rcpu->queue, qsize, gfp);
> +	if (err)
> +		goto fail;
> +	rcpu->qsize = qsize;
> +
> +	/* Setup kthread */
> +	rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
> +					       "cpumap/%d/map:%d", cpu, map_id);
> +	if (IS_ERR(rcpu->kthread))
> +		goto fail;

What about ptr_ring_cleanup() when we fail here?

> +
> +	/* Make sure kthread runs on a single CPU */
> +	kthread_bind(rcpu->kthread, cpu);
> +	wake_up_process(rcpu->kthread);
> +
> +	get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
> +	get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
> +
> +	return rcpu;
> +
> +fail:   /* Hint: free API detect NULL values */
> +	free_percpu(rcpu->bulkq);
> +	kfree(rcpu->queue);
> +	kfree(rcpu);
> +	return NULL;
> +}
> +
> +void __cpu_map_entry_free(struct rcu_head *rcu)
> +{
> +	struct bpf_cpu_map_entry *rcpu;
> +	int cpu;
> +
> +	/* This cpu_map_entry have been disconnected from map and one
> +	 * RCU graze-period have elapsed.  Thus, XDP cannot queue any
> +	 * new packets and cannot change/set flush_needed that can
> +	 * find this entry.
> +	 */
> +	rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
> +
> +	/* Flush remaining packets in percpu bulkq */
> +	for_each_online_cpu(cpu) {
> +		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
> +
> +		/* No concurrent bq_enqueue can run at this point */
> +		bq_flush_to_queue(rcpu, bq);
> +	}
> +	free_percpu(rcpu->bulkq);
> +	/* Cannot kthread_stop() here, last put free rcpu resources */
> +	put_cpu_map_entry(rcpu);
> +}
> +
> +/* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to
> + * ensure any driver rcu critical sections have completed, but this
> + * does not guarantee a flush has happened yet. Because driver side
> + * rcu_read_lock/unlock only protects the running XDP program.  The
> + * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a
> + * pending flush op doesn't fail.
> + *
> + * The bpf_cpu_map_entry is still used by the kthread, and there can
> + * still be pending packets (in queue and percpu bulkq).  A refcnt
> + * makes sure to last user (kthread_stop vs. call_rcu) free memory
> + * resources.
> + *
> + * The rcu callback __cpu_map_entry_free flush remaining packets in
> + * percpu bulkq to queue.  Due to caller map_delete_elem() disable
> + * preemption, cannot call kthread_stop() to make sure queue is empty.
> + * Instead a work_queue is started for stopping kthread,
> + * cpu_map_kthread_stop, which waits for an RCU graze period before
> + * stopping kthread, emptying the queue.
> + */
> +void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
> +			     u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
> +{
> +	struct bpf_cpu_map_entry *old_rcpu;
> +
> +	old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
> +	if (old_rcpu) {
> +		call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
> +		INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
> +		schedule_work(&old_rcpu->kthread_stop_wq);
> +	}
> +}
> +
> +int cpu_map_delete_elem(struct bpf_map *map, void *key)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	u32 key_cpu = *(u32 *)key;
> +
> +	if (key_cpu >= map->max_entries)
> +		return -EINVAL;
> +
> +	/* notice caller map_delete_elem() use preempt_disable() */
> +	__cpu_map_entry_replace(cmap, key_cpu, NULL);
> +	return 0;
> +}
> +
> +int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
> +				u64 map_flags)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	struct bpf_cpu_map_entry *rcpu;
> +
> +	/* Array index key correspond to CPU number */
> +	u32 key_cpu = *(u32 *)key;
> +	/* Value is the queue size */
> +	u32 qsize = *(u32 *)value;
> +
> +	/* Make sure CPU is a valid possible cpu */
> +	if (!cpu_possible(key_cpu))
> +		return -ENODEV;
> +
> +	if (unlikely(map_flags > BPF_EXIST))
> +		return -EINVAL;
> +	if (unlikely(key_cpu >= cmap->map.max_entries))
> +		return -E2BIG;
> +	if (unlikely(map_flags == BPF_NOEXIST))
> +		return -EEXIST;
> +	if (unlikely(qsize > 16384)) /* sanity limit on qsize */
> +		return -EOVERFLOW;
> +
> +	if (qsize == 0) {
> +		rcpu = NULL; /* Same as deleting */
> +	} else {
> +		/* Updating qsize cause re-allocation of bpf_cpu_map_entry */
> +		rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
> +		if (!rcpu)
> +			return -ENOMEM;
> +	}
> +	rcu_read_lock();
> +	__cpu_map_entry_replace(cmap, key_cpu, rcpu);
> +	rcu_read_unlock();
> +	return 0;

You need to update verifier such that this function cannot be called
out of an BPF program, otherwise it would be possible under full RCU
read context, which is explicitly avoided here and also it would otherwise
be allowed for other maps of different type as well, which needs to
be avoided.

> +}
> +
> +void cpu_map_free(struct bpf_map *map)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	int cpu;
> +	u32 i;
> +
> +	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
> +	 * so the bpf programs (can be more than one that used this map) were
> +	 * disconnected from events. Wait for outstanding critical sections in
> +	 * these programs to complete. The rcu critical section only guarantees
> +	 * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map.
> +	 * It does __not__ ensure pending flush operations (if any) are
> +	 * complete.
> +	 */
> +	synchronize_rcu();
> +
> +	/* To ensure all pending flush operations have completed wait for flush
> +	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
> +	 * Because the above synchronize_rcu() ensures the map is disconnected
> +	 * from the program we can assume no new bits will be set.
> +	 */
> +	for_each_online_cpu(cpu) {
> +		unsigned long *bitmap = per_cpu_ptr(cmap->flush_needed, cpu);
> +
> +		while (!bitmap_empty(bitmap, cmap->map.max_entries))
> +			cond_resched();
> +	}
> +
> +	/* For cpu_map the remote CPUs can still be using the entries
> +	 * (struct bpf_cpu_map_entry).
> +	 */
> +	for (i = 0; i < cmap->map.max_entries; i++) {
> +		struct bpf_cpu_map_entry *rcpu;
> +
> +		rcpu = READ_ONCE(cmap->cpu_map[i]);
> +		if (!rcpu)
> +			continue;
> +
> +		/* bq flush and cleanup happens after RCU graze-period */
> +		__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
> +	}
> +	free_percpu(cmap->flush_needed);
> +	bpf_map_area_free(cmap->cpu_map);
> +	kfree(cmap);
> +}
> +
> +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	struct bpf_cpu_map_entry *rcpu;
> +
> +	if (key >= map->max_entries)
> +		return NULL;
> +
> +	rcpu = READ_ONCE(cmap->cpu_map[key]);
> +	return rcpu;
> +}
> +
> +static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> +	struct bpf_cpu_map_entry *rcpu =
> +		__cpu_map_lookup_elem(map, *(u32 *)key);
> +
> +	return rcpu ? &rcpu->qsize : NULL;

The qsize doesn't seem used anywhere else besides here, but you
probably should update verifier such that this cannot be called
out of the BPF program, which could then mangle qsize value.

> +}
> +
> +static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	u32 index = key ? *(u32 *)key : U32_MAX;
> +	u32 *next = next_key;
> +
> +	if (index >= cmap->map.max_entries) {
> +		*next = 0;
> +		return 0;
> +	}
> +
> +	if (index == cmap->map.max_entries - 1)
> +		return -ENOENT;
> +	*next = index + 1;
> +	return 0;
> +}
> +
> +const struct bpf_map_ops cpu_map_ops = {
> +	.map_alloc		= cpu_map_alloc,
> +	.map_free		= cpu_map_free,
> +	.map_delete_elem	= cpu_map_delete_elem,
> +	.map_update_elem	= cpu_map_update_elem,
> +	.map_lookup_elem	= cpu_map_lookup_elem,
> +	.map_get_next_key	= cpu_map_get_next_key,
> +};
> +
> +static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
> +			     struct xdp_bulk_queue *bq)
> +{
> +	struct ptr_ring *q;
> +	int i;
> +
> +	if (unlikely(!bq->count))
> +		return 0;
> +
> +	q = rcpu->queue;
> +	spin_lock(&q->producer_lock);
> +
> +	for (i = 0; i < bq->count; i++) {
> +		void *xdp_pkt = bq->q[i];
> +		int err;
> +
> +		err = __ptr_ring_produce(q, xdp_pkt);
> +		if (err) {
> +			/* Free xdp_pkt */
> +			page_frag_free(xdp_pkt);
> +		}
> +	}
> +	bq->count = 0;
> +	spin_unlock(&q->producer_lock);
> +
> +	return 0;
> +}
> +
> +/* Notice: Will change in later patch */
> +struct xdp_pkt {
> +	void *data;
> +	u16 len;
> +	u16 headroom;
> +};
> +
> +/* Runs under RCU-read-side, plus in softirq under NAPI protection.
> + * Thus, safe percpu variable access.
> + */
> +static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
> +{
> +	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
> +
> +	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
> +		bq_flush_to_queue(rcpu, bq);
> +
> +	/* Notice, xdp_buff/page MUST be queued here, long enough for
> +	 * driver to code invoking us to finished, due to driver
> +	 * (e.g. ixgbe) recycle tricks based on page-refcnt.
> +	 *
> +	 * Thus, incoming xdp_pkt is always queued here (else we race
> +	 * with another CPU on page-refcnt and remaining driver code).
> +	 * Queue time is very short, as driver will invoke flush
> +	 * operation, when completing napi->poll call.
> +	 */
> +	bq->q[bq->count++] = xdp_pkt;
> +	return 0;
> +}
> +
> +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)
> +{
> +	struct xdp_pkt *xdp_pkt;
> +	int headroom;
> +
> +	/* Convert xdp_buff to xdp_pkt */
> +	headroom = xdp->data - xdp->data_hard_start;
> +	if (headroom < sizeof(*xdp_pkt))
> +		return -EOVERFLOW;
> +	xdp_pkt = xdp->data_hard_start;
> +	xdp_pkt->data = xdp->data;
> +	xdp_pkt->len  = xdp->data_end - xdp->data;
> +	xdp_pkt->headroom = headroom;
> +	/* For now this is just used as a void pointer to data_hard_start */
> +
> +	bq_enqueue(rcpu, xdp_pkt);
> +	return 0;
> +}
> +
> +void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
> +
> +	__set_bit(bit, bitmap);
> +}
> +
> +void __cpu_map_flush(struct bpf_map *map)
> +{
> +	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
> +	unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
> +	u32 bit;
> +
> +	/* The napi->poll softirq makes sure __cpu_map_insert_ctx()
> +	 * and __cpu_map_flush() happen on same CPU. Thus, the percpu
> +	 * bitmap indicate which percpu bulkq have packets.
> +	 */
> +	for_each_set_bit(bit, bitmap, map->max_entries) {
> +		struct bpf_cpu_map_entry *rcpu = READ_ONCE(cmap->cpu_map[bit]);
> +		struct xdp_bulk_queue *bq;
> +
> +		/* This is possible if entry is removed by user space
> +		 * between xdp redirect and flush op.
> +		 */
> +		if (unlikely(!rcpu))
> +			continue;
> +
> +		__clear_bit(bit, bitmap);
> +
> +		/* Flush all frames in bulkq to real queue */
> +		bq = this_cpu_ptr(rcpu->bulkq);
> +		bq_flush_to_queue(rcpu, bq);
> +
> +		/* If already running, costs spin_lock_irqsave + smb_mb */
> +		wake_up_process(rcpu->kthread);
> +	}
> +}
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index b927da66f653..641bdb0df020 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -593,6 +593,12 @@ static int map_update_elem(union bpf_attr *attr)
>   	if (copy_from_user(value, uvalue, value_size) != 0)
>   		goto free_value;
>
> +	/* Need to create a kthread, thus must support schedule */
> +	if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
> +		err = map->ops->map_update_elem(map, key, value, attr->flags);
> +		goto out;
> +	}
> +
>   	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
>   	 * inside bpf map update or delete otherwise deadlocks are possible
>   	 */
> @@ -623,7 +629,7 @@ static int map_update_elem(union bpf_attr *attr)
>   	}
>   	__this_cpu_dec(bpf_prog_active);
>   	preempt_enable();
> -
> +out:
>   	if (!err)
>   		trace_bpf_map_update_elem(map, ufd, key, value);
>   free_value:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 6d2137b4cf38..03f8e2827a95 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -111,6 +111,7 @@ enum bpf_map_type {
>   	BPF_MAP_TYPE_HASH_OF_MAPS,
>   	BPF_MAP_TYPE_DEVMAP,
>   	BPF_MAP_TYPE_SOCKMAP,
> +	BPF_MAP_TYPE_CPUMAP,
>   };
>
>   enum bpf_prog_type {
>

^ permalink raw reply

* [PATCH v3 2/2] net: phonet: mark phonet_protocol as const
From: Lin Zhang @ 2017-10-05 17:40 UTC (permalink / raw)
  To: courmisch, davem; +Cc: netdev, Lin Zhang

The phonet_protocol structs don't need to be written by anyone and
so can be marked as const.

Signed-off-by: Lin Zhang <xiaolou4617@gmail.com>
---
changelog:

v1 -> v2:
	* remove __read_mostly tag
v2 -> v3:
	* revert __read_mostly tag
---
 include/net/phonet/phonet.h |  6 ++++--
 net/phonet/af_phonet.c      | 15 ++++++++-------
 net/phonet/datagram.c       |  2 +-
 net/phonet/pep.c            |  2 +-
 4 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 039cc29..51e1a2a 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -108,8 +108,10 @@ struct phonet_protocol {
 	int			sock_type;
 };
 
-int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp);
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp);
+int phonet_proto_register(unsigned int protocol,
+		const struct phonet_protocol *pp);
+void phonet_proto_unregister(unsigned int protocol,
+		const struct phonet_protocol *pp);
 
 int phonet_sysctl_init(void);
 void phonet_sysctl_exit(void);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index b12142e..3b0ef69 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
 #include <net/phonet/pn_dev.h>
 
 /* Transport protocol registration */
-static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
 
-static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
+static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
 {
-	struct phonet_protocol *pp;
+	const struct phonet_protocol *pp;
 
 	if (protocol >= PHONET_NPROTO)
 		return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
 	return pp;
 }
 
-static inline void phonet_proto_put(struct phonet_protocol *pp)
+static inline void phonet_proto_put(const struct phonet_protocol *pp)
 {
 	module_put(pp->prot->owner);
 }
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
 {
 	struct sock *sk;
 	struct pn_sock *pn;
-	struct phonet_protocol *pnp;
+	const struct phonet_protocol *pnp;
 	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -470,7 +470,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
 static DEFINE_MUTEX(proto_tab_lock);
 
 int __init_or_module phonet_proto_register(unsigned int protocol,
-						struct phonet_protocol *pp)
+				const struct phonet_protocol *pp)
 {
 	int err = 0;
 
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
 }
 EXPORT_SYMBOL(phonet_proto_register);
 
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol,
+			const struct phonet_protocol *pp)
 {
 	mutex_lock(&proto_tab_lock);
 	BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e71043..b44fb90 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	.name		= "PHONET",
 };
 
-static struct phonet_protocol pn_dgram_proto = {
+static const struct phonet_protocol pn_dgram_proto = {
 	.ops		= &phonet_dgram_ops,
 	.prot		= &pn_proto,
 	.sock_type	= SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e815379..9fc76b1 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static void pep_sock_unhash(struct sock *sk)
 	.name		= "PNPIPE",
 };
 
-static struct phonet_protocol pep_pn_proto = {
+static const struct phonet_protocol pep_pn_proto = {
 	.ops		= &phonet_stream_ops,
 	.prot		= &pep_proto,
 	.sock_type	= SOCK_SEQPACKET,
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH net] netfilter: x_tables: avoid stack-out-of-bounds read in xt_copy_counters_from_user
From: Eric Dumazet @ 2017-10-05  9:50 UTC (permalink / raw)
  To: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal
  Cc: netfilter-devel, netdev, Willem de Bruijn, netdev

From: Eric Dumazet <edumazet@google.com>

syzkaller reports an out of bound read in strlcpy(), triggered
by xt_copy_counters_from_user()

Fix this by using memcpy(), then forcing a zero byte at the last position
of the destination, as Florian did for the non COMPAT code.

Fixes: d7591f0c41ce ("netfilter: x_tables: introduce and use xt_copy_counters_from_user")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
---
 net/netfilter/x_tables.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c83a3b5e1c6c2a91b713b6681a794bd79ab3fa08..d8571f4142080a3c121fc90f0b52d81ee9df6712 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 		if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
 			return ERR_PTR(-EFAULT);
 
-		strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+		memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
 		info->num_counters = compat_tmp.num_counters;
 		user += sizeof(compat_tmp);
 	} else
@@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
 		if (copy_from_user(info, user, sizeof(*info)) != 0)
 			return ERR_PTR(-EFAULT);
 
-		info->name[sizeof(info->name) - 1] = '\0';
 		user += sizeof(*info);
 	}
+	info->name[sizeof(info->name) - 1] = '\0';
 
 	size = sizeof(struct xt_counters);
 	size *= info->num_counters;



^ permalink raw reply related

* Re: [PATCH net] netfilter: x_tables: avoid stack-out-of-bounds read in xt_copy_counters_from_user
From: Florian Westphal @ 2017-10-05  9:56 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Pablo Neira Ayuso, Jozsef Kadlecsik, Florian Westphal,
	netfilter-devel, netdev, Willem de Bruijn
In-Reply-To: <1507197007.14419.15.camel@edumazet-glaptop3.roam.corp.google.com>

Eric Dumazet <eric.dumazet@gmail.com> wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> syzkaller reports an out of bound read in strlcpy(), triggered
> by xt_copy_counters_from_user()
> 
> Fix this by using memcpy(), then forcing a zero byte at the last position
> of the destination, as Florian did for the non COMPAT code.
> 
> Fixes: d7591f0c41ce ("netfilter: x_tables: introduce and use xt_copy_counters_from_user")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Willem de Bruijn <willemb@google.com>
> ---
>  net/netfilter/x_tables.c |    4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
> index c83a3b5e1c6c2a91b713b6681a794bd79ab3fa08..d8571f4142080a3c121fc90f0b52d81ee9df6712 100644
> --- a/net/netfilter/x_tables.c
> +++ b/net/netfilter/x_tables.c
> @@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
>  		if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
>  			return ERR_PTR(-EFAULT);
>  
> -		strlcpy(info->name, compat_tmp.name, sizeof(info->name));
> +		memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);

Argh, right, compat_tmp.name might not be 0 terminated :-/

Acked-by: Florian Westphal <fw@strlen.de>

^ permalink raw reply

* [PATCH] net: ipv6: remove unused code in ipv6_find_hdr()
From: Lin Zhang @ 2017-10-05 18:07 UTC (permalink / raw)
  To: kuznet, davem, yoshfuji; +Cc: netdev, Lin Zhang

Storing the left length of skb into 'len' actually has no effect 
so we can remove it.

Signed-off-by: Lin Zhang <xiaolou4617@gmail.com>
---
 net/ipv6/exthdrs_core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed..98096ea 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 {
 	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
 	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-	unsigned int len;
 	bool found;
 
 	if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 		start = *offset + sizeof(struct ipv6hdr);
 		nexthdr = ip6->nexthdr;
 	}
-	len = skb->len - start;
 
 	do {
 		struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 
 		if (!found) {
 			nexthdr = hp->nexthdr;
-			len -= hdrlen;
 			start += hdrlen;
 		}
 	} while (!found);
-- 
1.8.3.1

^ permalink raw reply related

* Re: [net-next V4 PATCH 2/5] bpf: XDP_REDIRECT enable use of cpumap
From: Daniel Borkmann @ 2017-10-05 10:10 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, netdev
  Cc: jakub.kicinski, Michael S. Tsirkin, pavel.odintsov, Jason Wang,
	mchan, John Fastabend, peter.waskiewicz.jr, Daniel Borkmann,
	Alexei Starovoitov, Andy Gospodarek
In-Reply-To: <150711863012.9499.383645968070658124.stgit@firesoul>

On 10/04/2017 02:03 PM, Jesper Dangaard Brouer wrote:
> This patch connects cpumap to the xdp_do_redirect_map infrastructure.
>
> Still no SKB allocation are done yet.  The XDP frames are transferred
> to the other CPU, but they are simply refcnt decremented on the remote
> CPU.  This served as a good benchmark for measuring the overhead of
> remote refcnt decrement.  If driver page recycle cache is not
> efficient then this, exposes a bottleneck in the page allocator.
>
> A shout-out to MST's ptr_ring, which is the secret behind is being so
> efficient to transfer memory pointers between CPUs, without constantly
> bouncing cache-lines between CPUs.
>
> V3: Handle !CONFIG_BPF_SYSCALL pointed out by kbuild test robot.
>
> V4: Make Generic-XDP aware of cpumap type, but don't allow redirect yet,
>   as implementation require a separate upstream discussion.
>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
[...]
> diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
> index ae8e29352261..4926a9971f90 100644
> --- a/kernel/bpf/cpumap.c
> +++ b/kernel/bpf/cpumap.c
> @@ -493,7 +493,8 @@ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
>   	return 0;
>   }
>
> -int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)
> +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
> +		    struct net_device *dev_rx)
>   {
>   	struct xdp_pkt *xdp_pkt;
>   	int headroom;
> @@ -505,7 +506,7 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp)
>   	xdp_pkt = xdp->data_hard_start;
>   	xdp_pkt->data = xdp->data;
>   	xdp_pkt->len  = xdp->data_end - xdp->data;
> -	xdp_pkt->headroom = headroom;
> +	xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);

(Just a note, bit confusing that first two patches add and extend
  this, and only in the third you add the xdp->data_meta handling,
  makes it harder to review at least.)

[...]
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 9b6e7e84aafd..dbf2ae071108 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2521,10 +2521,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
>   	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
>   	if (err)
>   		return err;
> -	if (map)
> +	dev->netdev_ops->ndo_xdp_flush(dev);
> +	return 0;
> +}
> +
> +static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
> +			    struct bpf_map *map,
> +			    struct xdp_buff *xdp,
> +			    u32 index)
> +{
> +	int err;
> +
> +	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
> +		struct net_device *dev = fwd;
> +
> +		if (!dev->netdev_ops->ndo_xdp_xmit)
> +			return -EOPNOTSUPP;
> +
> +		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
> +		if (err)
> +			return err;
>   		__dev_map_insert_ctx(map, index);
> -	else
> -		dev->netdev_ops->ndo_xdp_flush(dev);
> +
> +	} else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
> +		struct bpf_cpu_map_entry *rcpu = fwd;
> +
> +		err = cpu_map_enqueue(rcpu, xdp, dev_rx);
> +		if (err)
> +			return err;
> +		__cpu_map_insert_ctx(map, index);
> +	}
>   	return 0;
>   }
>
> @@ -2534,11 +2560,33 @@ void xdp_do_flush_map(void)
>   	struct bpf_map *map = ri->map_to_flush;
>
>   	ri->map_to_flush = NULL;
> -	if (map)
> -		__dev_map_flush(map);
> +	if (map) {
> +		switch (map->map_type) {
> +		case BPF_MAP_TYPE_DEVMAP:
> +			__dev_map_flush(map);
> +			break;
> +		case BPF_MAP_TYPE_CPUMAP:
> +			__cpu_map_flush(map);
> +			break;
> +		default:
> +			break;
> +		}
> +	}
>   }
>   EXPORT_SYMBOL_GPL(xdp_do_flush_map);
>
> +static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
> +{
> +	switch (map->map_type) {
> +	case BPF_MAP_TYPE_DEVMAP:
> +		return __dev_map_lookup_elem(map, index);
> +	case BPF_MAP_TYPE_CPUMAP:
> +		return __cpu_map_lookup_elem(map, index);
> +	default:
> +		return NULL;
> +	}

Should we just have a callback and instead of the above use
map->ptr_lookup_elem() (or however we name it) ... lot of it
is pretty much the same logic as with devmap.

> +}
> +
>   static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
>   				   unsigned long aux)
>   {
> @@ -2551,8 +2599,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
>   	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
>   	unsigned long map_owner = ri->map_owner;
>   	struct bpf_map *map = ri->map;
> -	struct net_device *fwd = NULL;
>   	u32 index = ri->ifindex;
> +	void *fwd = NULL;
>   	int err;
>
>   	ri->ifindex = 0;

^ permalink raw reply

* Re: [PATCH V2] Fix a sleep-in-atomic bug in shash_setkey_unaligned
From: Herbert Xu @ 2017-10-05 10:16 UTC (permalink / raw)
  To: David Miller
  Cc: marcelo.leitner, luto, baijiaju1990, nhorman, vyasevich, kvalo,
	linux-crypto, netdev, linux-sctp, linux-wireless
In-Reply-To: <20171004.213758.2210486785503998906.davem@davemloft.net>

On Wed, Oct 04, 2017 at 09:37:58PM -0700, David Miller wrote:
>
> > I'm not talking about the code-path in question.  I'm talking
> > about the function which generates the secret key in the first
> > place.  AFAICS that's only called in GFP_KERNEL context.  What
> > am I missing?
> 
> The setkey happens in functions like sctp_pack_cookie() and
> sctp_unpack_cookie(), which seems to run from software interrupts.

That was my point.  Functions like sctp_pack_cookie shouldn't be
setting the key in the first place.  The setkey should happen at
the point when the key is generated.  That's sctp_endpoint_init
which AFAICS only gets called in GFP_KERNEL context.

Or is there a code-path where sctp_endpoint_init is called in
softirq context?

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH net] net: enable interface alias removal via rtnl
From: Nicolas Dichtel @ 2017-10-05 10:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, Nicolas Dichtel, Oliver Hartkopp, Stephen Hemminger

IFLA_IFALIAS is defined as NLA_STRING. It means that the minimal length of
the attribute is 1 ("\0"). However, to remove an alias, the attribute
length must be 0 (see dev_set_alias()).

Let's define the type to NLA_BINARY, so that the alias can be removed.

Example:
$ ip l s dummy0 alias foo
$ ip l l dev dummy0
5: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether ae:20:30:4f:a7:f3 brd ff:ff:ff:ff:ff:ff
    alias foo

Before the patch:
$ ip l s dummy0 alias ""
RTNETLINK answers: Numerical result out of range

After the patch:
$ ip l s dummy0 alias ""
$ ip l l dev dummy0
5: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether ae:20:30:4f:a7:f3 brd ff:ff:ff:ff:ff:ff

CC: Oliver Hartkopp <oliver@hartkopp.net>
CC: Stephen Hemminger <stephen@networkplumber.org>
Fixes: 96ca4a2cc145 ("net: remove ifalias on empty given alias")
Reported-by: Julien FLoret <julien.floret@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d4bcdcc68e92..570092cee902 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1483,7 +1483,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
-	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
+	[IFLA_IFALIAS]	        = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
 	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
-- 
2.13.2

^ permalink raw reply related

* Re: [net-next V4 PATCH 3/5] bpf: cpumap xdp_buff to skb conversion and allocation
From: Daniel Borkmann @ 2017-10-05 10:22 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, netdev
  Cc: jakub.kicinski, Michael S. Tsirkin, pavel.odintsov, Jason Wang,
	mchan, John Fastabend, peter.waskiewicz.jr, Daniel Borkmann,
	Alexei Starovoitov, Andy Gospodarek
In-Reply-To: <150711863521.9499.3702385818650624585.stgit@firesoul>

On 10/04/2017 02:03 PM, Jesper Dangaard Brouer wrote:
[...]
>   static int cpu_map_kthread_run(void *data)
>   {
>   	struct bpf_cpu_map_entry *rcpu = data;
>
>   	set_current_state(TASK_INTERRUPTIBLE);
>   	while (!kthread_should_stop()) {
> +		unsigned int processed = 0, drops = 0;
>   		struct xdp_pkt *xdp_pkt;
>
> -		schedule();
> -		/* Do work */
> -		while ((xdp_pkt = ptr_ring_consume(rcpu->queue))) {
> -			/* For now just "refcnt-free" */
> -			page_frag_free(xdp_pkt);
> +		/* Release CPU reschedule checks */
> +		if (__ptr_ring_empty(rcpu->queue)) {
> +			schedule();
> +		} else {
> +			cond_resched();
> +		}
> +
> +		/* Process packets in rcpu->queue */
> +		local_bh_disable();
> +		/*
> +		 * The bpf_cpu_map_entry is single consumer, with this
> +		 * kthread CPU pinned. Lockless access to ptr_ring
> +		 * consume side valid as no-resize allowed of queue.
> +		 */
> +		while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
> +			struct sk_buff *skb;
> +			int ret;
> +
> +			skb = cpu_map_build_skb(rcpu, xdp_pkt);
> +			if (!skb) {
> +				page_frag_free(xdp_pkt);
> +				continue;
> +			}
> +
> +			/* Inject into network stack */
> +			ret = netif_receive_skb_core(skb);

Don't we need to hold RCU read lock for above netif_receive_skb_core()?

> +			if (ret == NET_RX_DROP)
> +				drops++;
> +
> +			/* Limit BH-disable period */
> +			if (++processed == 8)
> +				break;
>   		}
> +		local_bh_enable(); /* resched point, may call do_softirq() */
> +
>   		__set_current_state(TASK_INTERRUPTIBLE);
>   	}
>   	put_cpu_map_entry(rcpu);
> @@ -463,13 +582,6 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,

^ permalink raw reply

* [patch net-next 0/6] mlxsw: Offload bridge device mrouter
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa

From: Jiri Pirko <jiri@mellanox.com>

Yotam says:

Similarly to a bridged port, the bridge device itself can be configured by
the user to be an mrouter port. In this case, all multicast traffic should
be forwarded to it. Make the mlxsw Spectrum driver offload these directives
to the Spectrum hardware.

Patches 1-3 add a new switchdev notification for bridge device mrouter
port status and make the bridge module notify about it.

Patches 4-6 change the mlxsw Spectrum driver to handle these notifications
by adding the Spectrum router port to the bridge MDB entries.

Yotam Gigi (6):
  net: bridge: Use the MDB_RTR_TYPE_TEMP on bridge device too
  net: bridge: Notify on bridge device mrouter state changes
  net: bridge: Export bridge multicast router state
  mlxsw: spectrum: router: Export the mlxsw_sp_router_port function
  mlxsw: spectrum_switchdev: Add support for router port in SMID entries
  mlxsw: spectrum_switchdev: Support bridge mrouter notifications

 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  2 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_router.h  |  1 +
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 76 +++++++++++++++++++++-
 include/linux/if_bridge.h                          |  5 ++
 include/net/switchdev.h                            |  1 +
 net/bridge/br_multicast.c                          | 56 ++++++++++++++--
 net/bridge/br_netlink.c                            |  3 +-
 net/bridge/br_private.h                            | 13 +++-
 net/bridge/br_sysfs_br.c                           |  3 +-
 9 files changed, 147 insertions(+), 13 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [patch net-next 1/6] net: bridge: Use the MDB_RTR_TYPE_TEMP on bridge device too
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa
In-Reply-To: <20171005103642.1414-1-jiri@resnulli.us>

From: Yotam Gigi <yotamg@mellanox.com>

Every bridge port is in one of four mcast router port states:
 - MDB_RTR_TYPE_PERM - the port is set by the user to be an mrouter port
   regardless of IGMP queries.
 - MDB_RTR_TYPE_DISABLED - the port is set by the user to not be an mrouter
   port regardless of IGMP queries.
 - MDB_RTR_TYPE_TEMP - the port is set by the user to be in mcast router
   learning state, but currently it is not an mrouter port as no IGMP query
   has been received by it for the last multicast_querier_interval.
 - MDB_RTR_TYPE_TEMP_QUERY - the port is set by the user to be in mcast
   router learning state, and currently it is an mrouter port due to an
   IGMP query that has been received by it during the passed
   multicast_querier_interval.

The bridge device (brX) itself can also be configured by the user to be
either fixed, disabled or learning mrouter port states, but currently there
is no distinction between the MDB_RTR_TYPE_TEMP_QUERY and MDB_RTR_TYPE_TEMP
in the bridge internal state. Due to that, when an IGMP query is received,
it is not straightforward to tell whether it changes the bridge device
mrouter port status or not.

Further patches in this patch-set will introduce notifications upon the
bridge device mrouter port state. In order to prevent resending bridge
mrouter notification when it is not needed, such distinction is necessary.

Hence, add the distinction between MDB_RTR_TYPE_TEMP and
MDB_RTR_TYPE_TEMP_QUERY states for the bridge device, similarly to any
other bridge port.

In order to not break the current kernel-user API, don't propagate the new
state to the user and use it only in the bridge internal state. Thus, if
the user reads (either via sysfs or netlink) the bridge device mrouter
state, he will get the MDB_RTR_TYPE_TEMP_QUERY state even if the current
bridge state is MDB_RTR_TYPE_TEMP.

Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 net/bridge/br_multicast.c | 25 +++++++++++++++++++++----
 net/bridge/br_netlink.c   |  3 ++-
 net/bridge/br_private.h   | 13 ++++++++++---
 net/bridge/br_sysfs_br.c  |  3 ++-
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d..b86307b 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -861,6 +861,17 @@ static void br_multicast_router_expired(unsigned long data)
 
 static void br_multicast_local_router_expired(unsigned long data)
 {
+	struct net_bridge *br = (struct net_bridge *) data;
+
+	spin_lock(&br->multicast_lock);
+	if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
+	    br->multicast_router == MDB_RTR_TYPE_PERM ||
+	    timer_pending(&br->multicast_router_timer))
+		goto out;
+
+	br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+out:
+	spin_unlock(&br->multicast_lock);
 }
 
 static void br_multicast_querier_expired(struct net_bridge *br,
@@ -1364,9 +1375,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
 	unsigned long now = jiffies;
 
 	if (!port) {
-		if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
+		if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY ||
+		    br->multicast_router == MDB_RTR_TYPE_TEMP) {
 			mod_timer(&br->multicast_router_timer,
 				  now + br->multicast_querier_interval);
+			br->multicast_router = MDB_RTR_TYPE_TEMP;
+		}
 		return;
 	}
 
@@ -1952,7 +1966,7 @@ void br_multicast_init(struct net_bridge *br)
 
 	spin_lock_init(&br->multicast_lock);
 	setup_timer(&br->multicast_router_timer,
-		    br_multicast_local_router_expired, 0);
+		    br_multicast_local_router_expired, (unsigned long)br);
 	setup_timer(&br->ip4_other_query.timer,
 		    br_ip4_multicast_querier_expired, (unsigned long)br);
 	setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
@@ -2043,11 +2057,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 	case MDB_RTR_TYPE_DISABLED:
 	case MDB_RTR_TYPE_PERM:
 		del_timer(&br->multicast_router_timer);
-		/* fall through */
-	case MDB_RTR_TYPE_TEMP_QUERY:
 		br->multicast_router = val;
 		err = 0;
 		break;
+	case MDB_RTR_TYPE_TEMP_QUERY:
+		if (br->multicast_router != MDB_RTR_TYPE_TEMP)
+			br->multicast_router = val;
+		err = 0;
+		break;
 	}
 
 	spin_unlock_bh(&br->multicast_lock);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index dea88a2..cee5016 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1357,7 +1357,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 		return -EMSGSIZE;
 #endif
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-	if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
+	if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER,
+		       br_multicast_router_translate(br->multicast_router)) ||
 	    nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, !br->multicast_disabled) ||
 	    nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
 		       br->multicast_query_use_ifaddr) ||
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index ab4df24..e6e3fec 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -649,9 +649,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
 
 static inline bool br_multicast_is_router(struct net_bridge *br)
 {
-	return br->multicast_router == 2 ||
-	       (br->multicast_router == 1 &&
-		timer_pending(&br->multicast_router_timer));
+	return br->multicast_router == MDB_RTR_TYPE_PERM ||
+	       br->multicast_router == MDB_RTR_TYPE_TEMP;
 }
 
 static inline bool
@@ -790,6 +789,14 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
 }
 #endif
 
+static inline unsigned char
+br_multicast_router_translate(unsigned char multicast_router)
+{
+	if (multicast_router == MDB_RTR_TYPE_TEMP)
+		return MDB_RTR_TYPE_TEMP_QUERY;
+	return multicast_router;
+}
+
 /* br_vlan.c */
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 bool br_allowed_ingress(const struct net_bridge *br,
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 723f25e..9b9c597 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -340,7 +340,8 @@ static ssize_t multicast_router_show(struct device *d,
 				     struct device_attribute *attr, char *buf)
 {
 	struct net_bridge *br = to_bridge(d);
-	return sprintf(buf, "%d\n", br->multicast_router);
+	return sprintf(buf, "%d\n",
+		       br_multicast_router_translate(br->multicast_router));
 }
 
 static ssize_t multicast_router_store(struct device *d,
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 2/6] net: bridge: Notify on bridge device mrouter state changes
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa
In-Reply-To: <20171005103642.1414-1-jiri@resnulli.us>

From: Yotam Gigi <yotamg@mellanox.com>

Add the SWITCHDEV_ATTR_ID_BRIDGE_MROUTER switchdev notification type, used
to indicate whether the bridge is or isn't mrouter. Notify when the bridge
changes its state, similarly to the already existing bridged port mrouter
notifications.

The notification uses the switchdev_attr.u.mrouter boolean flag to indicate
the current bridge mrouter status. Thus, it only indicates whether the
bridge is currently used as an mrouter or not, and does not indicate the
exact mrouter state of the bridge (learning, permanent, etc.).

Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/switchdev.h   |  1 +
 net/bridge/br_multicast.c | 21 ++++++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d767b79..d756fbe 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -51,6 +51,7 @@ enum switchdev_attr_id {
 	SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
 	SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
 	SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
+	SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
 };
 
 struct switchdev_attr {
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b86307b..4d4fcb5 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -859,6 +859,19 @@ static void br_multicast_router_expired(unsigned long data)
 	spin_unlock(&br->multicast_lock);
 }
 
+static void br_mc_router_state_change(struct net_bridge *p,
+				      bool is_mc_router)
+{
+	struct switchdev_attr attr = {
+		.orig_dev = p->dev,
+		.id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+		.flags = SWITCHDEV_F_DEFER,
+		.u.mrouter = is_mc_router,
+	};
+
+	switchdev_port_attr_set(p->dev, &attr);
+}
+
 static void br_multicast_local_router_expired(unsigned long data)
 {
 	struct net_bridge *br = (struct net_bridge *) data;
@@ -869,6 +882,7 @@ static void br_multicast_local_router_expired(unsigned long data)
 	    timer_pending(&br->multicast_router_timer))
 		goto out;
 
+	br_mc_router_state_change(br, false);
 	br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
 out:
 	spin_unlock(&br->multicast_lock);
@@ -1379,6 +1393,8 @@ static void br_multicast_mark_router(struct net_bridge *br,
 		    br->multicast_router == MDB_RTR_TYPE_TEMP) {
 			mod_timer(&br->multicast_router_timer,
 				  now + br->multicast_querier_interval);
+			if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
+				br_mc_router_state_change(br, true);
 			br->multicast_router = MDB_RTR_TYPE_TEMP;
 		}
 		return;
@@ -2056,13 +2072,16 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
 	switch (val) {
 	case MDB_RTR_TYPE_DISABLED:
 	case MDB_RTR_TYPE_PERM:
+		br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
 		del_timer(&br->multicast_router_timer);
 		br->multicast_router = val;
 		err = 0;
 		break;
 	case MDB_RTR_TYPE_TEMP_QUERY:
-		if (br->multicast_router != MDB_RTR_TYPE_TEMP)
+		if (br->multicast_router != MDB_RTR_TYPE_TEMP) {
+			br_mc_router_state_change(br, false);
 			br->multicast_router = val;
+		}
 		err = 0;
 		break;
 	}
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 3/6] net: bridge: Export bridge multicast router state
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa
In-Reply-To: <20171005103642.1414-1-jiri@resnulli.us>

From: Yotam Gigi <yotamg@mellanox.com>

Add an access function that, given a bridge netdevice, returns whether the
bridge device is currently an mrouter or not. The function uses the already
existing br_multicast_is_router function to check that.

This function is needed in order to allow ports that join an already
existing bridge to know the current mrouter state of the bridge device.
Together with the bridge device mrouter ports switchdev notifications, it
is possible to have full offloading of the semantics of the bridge device
mcast router state.

Due to the fact that the bridge multicast router status can change in
packet RX path, take the multicast_router bridge spinlock to protect the
read.

Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/linux/if_bridge.h |  5 +++++
 net/bridge/br_multicast.c | 12 ++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 3cd18ac..283a9be 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -63,6 +63,7 @@ int br_multicast_list_adjacent(struct net_device *dev,
 bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
 bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 bool br_multicast_enabled(const struct net_device *dev);
+bool br_multicast_router(const struct net_device *dev);
 #else
 static inline int br_multicast_list_adjacent(struct net_device *dev,
 					     struct list_head *br_ip_list)
@@ -83,6 +84,10 @@ static inline bool br_multicast_enabled(const struct net_device *dev)
 {
 	return false;
 }
+static inline bool br_multicast_router(const struct net_device *dev)
+{
+	return false;
+}
 #endif
 
 #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 4d4fcb5..b4c98a3 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2220,6 +2220,18 @@ bool br_multicast_enabled(const struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(br_multicast_enabled);
 
+bool br_multicast_router(const struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	bool is_router;
+
+	spin_lock_bh(&br->multicast_lock);
+	is_router = br_multicast_is_router(br);
+	spin_unlock_bh(&br->multicast_lock);
+	return is_router;
+}
+EXPORT_SYMBOL_GPL(br_multicast_router);
+
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
 {
 	unsigned long max_delay;
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 4/6] mlxsw: spectrum: router: Export the mlxsw_sp_router_port function
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa
In-Reply-To: <20171005103642.1414-1-jiri@resnulli.us>

From: Yotam Gigi <yotamg@mellanox.com>

In Spectrum hardware, the router port is a virtual port that is the gateway
to the routing mechanism. Hence, in order for a packet to be L3 forwarded,
it must first be L2 forwarded to the router port inside the hardware.

Further patches in this patchset are going to introduce support in bridge
device used as an mrouter port. In this case, the router port index will be
needed in order to update the MDB entries to include the router port. Thus,
export the mlxsw_sp_router_port function, which returns the index of the
Spectrum router port.

Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 58bc04c..58adb23 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5947,7 +5947,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 }
 
-static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
 {
 	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 3d44918..3f2d840 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -70,6 +70,7 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
 u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
 u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
 int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
 const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
 int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_rif *rif,
-- 
2.9.5

^ permalink raw reply related

* [patch net-next 5/6] mlxsw: spectrum_switchdev: Add support for router port in SMID entries
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa
In-Reply-To: <20171005103642.1414-1-jiri@resnulli.us>

From: Yotam Gigi <yotamg@mellanox.com>

In Spectrum, MDB entries point to MID entries, that indicate which ports a
packet should be forwarded to. Add the support in creating MID entries that
forward the packet to the Spectrum router port.

This will be later used to handle the bridge mrouter port switchdev
notifications.

Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 0f9eac5..092231a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -48,6 +48,7 @@
 #include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
+#include "spectrum_router.h"
 #include "spectrum.h"
 #include "core.h"
 #include "reg.h"
@@ -1241,7 +1242,8 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
 }
 
 static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx,
-					 long *ports_bitmap)
+					 long *ports_bitmap,
+					 bool set_router_port)
 {
 	char *smid_pl;
 	int err, i;
@@ -1256,9 +1258,15 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx,
 			mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
 	}
 
+	mlxsw_reg_smid_port_mask_set(smid_pl,
+				     mlxsw_sp_router_port(mlxsw_sp), 1);
+
 	for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core))
 		mlxsw_reg_smid_port_set(smid_pl, i, 1);
 
+	mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp),
+				set_router_port);
+
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
 	kfree(smid_pl);
 	return err;
@@ -1362,7 +1370,8 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
 
 	mid->mid = mid_idx;
-	err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap);
+	err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
+					    false);
 	kfree(flood_bitmap);
 	if (err)
 		return false;
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH net-next v6 0/4] bpf: add two helpers to read perf event enabled/running time
From: Daniel Borkmann @ 2017-10-05 10:36 UTC (permalink / raw)
  To: Peter Zijlstra, David Miller; +Cc: yhs, rostedt, ast, netdev, kernel-team
In-Reply-To: <20171005083437.v3yfluazdy5du7pi@hirez.programming.kicks-ass.net>

On 10/05/2017 10:34 AM, Peter Zijlstra wrote:
> On Wed, Oct 04, 2017 at 04:00:56PM -0700, David Miller wrote:
>> From: Yonghong Song <yhs@fb.com>
>> Date: Mon, 2 Oct 2017 15:42:14 -0700
>>
>>> [Dave, Peter,
>>>
>>>   Previous communcation shows that this patch may potentially have
>>>   merge conflict with upcoming tip changes in the next merge window.
>>>
>>>   Could you advise how this patch should proceed?
>>>
>>>   Thanks!
>>> ]
>>
>> Indeed, Peter how do you want to handle this?
>
> I think Alexei suggested that we merge the one patch in two branches and
> let git sort it out. I _think_ I've done something similar before and it
> worked.

Sounds good, we did something like this in the past as well I recall,
so lets make first patch isolated to only touch perf event area so
it can go into both trees, and the remaining pieces only for BPF bits
for net-next.

^ permalink raw reply

* [patch net-next 6/6] mlxsw: spectrum_switchdev: Support bridge mrouter notifications
From: Jiri Pirko @ 2017-10-05 10:36 UTC (permalink / raw)
  To: netdev
  Cc: davem, yotamg, idosch, nogahf, mlxsw, ivecera, nikolay, andrew,
	stephen, nbd, roopa
In-Reply-To: <20171005103642.1414-1-jiri@resnulli.us>

From: Yotam Gigi <yotamg@mellanox.com>

Support the SWITCHDEV_ATTR_ID_BRIDGE_MROUTER port attribute switchdev
notification.

To do that, add the mrouter flag to struct mlxsw_sp_bridge_device, which
indicates whether the bridge device was set to be mrouter port. This field
is set when:
 - A new bridge is created, where the value is taken from the kernel
   bridge value.
 - A switchdev SWITCHDEV_ATTR_ID_BRIDGE_MROUTER notification is sent.

In addition, change the bridge MID entries to include the router port when
the bridge device is configured to be mrouter port. The MID entries are
updated in the following cases:
 - When a new MID entry is created, update the router port according to the
   bridge mrouter state.
 - When a SWITCHDEV_ATTR_ID_BRIDGE_MROUTER notification is sent, update all
   the bridge's MID entries.

This is aligned with the case where a bridge slave is configured to be
mrouter port.

Signed-off-by: Yotam Gigi <yotamg@mellanox.com>
Reviewed-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 .../ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 65 +++++++++++++++++++++-
 1 file changed, 63 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 092231a..15bb5f9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -79,7 +79,8 @@ struct mlxsw_sp_bridge_device {
 	struct list_head ports_list;
 	struct list_head mids_list;
 	u8 vlan_enabled:1,
-	   multicast_enabled:1;
+	   multicast_enabled:1,
+	   mrouter:1;
 	const struct mlxsw_sp_bridge_ops *ops;
 };
 
@@ -169,6 +170,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
 	bridge_device->dev = br_dev;
 	bridge_device->vlan_enabled = vlan_enabled;
 	bridge_device->multicast_enabled = br_multicast_enabled(br_dev);
+	bridge_device->mrouter = br_multicast_router(br_dev);
 	INIT_LIST_HEAD(&bridge_device->ports_list);
 	if (vlan_enabled) {
 		bridge->vlan_enabled_exists = true;
@@ -811,6 +813,60 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 }
 
+static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp,
+					 u16 mid_idx, bool add)
+{
+	char *smid_pl;
+	int err;
+
+	smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+	if (!smid_pl)
+		return -ENOMEM;
+
+	mlxsw_reg_smid_pack(smid_pl, mid_idx,
+			    mlxsw_sp_router_port(mlxsw_sp), add);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+	kfree(smid_pl);
+	return err;
+}
+
+static void
+mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_bridge_device *bridge_device,
+				   bool add)
+{
+	struct mlxsw_sp_mid *mid;
+
+	list_for_each_entry(mid, &bridge_device->mids_list, list)
+		mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add);
+}
+
+static int
+mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				  struct switchdev_trans *trans,
+				  struct net_device *orig_dev,
+				  bool is_mrouter)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_bridge_device *bridge_device;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return 0;
+
+	/* It's possible we failed to enslave the port, yet this
+	 * operation is executed due to it being deferred.
+	 */
+	bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev);
+	if (!bridge_device)
+		return 0;
+
+	if (bridge_device->mrouter != is_mrouter)
+		mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device,
+						   is_mrouter);
+	bridge_device->mrouter = is_mrouter;
+	return 0;
+}
+
 static int mlxsw_sp_port_attr_set(struct net_device *dev,
 				  const struct switchdev_attr *attr,
 				  struct switchdev_trans *trans)
@@ -848,6 +904,11 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
 						    attr->orig_dev,
 						    attr->u.mc_disabled);
 		break;
+	case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+		err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans,
+							attr->orig_dev,
+							attr->u.mrouter);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 		break;
@@ -1371,7 +1432,7 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
 
 	mid->mid = mid_idx;
 	err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
-					    false);
+					    bridge_device->mrouter);
 	kfree(flood_bitmap);
 	if (err)
 		return false;
-- 
2.9.5

^ permalink raw reply related

* Re: [PATCH 3/7] crypto:gf128mul: The x8_ble multiplication functions
From: Harsh Jain @ 2017-10-05 11:20 UTC (permalink / raw)
  To: David Laight, herbert@gondor.apana.org.au,
	linux-crypto@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6DD0088A2B@AcuExch.aculab.com>



On 03-10-2017 20:28, David Laight wrote:
> From: Harsh Jain
>> Sent: 03 October 2017 07:46
>> It multiply GF(2^128) elements in the ble format.
>> It will be used by chelsio driver to fasten gf multiplication.
>                                        ^ speed up ??
It should be speed up. Will fix the same in V2. Thanks
>
> 	David
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox