Netdev List
 help / color / mirror / Atom feed
* [patch net-next RFC 01/12] mlxsw: spectrum: Move QSFP EEPROM definitions to common location
From: Vadim Pasternak @ 2018-06-26 12:10 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux, rui.zhang, edubezval, jiri, mlxsw, michaelsh,
	Vadim Pasternak
In-Reply-To: <1530015037-67361-1-git-send-email-vadimp@mellanox.com>

Move QSFP EEPROM definitions to common location from the spectrum
driver in order to make them available for other mlxsw modules. They
are common for all kinds of chips and relate to the SFF
specifications 8024, 8436, 8472, 8636, rather than to chip type.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 32 ++++++++++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 62 +++++++++-----------------
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 1877d9f..6a41c48 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -6757,13 +6757,41 @@ MLXSW_ITEM32(reg, mcia, device_address, 0x04, 0, 16);
  */
 MLXSW_ITEM32(reg, mcia, size, 0x08, 0, 16);
 
-#define MLXSW_SP_REG_MCIA_EEPROM_SIZE 48
+#define MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH	256
+#define MLXSW_REG_MCIA_EEPROM_SIZE		48
+#define MLXSW_REG_MCIA_I2C_ADDR_LOW		0x50
+#define MLXSW_REG_MCIA_I2C_ADDR_HIGH		0x51
+#define MLXSW_REG_MCIA_PAGE0_LO_OFF		0xa0
+#define MLXSW_REG_MCIA_TH_SIZE			8
+#define MLXSW_REG_MCIA_TH_PAGE_NUM		3
+#define MLXSW_REG_MCIA_PAGE0_LO			0
+#define MLXSW_REG_MCIA_TH_PAGE_OFF		0x80
+
+enum mlxsw_reg_mcia_eeprom_module_info_rev_id {
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_UNSPC	= 0x00,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8436	= 0x01,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636	= 0x03,
+};
+
+enum mlxsw_reg_mcia_eeprom_module_info_id {
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP	= 0x03,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP	= 0x0C,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS	= 0x0D,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28	= 0x11,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_DD	= 0x18,
+};
+
+enum mlxsw_reg_mcia_eeprom_module_info {
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID,
+	MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE,
+};
 
 /* reg_mcia_eeprom
  * Bytes to read/write.
  * Access: RW
  */
-MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+MLXSW_ITEM_BUF(reg, mcia, eeprom, 0x10, MLXSW_REG_MCIA_EEPROM_SIZE);
 
 static inline void mlxsw_reg_mcia_pack(char *payload, u8 module, u8 lock,
 				       u8 page_number, u16 device_addr,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 968b88a..1b0d1bc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2481,23 +2481,23 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
 					unsigned int *p_read_size)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE];
+	char eeprom_tmp[MLXSW_REG_MCIA_EEPROM_SIZE];
 	char mcia_pl[MLXSW_REG_MCIA_LEN];
 	u16 i2c_addr;
 	int status;
 	int err;
 
-	size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE);
+	size = min_t(u16, size, MLXSW_REG_MCIA_EEPROM_SIZE);
 
-	if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH &&
-	    offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH)
+	if (offset < MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH &&
+	    offset + size > MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH)
 		/* Cross pages read, read until offset 256 in low page */
-		size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset;
+		size = MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH - offset;
 
-	i2c_addr = MLXSW_SP_I2C_ADDR_LOW;
-	if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) {
-		i2c_addr = MLXSW_SP_I2C_ADDR_HIGH;
-		offset -= MLXSW_SP_EEPROM_PAGE_LENGTH;
+	i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_LOW;
+	if (offset >= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH) {
+		i2c_addr = MLXSW_REG_MCIA_I2C_ADDR_HIGH;
+		offset -= MLXSW_REG_MCIA_EEPROM_PAGE_LENGTH;
 	}
 
 	mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module,
@@ -2518,55 +2518,37 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 }
 
-enum mlxsw_sp_eeprom_module_info_rev_id {
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_UNSPC      = 0x00,
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8436       = 0x01,
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636       = 0x03,
-};
-
-enum mlxsw_sp_eeprom_module_info_id {
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP              = 0x03,
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP             = 0x0C,
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS        = 0x0D,
-	MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28           = 0x11,
-};
-
-enum mlxsw_sp_eeprom_module_info {
-	MLXSW_SP_EEPROM_MODULE_INFO_ID,
-	MLXSW_SP_EEPROM_MODULE_INFO_REV_ID,
-	MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
-};
-
 static int mlxsw_sp_get_module_info(struct net_device *netdev,
 				    struct ethtool_modinfo *modinfo)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
-	u8 module_info[MLXSW_SP_EEPROM_MODULE_INFO_SIZE];
+	u8 module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE];
 	u8 module_rev_id, module_id;
 	unsigned int read_size;
 	int err;
 
 	err = mlxsw_sp_query_module_eeprom(mlxsw_sp_port, 0,
-					   MLXSW_SP_EEPROM_MODULE_INFO_SIZE,
-					   module_info, &read_size);
+				MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE,
+				module_info, &read_size);
 	if (err)
 		return err;
 
-	if (read_size < MLXSW_SP_EEPROM_MODULE_INFO_SIZE)
+	if (read_size < MLXSW_REG_MCIA_EEPROM_MODULE_INFO_SIZE)
 		return -EIO;
 
-	module_rev_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_REV_ID];
-	module_id = module_info[MLXSW_SP_EEPROM_MODULE_INFO_ID];
+	module_rev_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID];
+	module_id = module_info[MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID];
 
 	switch (module_id) {
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP:
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP:
 		modinfo->type       = ETH_MODULE_SFF_8436;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
 		break;
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP_PLUS:
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28:
-		if (module_id  == MLXSW_SP_EEPROM_MODULE_INFO_ID_QSFP28 ||
-		    module_rev_id >= MLXSW_SP_EEPROM_MODULE_INFO_REV_ID_8636) {
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP_PLUS:
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28:
+		if (module_id == MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_QSFP28 ||
+		    module_rev_id >=
+		    MLXSW_REG_MCIA_EEPROM_MODULE_INFO_REV_ID_8636) {
 			modinfo->type       = ETH_MODULE_SFF_8636;
 			modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN;
 		} else {
@@ -2574,7 +2556,7 @@ static int mlxsw_sp_get_module_info(struct net_device *netdev,
 			modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN;
 		}
 		break;
-	case MLXSW_SP_EEPROM_MODULE_INFO_ID_SFP:
+	case MLXSW_REG_MCIA_EEPROM_MODULE_INFO_ID_SFP:
 		modinfo->type       = ETH_MODULE_SFF_8472;
 		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
 		break;
-- 
2.1.4

^ permalink raw reply related

* [patch net-next RFC 02/12] mlxsw: reg: Add MTBR register
From: Vadim Pasternak @ 2018-06-26 12:10 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux, rui.zhang, edubezval, jiri, mlxsw, michaelsh,
	Vadim Pasternak
In-Reply-To: <1530015037-67361-1-git-send-email-vadimp@mellanox.com>

Add MTBR (Management Temperature Bulk Register), which is used for port
temperature reading in a bulk mode.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 69 +++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6a41c48..cfe6bde 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -6703,6 +6703,74 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
 		mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
 }
 
+/* MTBR - Management Temperature Bulk Register
+ * -------------------------------------------
+ * This register is used for bulk temperature reading.
+ */
+#define MLXSW_REG_MTBR_ID		0x900F
+#define MLXSW_REG_MTBR_LEN		0xCC
+#define MLXSW_REG_MTBR_REC_MAX_COUNT	47
+
+MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
+
+/* reg_mtbr_base_sensor_index
+ * Base sensor index to access (0 - ASIC sensor, 1-63 - ambient sensors,
+ * 64-127 are mapped to the SFP+/QSFP modules sequentially).
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+
+/* reg_mtbr_num_rec
+ * Request: Number of records to read
+ * Response: Number of records read
+ * See above description for more details.
+ * Ranges 0..64
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtbr, num_rec, 0x04, 0, 8);
+
+/* reg_mtbr_temp
+ * Temperature reading from the sensor. Reading is in 0.125 Celsius
+ * degrees units.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtbr, temp, 0x10, 0, 16, 0x04, 0x00, false);
+
+/* reg_mtbr_max_temp
+ * The highest measured temperature from the sensor.
+ * When the bit mte is cleared, the field max_temperature is reserved.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtbr, max_temp, 0x10, 16, 16, 0x04, 0x00, false);
+
+static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+				       u8 num_rec)
+{
+	MLXSW_REG_ZERO(mtbr, payload);
+	mlxsw_reg_mtbr_base_sensor_index_set(payload, base_sensor_index);
+	mlxsw_reg_mtbr_num_rec_set(payload, num_rec);
+}
+
+/* Error codes from temperature reading */
+enum mlxsw_reg_mtbr_temp_status {
+	MLXSW_REG_MTBR_NO_CONN		= 0x8000,
+	MLXSW_REG_MTBR_NO_TEMP_SENS	= 0x8001,
+	MLXSW_REG_MTBR_INDEX_NA		= 0x8002,
+	MLXSW_REG_MTBR_BAD_SENS_INFO	= 0x8003,
+};
+
+/* Base index for reading ports temperature */
+#define MLXSW_REG_MTBR_BASE_PORT_INDEX		64
+
+static inline void mlxsw_reg_mtbr_temp_unpack(char *payload, int rec_index,
+					      u16 *p_temp, u16 *p_max_temp)
+{
+	if (p_temp)
+		*p_temp = mlxsw_reg_mtbr_temp_get(payload, rec_index);
+	if (p_max_temp)
+		*p_max_temp = mlxsw_reg_mtbr_max_temp_get(payload, rec_index);
+}
+
 /* MCIA - Management Cable Info Access
  * -----------------------------------
  * MCIA register is used to access the SFP+ and QSFP connector's EPROM.
@@ -7945,6 +8013,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
 	MLXSW_REG(mfsc),
 	MLXSW_REG(mfsm),
 	MLXSW_REG(mfsl),
+	MLXSW_REG(mtbr),
 	MLXSW_REG(mtcap),
 	MLXSW_REG(mtmp),
 	MLXSW_REG(mcia),
-- 
2.1.4

^ permalink raw reply related

* [patch net-next RFC 00/12] mlxsw thermal monitoring amendments
From: Vadim Pasternak @ 2018-06-26 12:10 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux, rui.zhang, edubezval, jiri, mlxsw, michaelsh,
	Vadim Pasternak

This patchset extends mlxsw hwmon and thermal modules with ports
temperature reading and adds new hwmon attributes for FAN and
temperature.

Port temperatures are the most critical component in system thermal
control and should be considered by the thermal algorithm.

New hwmon attributes, such as FAN fault and port temperature fault, will
improve system monitoring abilities.

Vadim Pasternak (12):
  mlxsw: spectrum: Move QSFP EEPROM definitions to common location
  mlxsw: reg: Add MTBR register
  mlxsw: core: Add core environment module for port temperature reading
  mlxsw: core: Add bus frequency capability flag for the bus type
  mlxsw: core: Set different thermal polling time based on bus type
  mlxsw: core: Modify thermal zone definition
  mlxsw: core: Extend thermal zone operations with get_trend method
  mlxsw: core: Extend cooling device with cooling levels
  mlxsw: core: Rename cooling device
  mlxsw: core: Add ports temperature measurement to thermal algorithm
  mlxsw: core: Extend hwmon interface with FAN fault attribute
  mlxsw: core: Extend hwmon interface with port temperature attributes

 drivers/net/ethernet/mellanox/mlxsw/Makefile       |   2 +-
 drivers/net/ethernet/mellanox/mlxsw/core.h         |   1 +
 drivers/net/ethernet/mellanox/mlxsw/core_env.c     | 316 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlxsw/core_env.h     |  63 ++++
 drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c   | 164 ++++++++++-
 drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 231 +++++++++++++--
 drivers/net/ethernet/mellanox/mlxsw/i2c.c          |   1 +
 drivers/net/ethernet/mellanox/mlxsw/reg.h          | 101 ++++++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  62 ++--
 9 files changed, 865 insertions(+), 76 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/core_env.c
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/core_env.h

-- 
2.1.4

^ permalink raw reply

* [patch net-next RFC 04/12] mlxsw: core: Add bus frequency capability flag for the bus type
From: Vadim Pasternak @ 2018-06-26 12:10 UTC (permalink / raw)
  To: davem
  Cc: netdev, linux, rui.zhang, edubezval, jiri, mlxsw, michaelsh,
	Vadim Pasternak
In-Reply-To: <1530015037-67361-1-git-send-email-vadimp@mellanox.com>

Add low frequency bus capability in order to allow core functionality
separation based on bus type. The driver can run over PCIe, which is
considered a high frequency bus, or over I2C, which is considered a low
frequency bus. In the latter case, time settings such as the thermal
polling interval should be increased.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlxsw/core.h | 1 +
 drivers/net/ethernet/mellanox/mlxsw/i2c.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 552cfa2..95e6190 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -371,6 +371,7 @@ struct mlxsw_bus_info {
 	struct mlxsw_fw_rev fw_rev;
 	u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN];
 	u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN];
+	bool low_frequency;
 };
 
 struct mlxsw_hwmon;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 25f9915..384b337 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -536,6 +536,7 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
 	mlxsw_i2c->bus_info.device_kind = id->name;
 	mlxsw_i2c->bus_info.device_name = client->name;
 	mlxsw_i2c->bus_info.dev = &client->dev;
+	mlxsw_i2c->bus_info.low_frequency = true;
 	mlxsw_i2c->dev = &client->dev;
 
 	err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
-- 
2.1.4

^ permalink raw reply related

* Re: [PATCH] selftests: bpf: enable NET_SCHED
From: Daniel Borkmann @ 2018-06-26 10:07 UTC (permalink / raw)
  To: Anders Roxell, ast, shuah; +Cc: netdev, linux-kernel, linux-kselftest
In-Reply-To: <20180625145605.13726-1-anders.roxell@linaro.org>

On 06/25/2018 04:56 PM, Anders Roxell wrote:
> CONFIG_NET_SCHED wasn't enabled in arm64's defconfig only for x86.
> So bpf/test_tunnel.sh tests fails with:
> RTNETLINK answers: Operation not supported
> RTNETLINK answers: Operation not supported
> We have an error talking to the kernel, -1
> Enable NET_SCHED and more tests passes.
> 
> Fixes: 3bce593ac06b ("selftests: bpf: config: add config fragments")
> Signed-off-by: Anders Roxell <anders.roxell@linaro.org>

Applied to bpf, thanks Anders!

^ permalink raw reply

* Re: [PATCH v4] bpf: attach type BPF_LIRC_MODE2 should not depend on CONFIG_CGROUP_BPF
From: Daniel Borkmann @ 2018-06-26 10:03 UTC (permalink / raw)
  To: Sean Young, Y Song, Matthias Reichl, linux-media, LKML,
	Alexei Starovoitov, Mauro Carvalho Chehab, netdev,
	Devin Heitmueller, Quentin Monnet
In-Reply-To: <20180618230423.nk2ey2755p2zkqmv@gofer.mess.org>

On 06/19/2018 01:04 AM, Sean Young wrote:
> If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not
> possible to attach, detach or query IR BPF programs to /dev/lircN devices,
> making them impossible to use. For embedded devices, it should be possible
> to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled.
> 
> This change requires some refactoring, since bpf_prog_{attach,detach,query}
> functions are now always compiled, but their code paths for cgroups need
> moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c,
> moving them to kernel/bpf/cgroup.c and kernel/bpf/sockmap.c does not
> require #ifdefs since that is already conditionally compiled.
> 
> Signed-off-by: Sean Young <sean@mess.org>

Applied to bpf, thanks Sean!

^ permalink raw reply

* Re: [PATCH bpf] nfp: bpf: don't stop offload if replace failed
From: Daniel Borkmann @ 2018-06-26 10:02 UTC (permalink / raw)
  To: Jakub Kicinski, alexei.starovoitov; +Cc: netdev, oss-drivers
In-Reply-To: <20180622185656.363-1-jakub.kicinski@netronome.com>

On 06/22/2018 08:56 PM, Jakub Kicinski wrote:
> Stopping offload completely if replace of program failed dates
> back to days of transparent offload.  Back then we wanted to
> silently fall back to the in-driver processing.  Today we mark
> programs for offload when they are loaded into the kernel, so
> the transparent offload is no longer a reality.
> 
> Flags check in the driver will only allow replace of a driver
> program with another driver program or an offload program with
> another offload program.
> 
> When driver program is replaced stopping offload is a no-op,
> because driver program isn't offloaded.  When replacing
> offloaded program if the offload fails the entire operation
> will fail all the way back to user space and we should continue
> using the old program.  IOW when replacing a driver program
> stopping offload is unnecessary and when replacing offloaded
> program - it's a bug, old program should continue to run.
> 
> In practice this bug would mean that if offload operation was to
> fail (either due to FW communication error, kernel OOM or new
> program being offloaded but for a different netdev) driver
> would continue reporting that previous XDP program is offloaded
> but in fact no program will be loaded in hardware.  The failure
> is fairly unlikely (found by inspection, when working on the code)
> but it's unpleasant.
> 
> Backport note: even though the bug was introduced in commit
> cafa92ac2553 ("nfp: bpf: add support for XDP_FLAGS_HW_MODE"),
> this fix depends on commit 441a33031fe5 ("net: xdp: don't allow
> device-bound programs in driver mode"), so this fix is sufficient
> only in v4.15 or newer.  Kernels v4.13.x and v4.14.x do need to
> stop offload if it was transparent/opportunistic, i.e. if
> XDP_FLAGS_HW_MODE was not set on running program.
> 
> Fixes: cafa92ac2553 ("nfp: bpf: add support for XDP_FLAGS_HW_MODE")
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>

Applied to bpf, thanks Jakub!

^ permalink raw reply

* Re: [PATCH v2 bpf-net] bpf: Change bpf_fib_lookup to return lookup status
From: Daniel Borkmann @ 2018-06-26  9:50 UTC (permalink / raw)
  To: dsahern, netdev, borkmann, ast; +Cc: davem, kafai, David Ahern
In-Reply-To: <20180621030011.7441-1-dsahern@kernel.org>

Hi David,

first of all, sorry for my late reply, I've been mostly offline last week. I think
there's still an issue with the current patch, more below:

On 06/21/2018 05:00 AM, dsahern@kernel.org wrote:
> From: David Ahern <dsahern@gmail.com>
> 
> For ACLs implemented using either FIB rules or FIB entries, the BPF
> program needs the FIB lookup status to be able to drop the packet.
> Since the bpf_fib_lookup API has not reached a released kernel yet,
> change the return code to contain an encoding of the FIB lookup
> result and return the nexthop device index in the params struct.
> 
> In addition, inform the BPF program of any post FIB lookup reason as
> to why the packet needs to go up the stack.
> 
> The fib result for unicast routes must have an egress device, so remove
> the check that it is non-NULL.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
> v2
> - drop BPF_FIB_LKUP_RET_NO_NHDEV; check in dev in fib result not needed
> - enhance documentation of BPF_FIB_LKUP_RET_ codes
> 
>  include/uapi/linux/bpf.h   | 28 ++++++++++++++----
>  net/core/filter.c          | 72 ++++++++++++++++++++++++++++++----------------
>  samples/bpf/xdp_fwd_kern.c |  8 +++---
>  3 files changed, 74 insertions(+), 34 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 59b19b6a40d7..b7db3261c62d 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1857,7 +1857,8 @@ union bpf_attr {
>   *		is resolved), the nexthop address is returned in ipv4_dst
>   *		or ipv6_dst based on family, smac is set to mac address of
>   *		egress device, dmac is set to nexthop mac address, rt_metric
> - *		is set to metric from route (IPv4/IPv6 only).
> + *		is set to metric from route (IPv4/IPv6 only), and ifindex
> + *		is set to the device index of the nexthop from the FIB lookup.
>   *
>   *             *plen* argument is the size of the passed in struct.
>   *             *flags* argument can be a combination of one or more of the
> @@ -1873,9 +1874,10 @@ union bpf_attr {
>   *             *ctx* is either **struct xdp_md** for XDP programs or
>   *             **struct sk_buff** tc cls_act programs.
>   *     Return
> - *             Egress device index on success, 0 if packet needs to continue
> - *             up the stack for further processing or a negative error in case
> - *             of failure.
> + *		* < 0 if any input argument is invalid
> + *		*   0 on success (packet is forwarded, nexthop neighbor exists)
> + *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
> + *		*     packet is not forwarded or needs assist from full stack
>   *
>   * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
>   *	Description
> @@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
>  #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
>  #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
>  
> +enum {
> +	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
> +	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
> +	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
> +	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
> +	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
> +	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
> +	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
> +	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
> +	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
> +};
> +
>  struct bpf_fib_lookup {
>  	/* input:  network family for lookup (AF_INET, AF_INET6)
>  	 * output: network family of egress nexthop
> @@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
>  
>  	/* total length of packet from network header - used for MTU check */
>  	__u16	tot_len;
> -	__u32	ifindex;  /* L3 device index for lookup */
> +
> +	/* input: L3 device index for lookup
> +	 * output: device index from FIB lookup
> +	 */
> +	__u32	ifindex;
>  
>  	union {
>  		/* inputs to lookup */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index e7f12e9f598c..f8dd8aa89de4 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
>  	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
>  	params->h_vlan_TCI = 0;
>  	params->h_vlan_proto = 0;
> +	params->ifindex = dev->ifindex;
>  
> -	return dev->ifindex;
> +	return 0;
>  }
>  #endif
>  
> @@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  	/* verify forwarding is enabled on this interface */
>  	in_dev = __in_dev_get_rcu(dev);
>  	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
> -		return 0;
> +		return BPF_FIB_LKUP_RET_FWD_DISABLED;
>  
>  	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
>  		fl4.flowi4_iif = 1;
> @@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  
>  		tb = fib_get_table(net, tbid);
>  		if (unlikely(!tb))
> -			return 0;
> +			return BPF_FIB_LKUP_RET_NOT_FWDED;
>  
>  		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
>  	} else {
> @@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
>  		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
>  	}
>  
> -	if (err || res.type != RTN_UNICAST)
> -		return 0;
> +	if (err) {
> +		/* map fib lookup errors to RTN_ type */
> +		if (err == -EINVAL)
> +			return BPF_FIB_LKUP_RET_BLACKHOLE;
> +		if (err == -EHOSTUNREACH)
> +			return BPF_FIB_LKUP_RET_UNREACHABLE;
> +		if (err == -EACCES)
> +			return BPF_FIB_LKUP_RET_PROHIBIT;
> +
> +		return BPF_FIB_LKUP_RET_NOT_FWDED;
> +	}
[...]
You change all the semantics of return code here, but this breaks bpf_skb_fib_lookup().
I cannot see how this would work in that case. The code does the following with the
bpf_ipv{4,6}_fib_lookup() return code:

[...]
        switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
        case AF_INET:
                index = bpf_ipv4_fib_lookup(net, params, flags, false);
                break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
        case AF_INET6:
                index = bpf_ipv6_fib_lookup(net, params, flags, false);
                break;
#endif
        }

        if (index > 0) {
                struct net_device *dev;

                dev = dev_get_by_index_rcu(net, index);
                if (!is_skb_forwardable(dev, skb))
                        index = 0;
        }
[...]

So the BPF_FIB_LKUP_* results become the dev ifindex here and the !is_skb_forwardable()
case further suggests that the packet *can* be forwarded based on the new semantics
whereas MTU check is bypassed on success.

It probably helps to craft a selftest for XDP *and* tc case in future, so we can be sure
nothing breaks with new changes.

Thanks,
Daniel

^ permalink raw reply

* [PATCH 8/9] networking: e1000.rst: Get rid of Sphinx warnings
From: Mauro Carvalho Chehab @ 2018-06-26  9:49 UTC (permalink / raw)
  To: Linux Doc Mailing List
  Cc: Mauro Carvalho Chehab, Mauro Carvalho Chehab, linux-kernel,
	Jonathan Corbet, Jeff Kirsher, David S. Miller, intel-wired-lan,
	netdev
In-Reply-To: <cover.1530005114.git.mchehab+samsung@kernel.org>

    Documentation/networking/e1000.rst:83: ERROR: Unexpected indentation.
    Documentation/networking/e1000.rst:84: WARNING: Block quote ends without a blank line; unexpected unindent.
    Documentation/networking/e1000.rst:173: WARNING: Definition list ends without a blank line; unexpected unindent.
    Documentation/networking/e1000.rst:236: WARNING: Definition list ends without a blank line; unexpected unindent.

While here, fix highlights and mark a table as such.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/networking/e1000.rst | 187 +++++++++++++++++------------
 1 file changed, 112 insertions(+), 75 deletions(-)

diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst
index 144b87eef153..f10dd4086921 100644
--- a/Documentation/networking/e1000.rst
+++ b/Documentation/networking/e1000.rst
@@ -34,7 +34,8 @@ Command Line Parameters
 The default value for each parameter is generally the recommended setting,
 unless otherwise noted.
 
-NOTES:  For more information about the AutoNeg, Duplex, and Speed
+NOTES:
+	For more information about the AutoNeg, Duplex, and Speed
         parameters, see the "Speed and Duplex Configuration" section in
         this document.
 
@@ -45,22 +46,27 @@ NOTES:  For more information about the AutoNeg, Duplex, and Speed
 
 AutoNeg
 -------
+
 (Supported only on adapters with copper connections)
-Valid Range:   0x01-0x0F, 0x20-0x2F
-Default Value: 0x2F
+
+:Valid Range:   0x01-0x0F, 0x20-0x2F
+:Default Value: 0x2F
 
 This parameter is a bit-mask that specifies the speed and duplex settings
 advertised by the adapter.  When this parameter is used, the Speed and
 Duplex parameters must not be specified.
 
-NOTE:  Refer to the Speed and Duplex section of this readme for more
+NOTE:
+       Refer to the Speed and Duplex section of this readme for more
        information on the AutoNeg parameter.
 
 Duplex
 ------
+
 (Supported only on adapters with copper connections)
-Valid Range:   0-2 (0=auto-negotiate, 1=half, 2=full)
-Default Value: 0
+
+:Valid Range:   0-2 (0=auto-negotiate, 1=half, 2=full)
+:Default Value: 0
 
 This defines the direction in which data is allowed to flow.  Can be
 either one or two-directional.  If both Duplex and the link partner are
@@ -70,18 +76,22 @@ duplex.
 
 FlowControl
 -----------
-Valid Range:   0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
-Default Value: Reads flow control settings from the EEPROM
+
+:Valid Range:   0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+:Default Value: Reads flow control settings from the EEPROM
 
 This parameter controls the automatic generation(Tx) and response(Rx)
 to Ethernet PAUSE frames.
 
 InterruptThrottleRate
 ---------------------
+
 (not supported on Intel(R) 82542, 82543 or 82544-based adapters)
-Valid Range:   0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative,
-                                 4=simplified balancing)
-Default Value: 3
+
+:Valid Range:
+   0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative,
+   4=simplified balancing)
+:Default Value: 3
 
 The driver can limit the amount of interrupts per second that the adapter
 will generate for incoming packets. It does this by writing a value to the
@@ -135,13 +145,15 @@ Setting InterruptThrottleRate to 0 turns off any interrupt moderation
 and may improve small packet latency, but is generally not suitable
 for bulk throughput traffic.
 
-NOTE:  InterruptThrottleRate takes precedence over the TxAbsIntDelay and
+NOTE:
+       InterruptThrottleRate takes precedence over the TxAbsIntDelay and
        RxAbsIntDelay parameters.  In other words, minimizing the receive
        and/or transmit absolute delays does not force the controller to
        generate more interrupts than what the Interrupt Throttle Rate
        allows.
 
-CAUTION:  If you are using the Intel(R) PRO/1000 CT Network Connection
+CAUTION:
+          If you are using the Intel(R) PRO/1000 CT Network Connection
           (controller 82547), setting InterruptThrottleRate to a value
           greater than 75,000, may hang (stop transmitting) adapters
           under certain network conditions.  If this occurs a NETDEV
@@ -151,7 +163,8 @@ CAUTION:  If you are using the Intel(R) PRO/1000 CT Network Connection
           hang, ensure that InterruptThrottleRate is set no greater
           than 75,000 and is not set to 0.
 
-NOTE:  When e1000 is loaded with default settings and multiple adapters
+NOTE:
+       When e1000 is loaded with default settings and multiple adapters
        are in use simultaneously, the CPU utilization may increase non-
        linearly.  In order to limit the CPU utilization without impacting
        the overall throughput, we recommend that you load the driver as
@@ -168,9 +181,11 @@ NOTE:  When e1000 is loaded with default settings and multiple adapters
 
 RxDescriptors
 -------------
-Valid Range:   48-256 for 82542 and 82543-based adapters
-               48-4096 for all other supported adapters
-Default Value: 256
+
+:Valid Range:
+ - 48-256 for 82542 and 82543-based adapters
+ - 48-4096 for all other supported adapters
+:Default Value: 256
 
 This value specifies the number of receive buffer descriptors allocated
 by the driver.  Increasing this value allows the driver to buffer more
@@ -180,15 +195,17 @@ Each descriptor is 16 bytes.  A receive buffer is also allocated for each
 descriptor and can be either 2048, 4096, 8192, or 16384 bytes, depending
 on the MTU setting. The maximum MTU size is 16110.
 
-NOTE:  MTU designates the frame size.  It only needs to be set for Jumbo
+NOTE:
+       MTU designates the frame size.  It only needs to be set for Jumbo
        Frames.  Depending on the available system resources, the request
        for a higher number of receive descriptors may be denied.  In this
        case, use a lower number.
 
 RxIntDelay
 ----------
-Valid Range:   0-65535 (0=off)
-Default Value: 0
+
+:Valid Range:   0-65535 (0=off)
+:Default Value: 0
 
 This value delays the generation of receive interrupts in units of 1.024
 microseconds.  Receive interrupt reduction can improve CPU efficiency if
@@ -198,7 +215,8 @@ of TCP traffic.  If the system is reporting dropped receives, this value
 may be set too high, causing the driver to run out of available receive
 descriptors.
 
-CAUTION:  When setting RxIntDelay to a value other than 0, adapters may
+CAUTION:
+          When setting RxIntDelay to a value other than 0, adapters may
           hang (stop transmitting) under certain network conditions.  If
           this occurs a NETDEV WATCHDOG message is logged in the system
           event log.  In addition, the controller is automatically reset,
@@ -207,9 +225,11 @@ CAUTION:  When setting RxIntDelay to a value other than 0, adapters may
 
 RxAbsIntDelay
 -------------
+
 (This parameter is supported only on 82540, 82545 and later adapters.)
-Valid Range:   0-65535 (0=off)
-Default Value: 128
+
+:Valid Range:   0-65535 (0=off)
+:Default Value: 128
 
 This value, in units of 1.024 microseconds, limits the delay in which a
 receive interrupt is generated.  Useful only if RxIntDelay is non-zero,
@@ -220,9 +240,11 @@ conditions.
 
 Speed
 -----
+
 (This parameter is supported only on adapters with copper connections.)
-Valid Settings: 0, 10, 100, 1000
-Default Value:  0 (auto-negotiate at all supported speeds)
+
+:Valid Settings: 0, 10, 100, 1000
+:Default Value:  0 (auto-negotiate at all supported speeds)
 
 Speed forces the line speed to the specified value in megabits per second
 (Mbps).  If this parameter is not specified or is set to 0 and the link
@@ -231,22 +253,26 @@ speed.  Duplex should also be set when Speed is set to either 10 or 100.
 
 TxDescriptors
 -------------
-Valid Range:   48-256 for 82542 and 82543-based adapters
-               48-4096 for all other supported adapters
-Default Value: 256
+
+:Valid Range:
+  - 48-256 for 82542 and 82543-based adapters
+  - 48-4096 for all other supported adapters
+:Default Value: 256
 
 This value is the number of transmit descriptors allocated by the driver.
 Increasing this value allows the driver to queue more transmits.  Each
 descriptor is 16 bytes.
 
-NOTE:  Depending on the available system resources, the request for a
+NOTE:
+       Depending on the available system resources, the request for a
        higher number of transmit descriptors may be denied.  In this case,
        use a lower number.
 
 TxIntDelay
 ----------
-Valid Range:   0-65535 (0=off)
-Default Value: 8
+
+:Valid Range:   0-65535 (0=off)
+:Default Value: 8
 
 This value delays the generation of transmit interrupts in units of
 1.024 microseconds.  Transmit interrupt reduction can improve CPU
@@ -256,9 +282,11 @@ causing the driver to run out of available transmit descriptors.
 
 TxAbsIntDelay
 -------------
+
 (This parameter is supported only on 82540, 82545 and later adapters.)
-Valid Range:   0-65535 (0=off)
-Default Value: 32
+
+:Valid Range:   0-65535 (0=off)
+:Default Value: 32
 
 This value, in units of 1.024 microseconds, limits the delay in which a
 transmit interrupt is generated.  Useful only if TxIntDelay is non-zero,
@@ -269,18 +297,21 @@ network conditions.
 
 XsumRX
 ------
+
 (This parameter is NOT supported on the 82542-based adapter.)
-Valid Range:   0-1
-Default Value: 1
+
+:Valid Range:   0-1
+:Default Value: 1
 
 A value of '1' indicates that the driver should enable IP checksum
 offload for received packets (both UDP and TCP) to the adapter hardware.
 
 Copybreak
 ---------
-Valid Range:   0-xxxxxxx (0=off)
-Default Value: 256
-Usage: modprobe e1000.ko copybreak=128
+
+:Valid Range:   0-xxxxxxx (0=off)
+:Default Value: 256
+:Usage: modprobe e1000.ko copybreak=128
 
 Driver copies all packets below or equaling this size to a fresh RX
 buffer before handing it up the stack.
@@ -292,8 +323,9 @@ it is also available during runtime at
 
 SmartPowerDownEnable
 --------------------
-Valid Range: 0-1
-Default Value:  0 (disabled)
+
+:Valid Range: 0-1
+:Default Value:  0 (disabled)
 
 Allows PHY to turn off in lower power states. The user can turn off
 this parameter in supported chipsets.
@@ -309,14 +341,14 @@ fiber interface board only links at 1000 Mbps full-duplex.
 
 For copper-based boards, the keywords interact as follows:
 
-  The default operation is auto-negotiate.  The board advertises all
+- The default operation is auto-negotiate.  The board advertises all
   supported speed and duplex combinations, and it links at the highest
   common speed and duplex mode IF the link partner is set to auto-negotiate.
 
-  If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps
+- If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps
   is advertised (The 1000BaseT spec requires auto-negotiation.)
 
-  If Speed = 10 or 100, then both Speed and Duplex should be set.  Auto-
+- If Speed = 10 or 100, then both Speed and Duplex should be set.  Auto-
   negotiation is disabled, and the AutoNeg parameter is ignored.  Partner
   SHOULD also be forced.
 
@@ -328,13 +360,15 @@ process.
 The parameter may be specified as either a decimal or hexadecimal value as
 determined by the bitmap below.
 
+============== ====== ====== ======= ======= ====== ====== ======= ======
 Bit position   7      6      5       4       3      2      1       0
 Decimal Value  128    64     32      16      8      4      2       1
 Hex value      80     40     20      10      8      4      2       1
 Speed (Mbps)   N/A    N/A    1000    N/A     100    100    10      10
 Duplex                       Full            Full   Half   Full    Half
+============== ====== ====== ======= ======= ====== ====== ======= ======
 
-Some examples of using AutoNeg:
+Some examples of using AutoNeg::
 
   modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half)
   modprobe e1000 AutoNeg=1 (Same as above)
@@ -357,56 +391,59 @@ Additional Configurations
 
 Jumbo Frames
 ------------
-Jumbo Frames support is enabled by changing the MTU to a value larger
-than the default of 1500.  Use the ifconfig command to increase the MTU
-size.  For example::
+
+  Jumbo Frames support is enabled by changing the MTU to a value larger than
+  the default of 1500.  Use the ifconfig command to increase the MTU size.
+  For example::
 
        ifconfig eth<x> mtu 9000 up
 
-This setting is not saved across reboots.  It can be made permanent if
-you add::
+  This setting is not saved across reboots.  It can be made permanent if
+  you add::
 
        MTU=9000
 
-to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>.  This example
-applies to the Red Hat distributions; other distributions may store this
-setting in a different location.
+  to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>.  This example
+  applies to the Red Hat distributions; other distributions may store this
+  setting in a different location.
 
-Notes: Degradation in throughput performance may be observed in some
-Jumbo frames environments.  If this is observed, increasing the
-application's socket buffer size and/or increasing the
-/proc/sys/net/ipv4/tcp_*mem entry values may help.  See the specific
-application manual and /usr/src/linux*/Documentation/
-networking/ip-sysctl.txt for more details.
+Notes:
+  Degradation in throughput performance may be observed in some Jumbo frames
+  environments. If this is observed, increasing the application's socket buffer
+  size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
+  See the specific application manual and /usr/src/linux*/Documentation/
+  networking/ip-sysctl.txt for more details.
 
-- The maximum MTU setting for Jumbo Frames is 16110.  This value
-  coincides with the maximum Jumbo Frames size of 16128.
+  - The maximum MTU setting for Jumbo Frames is 16110.  This value coincides
+    with the maximum Jumbo Frames size of 16128.
 
-- Using Jumbo frames at 10 or 100 Mbps is not supported and may result
-  in poor performance or loss of link.
+  - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+    poor performance or loss of link.
 
-- Adapters based on the Intel(R) 82542 and 82573V/E controller do not
-  support Jumbo Frames.  These correspond to the following product names:
-  Intel(R) PRO/1000 Gigabit Server Adapter Intel(R) PRO/1000 PM Network
-  Connection
+  - Adapters based on the Intel(R) 82542 and 82573V/E controller do not
+    support Jumbo Frames. These correspond to the following product names::
+
+     Intel(R) PRO/1000 Gigabit Server Adapter
+     Intel(R) PRO/1000 PM Network Connection
 
 ethtool
 -------
-The driver utilizes the ethtool interface for driver configuration and
-diagnostics, as well as displaying statistical information.  The ethtool
-version 1.6 or later is required for this functionality.
 
-The latest release of ethtool can be found from
-https://www.kernel.org/pub/software/network/ethtool/
+  The driver utilizes the ethtool interface for driver configuration and
+  diagnostics, as well as displaying statistical information.  The ethtool
+  version 1.6 or later is required for this functionality.
+
+  The latest release of ethtool can be found from
+  https://www.kernel.org/pub/software/network/ethtool/
 
 Enabling Wake on LAN* (WoL)
 ---------------------------
-WoL is configured through the ethtool* utility.
 
-WoL will be enabled on the system during the next shut down or reboot.
-For this driver version, in order to enable WoL, the e1000 driver must be
-loaded when shutting down or rebooting the system.
+  WoL is configured through the ethtool* utility.
 
+  WoL will be enabled on the system during the next shut down or reboot.
+  For this driver version, in order to enable WoL, the e1000 driver must be
+  loaded when shutting down or rebooting the system.
 
 Support
 =======
-- 
2.17.1

^ permalink raw reply related

* [PATCH 0/9] Fix references for some missing documentation files
From: Mauro Carvalho Chehab @ 2018-06-26  9:49 UTC (permalink / raw)
  To: Linux Doc Mailing List
  Cc: Mauro Carvalho Chehab, Mauro Carvalho Chehab, linux-kernel,
	Jonathan Corbet, Jacek Anaszewski, devicetree, Ingo Molnar,
	linux-kernel, Andrew Morton, linux-leds, intel-wired-lan,
	Mark Rutland, linux-gpio, David S. Miller, James Morris,
	Jeff Kirsher, Changbin Du, Masami Hiramatsu, netdev,
	Steven Rostedt, linux-input, linu

Having nothing to do while waiting for my plane on the way
back from Japan, I ended up writing a small series of
patches meant to reduce the number of bad Documentation/*
links that are detected by:
	./scripts/documentation-file-ref-check

I ended up rebasing this patch series against linux-next, because
of those two patches:
	3b0c3ebe2a42 Documentation: e100: Fix docs build error
	805f16a5f12f Documentation: e1000: Fix docs build error

They basically fix documentation builds with upstream Kernel. Both
got merged on -rc2.

The first two patches in this series make the script ignore some
false positives.

Patches 3 to 6 correct the location of some documentation files.

Patches 7 and 8 were actually two patches meant to fix the build
error. I ended up rebasing them over linux-next, as they fix some
troubles with the ReST syntax which cause warnings.

Patch 9 converts Documentation/trace/histogram.txt to ReST
syntax. It also had to be rebased against linux-next, due to some minor
conflicts with:
    064f35a95224 ("tracing: Fix some errors in histogram documentation")

After this series, the script still produces 16 warnings:

Documentation/devicetree/bindings/input/mtk-pmic-keys.txt: Documentation/devicetree/bindings/input/keys.txt
Documentation/devicetree/bindings/input/mtk-pmic-keys.txt: Documentation/devicetree/bindings/input/keys.txt
Documentation/devicetree/bindings/regulator/rohm,bd71837-regulator.txt: Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.txt
Documentation/devicetree/dynamic-resolution-notes.txt: Documentation/devicetree/dt-object-internal.txt
Documentation/scsi/scsi_mid_low_api.txt: Documentation/Configure.help
Documentation/translations/zh_CN/HOWTO: Documentation/DocBook/
Documentation/translations/zh_CN/basic_profiling.txt: Documentation/basic_profiling
Documentation/translations/zh_CN/basic_profiling.txt: Documentation/basic_profiling
MAINTAINERS: Documentation/fpga/
MAINTAINERS: Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt
arch/powerpc/Kconfig: Documentation/vm/protection-keys.rst
drivers/isdn/mISDN/dsp_core.c: Documentation/isdn/mISDN.cert
drivers/scsi/Kconfig: file:Documentation/scsi/tmscsim.txt
drivers/vhost/vhost.c: Documentation/virtual/lguest/lguest.c
include/linux/fs_context.h: Documentation/filesystems/mounting.txt
include/linux/lsm_hooks.h: Documentation/filesystems/mounting.txt

IMHO, the above should be fixed by the corresponding maintainers.

The ones that scare me most are the DT binding documentation, as
the binding documentation for some stuff is likely broken.

Btw, two of the above are new on linux-next (include/linux/fs_context.h
and include/linux/lsm_hooks.h). That makes me wonder whether we should
add some logic (or run the detect script) to checkpatch.pl, or make
it call ./scripts/documentation-file-ref-check.

Mauro Carvalho Chehab (9):
  scripts/documentation-file-ref-check: remove some false positives
  scripts/documentation-file-ref-check: ignore sched-pelt false positive
  docs: zh_CN: fix location of oops-tracing.txt
  devicectree: bindings: fix location of leds common file
  MAINTAINERS: fix location of ina2xx.txt device tree file
  gpio.h: fix location of gpio legacy documentation
  networking: e100.rst: Get rid of Sphinx warnings
  networking: e1000.rst: Get rid of Sphinx warnings
  docs: histogram.txt: convert it to ReST file format

 .../devicetree/bindings/leds/common.txt       |    2 +-
 Documentation/networking/e100.rst             |   27 +-
 Documentation/networking/e1000.rst            |  187 ++-
 Documentation/trace/events.rst                |    2 +-
 .../trace/{histogram.txt => histogram.rst}    | 1242 +++++++++--------
 Documentation/trace/index.rst                 |    1 +
 .../translations/zh_CN/oops-tracing.txt       |    4 +-
 MAINTAINERS                                   |    2 +-
 include/linux/gpio.h                          |    2 +-
 kernel/trace/Kconfig                          |    2 +-
 scripts/documentation-file-ref-check          |    6 +
 11 files changed, 767 insertions(+), 710 deletions(-)
 rename Documentation/trace/{histogram.txt => histogram.rst} (73%)

-- 
2.17.1

^ permalink raw reply

* [PATCH 7/9] networking: e100.rst: Get rid of Sphinx warnings
From: Mauro Carvalho Chehab @ 2018-06-26  9:49 UTC (permalink / raw)
  To: Linux Doc Mailing List
  Cc: Mauro Carvalho Chehab, Mauro Carvalho Chehab, linux-kernel,
	Jonathan Corbet, Jeff Kirsher, David S. Miller, intel-wired-lan,
	netdev
In-Reply-To: <cover.1530005114.git.mchehab+samsung@kernel.org>

    Documentation/networking/e100.rst:57: WARNING: Literal block expected; none found.
    Documentation/networking/e100.rst:68: WARNING: Literal block expected; none found.
    Documentation/networking/e100.rst:75: WARNING: Literal block expected; none found.
    Documentation/networking/e100.rst:84: WARNING: Literal block expected; none found.
    Documentation/networking/e100.rst:93: WARNING: Inline emphasis start-string without end-string.

While here, fix some highlights.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/networking/e100.rst | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst
index 9708f5fa76de..f81111eba9c5 100644
--- a/Documentation/networking/e100.rst
+++ b/Documentation/networking/e100.rst
@@ -47,41 +47,45 @@ Driver Configuration Parameters
 The default value for each parameter is generally the recommended setting,
 unless otherwise noted.
 
-Rx Descriptors: Number of receive descriptors. A receive descriptor is a data
+Rx Descriptors:
+   Number of receive descriptors. A receive descriptor is a data
    structure that describes a receive buffer and its attributes to the network
    controller. The data in the descriptor is used by the controller to write
    data from the controller to host memory. In the 3.x.x driver the valid range
    for this parameter is 64-256. The default value is 256. This parameter can be
    changed using the command::
 
-   ethtool -G eth? rx n
+     ethtool -G eth? rx n
 
    Where n is the number of desired Rx descriptors.
 
-Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data
+Tx Descriptors:
+   Number of transmit descriptors. A transmit descriptor is a data
    structure that describes a transmit buffer and its attributes to the network
    controller. The data in the descriptor is used by the controller to read
    data from the host memory to the controller. In the 3.x.x driver the valid
    range for this parameter is 64-256. The default value is 128. This parameter
    can be changed using the command::
 
-   ethtool -G eth? tx n
+     ethtool -G eth? tx n
 
    Where n is the number of desired Tx descriptors.
 
-Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by
+Speed/Duplex:
+   The driver auto-negotiates the link speed and duplex settings by
    default. The ethtool utility can be used as follows to force speed/duplex.::
 
-   ethtool -s eth?  autoneg off speed {10|100} duplex {full|half}
+     ethtool -s eth?  autoneg off speed {10|100} duplex {full|half}
 
    NOTE: setting the speed/duplex to incorrect values will cause the link to
    fail.
 
-Event Log Message Level:  The driver uses the message level flag to log events
+Event Log Message Level:
+   The driver uses the message level flag to log events
    to syslog. The message level can be set at driver load time. It can also be
    set using the command::
 
-   ethtool -s eth? msglvl n
+     ethtool -s eth? msglvl n
 
 
 Additional Configurations
@@ -92,7 +96,7 @@ Configuring the Driver on Different Distributions
 
 Configuring a network driver to load properly when the system is started
 is distribution dependent.  Typically, the configuration process involves
-adding an alias line to /etc/modprobe.d/*.conf as well as editing other
+adding an alias line to `/etc/modprobe.d/*.conf` as well as editing other
 system startup scripts and/or configuration files.  Many popular Linux
 distributions ship with tools to make these changes for you.  To learn
 the proper way to configure a network device for your system, refer to
@@ -160,7 +164,10 @@ This results in unbalanced receive traffic.
 If you have multiple interfaces in a server, either turn on ARP
 filtering by
 
-(1) entering:: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+(1) entering::
+
+	echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
     (this only works if your kernel's version is higher than 2.4.5), or
 
 (2) installing the interfaces in separate broadcast domains (either
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH 00/14] ARM: davinci: step towards removing at24_platform_data
From: Bartosz Golaszewski @ 2018-06-26  9:31 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Sekhar Nori, Kevin Hilman, Russell King, Grygorii Strashko,
	David S . Miller, Srinivas Kandagatla, Lukas Wunner, Rob Herring,
	Florian Fainelli, Dan Carpenter, Ivan Khoronzhuk, David Lechner,
	Greg Kroah-Hartman, Linux ARM, Linux Kernel Mailing List,
	linux-omap, netdev, Bartosz Golaszewski
In-Reply-To: <20180626083823.GA28068@lunn.ch>

2018-06-26 10:38 GMT+02:00 Andrew Lunn <andrew@lunn.ch>:
> On Tue, Jun 26, 2018 at 09:44:19AM +0200, Bartosz Golaszewski wrote:
>> 2018-06-25 20:02 GMT+02:00 Andrew Lunn <andrew@lunn.ch>:
>> >> With my patch 1/14 you'll get -EPROBE_DEFER from nvmem_cell_get() if
>> >> the nvmem provider is not yet registered. Will that help in your case?
>> >
>> > I don't think so. My driver instantiates the AT24 device. So if i get
>> > -EPROBE_DEFER, i need to cleanup the probe, and return -EPROBE_DEFER
>> > to the code. Which means i need to remove the AT24 device...
>> >
>> >        Andrew
>>
>> Are you sure this is the correct approach? I understand that you want
>> to use something like board files for your machine? Wouldn't it be
>> better to register a platform device for at24, register a cell lookup
>> and then depend on that cell from your driver?
>
> Hi Bartosz
>
> The machine is based around a Kontron Com Express module, with an
> intel Ivy Bridge CPU. This is then placed into a custom carrier board,
> which has a number of i2c devices.
>
> I have a platform driver which matches on the DMI system ID for the
> Kontron module.
>
> The Com Express module has a PLD which implements i2c, gpio,
> etc. There is an MFD for this, which instantiates the i2c-kempld i2c
> bus driver.
>
> My platform driver finds this i2c-kempld bus driver. If it does not
> exist yet, it returns -EPROBE_DEFER. If it is found it instantiates an
> at24 device on it. I need to look at the content of the EEPROM to
> determine the hardware revision, plus do a checksum.  From that, i
> need to instantiate 1 or 2 additional AT24, up to 4 GPIO expanders,
> an i2c to spi converter, add some gpio-leds on the gpio expanders,
> create a bit-banging MDIO bus, instantiate an Ethernet switch on the
> MDIO bus, maybe add an Ethernet switch to the SPI bus, etc.
>
> As you can see, i have a chain of events. I cannot move onto the next
> part of the chain until i know the probe for the previous part has
> finished. e.g. i cannot add gpio-leds until i know the gpio expander
> has probed. But the gpio expander provides a call back, similar to the
> at24 setup(). The MDIO bus and the SPI bus have a mechanism to register
> an info structure, just like you have done for NVMEM cells. So that
> works out.
>
> The weak link in this chain is that first at24 probe, and knowing when
> i can access the nvmem cells for the revision and checksum
> information. -EPROBE_DEFER does not help me here. I need either some
> sort of blocking wait for the cells to become available, or a callback
> in a context which allows me to instantiate more devices.
>
> I also have some steps which cannot be undone. You don't provide a
> mechanism to unregister the nvmem info structure. The I2C and MDIO
> equivalents also do not provide an unregister for bus info. So once i
> register the first info structure, i'm past the point of no return. I
> cannot return -EPROBE_DEFER because i cannot unregister the info
> structures, so that i can register them again the next time the
> platform driver gets probed.
>
>          Andrew
>

I see. I see it this way: the setup callback comes from the time when
we didn't have nvmem and should go away. I will protest loudly whenever
someone tries to use it again and will work towards removing it as
soon as possible.

I will give your problem some thought and will try to get back with some
proposals - maybe we should, as you suggested, extend nvmem even
further to allow removing nvmem info entries etc.

Best regards,
Bartosz Golaszewski

^ permalink raw reply

* [PATCH net-next] cxgb4: Add new T5 PCI device id 0x50ae
From: Ganesh Goudar @ 2018-06-26  9:21 UTC (permalink / raw)
  To: netdev, davem; +Cc: nirranjan, indranil, Ganesh Goudar

Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index c7f8d04..e3adf43 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -188,6 +188,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x50ac), /* Custom T540-BT */
 	CH_PCI_ID_TABLE_FENTRY(0x50ad), /* Custom T520-CR */
+	CH_PCI_ID_TABLE_FENTRY(0x50ae), /* Custom T540-XL-SO */
 
 	/* T6 adapters:
 	 */
-- 
2.1.0

^ permalink raw reply related

* [PATCH net-next] cxgb4: Add flag tc_flower_initialized
From: Ganesh Goudar @ 2018-06-26  9:18 UTC (permalink / raw)
  To: netdev, davem; +Cc: nirranjan, indranil, Casey Leedom, Ganesh Goudar

From: Casey Leedom <leedom@chelsio.com>

Add flag tc_flower_initialized to indicate the
completion of tc flower initialization.

Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h           | 1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 8 ++++++++
 drivers/net/ethernet/chelsio/cxgb4/sched.c           | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a4ea53d..4a8cbd8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -968,6 +968,7 @@ struct adapter {
 	struct chcr_stats_debug chcr_stats;
 
 	/* TC flower offload */
+	bool tc_flower_initialized;
 	struct rhashtable flower_tbl;
 	struct rhashtable_params flower_ht_params;
 	struct timer_list flower_stats_timer;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 3ddd2c4..623f73d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -874,6 +874,9 @@ int cxgb4_init_tc_flower(struct adapter *adap)
 {
 	int ret;
 
+	if (adap->tc_flower_initialized)
+		return -EEXIST;
+
 	adap->flower_ht_params = cxgb4_tc_flower_ht_params;
 	ret = rhashtable_init(&adap->flower_tbl, &adap->flower_ht_params);
 	if (ret)
@@ -882,13 +885,18 @@ int cxgb4_init_tc_flower(struct adapter *adap)
 	INIT_WORK(&adap->flower_stats_work, ch_flower_stats_handler);
 	timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0);
 	mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
+	adap->tc_flower_initialized = true;
 	return 0;
 }
 
 void cxgb4_cleanup_tc_flower(struct adapter *adap)
 {
+	if (!adap->tc_flower_initialized)
+		return;
+
 	if (adap->flower_stats_timer.function)
 		del_timer_sync(&adap->flower_stats_timer);
 	cancel_work_sync(&adap->flower_stats_work);
 	rhashtable_destroy(&adap->flower_tbl);
+	adap->tc_flower_initialized = false;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index 9148abb..7fc6566 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -539,6 +539,9 @@ void t4_cleanup_sched(struct adapter *adap)
 		struct port_info *pi = netdev2pinfo(adap->port[j]);
 
 		s = pi->sched_tbl;
+		if (!s)
+			continue;
+
 		for (i = 0; i < s->sched_size; i++) {
 			struct sched_class *e;
 
-- 
2.1.0

^ permalink raw reply related

* Re: Request to enable setting the nested network namespace
From: Pamela Mei @ 2018-06-26  9:10 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: netdev
In-Reply-To: <20180614092754.GA2038@nanopsycho>

I don't mean that tracking the whole history of netns changes should be
mandatory. I mean it would be better to have an option that lets the user
set a new parent for the child netns, not only the initial one.
Is there any technical bottleneck for this request?

Cheers,
Pamela MEI

On Thu, Jun 14, 2018 at 5:27 PM, Jiri Pirko <jiri@resnulli.us> wrote:
> Thu, Jun 14, 2018 at 10:04:57AM CEST, pamela.mei@gmail.com wrote:
>>In linux, set up 2 network namespaces, ns1 and ns2. "ip netns list"
>>can view the 2 network namespaces.
>>Move one network device from linux root namespace to ns1 then from ns1
>>to ns2, then delete ns2,
>>expect that network device can move back to ns1,
>>but actual result is that eth1 is back to linux root network
>>namespace. I'm not sure whether it's as expected.
>>
>>Here is the detail test steps:
>>
>>1.ip netns add ns1
>>
>>2.ip netns add ns2
>>
>>3.ip link set eth1 netns ns1
>>
>>4.ip netns exec ns1 ip link set eth1 netns ns2
>>
>>5.ip netns del ns2
>>
>>Expected result: eth1 will be in ns1
>>
>>Actual result: eth1 is back in linux root namespace 1
>>
>>Question: is there any method to realize such scenario to make sure
>>device can be back to ns1 not linux root network namespace 1?
>>
>>How about if there's a function to enable nest network namespace e.g.
>>can set ns1 as the parent namespace of ns2, then device can return to
>>ns1 when ns2 is gone.
>
> You would have to track the whole history of netns changes for each
> netdevice. That does not sound right. Move back to initial netns seems
> correct to me.
>
>
>>
>>
>>Cheers,
>>
>>Pamela MEI

^ permalink raw reply

* Re: [PATCH net-next 0/6] mlxsw: Support bridge router interfaces with non-default VLAN
From: David Miller @ 2018-06-26  9:06 UTC (permalink / raw)
  To: idosch; +Cc: netdev, jiri, petrm, mlxsw
In-Reply-To: <20180625074818.17073-1-idosch@mellanox.com>

From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 Jun 2018 10:48:12 +0300

> Petr says:
> 
> When traffic is inserted on a router interface associated with an 802.1q
> bridge, the VLAN that the traffic appears on is determined by PVID of
> the bridge device itself. However currently mlxsw always configures such
> traffic to be forwarded to VLAN 1, regardless of the bridge PVID.
> 
> Fix the problem by modifying the FID-handling code to assign such
> traffic not to FID that corresponds to VLAN 1, but to a FID that
> corresponds to the configured PVID. Bail out if there is no PVID. This
> is implemented in patches #1 and #2.
> 
> From that point on, also forbid any changes to bridge device PVID,
> because such changes would not be reflected. This is implemented in
> patches #3, #4 and #5.
> 
> Finally in patch #6, introduce tests that use bridge as a routed
> interface, and test mlxsw in both the currently-supported scenario of
> using PVID 1, and the newly-supported one of using a custom PVID.

Series applied, thank you.

^ permalink raw reply

* Re: [PATCH 1/1] r8152: napi hangup fix after disconnect
From: David Miller @ 2018-06-26  9:01 UTC (permalink / raw)
  To: jslaby; +Cc: linux-kernel, linux-usb, netdev
In-Reply-To: <20180625072627.9109-1-jslaby@suse.cz>

From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 25 Jun 2018 09:26:27 +0200

> When unplugging an r8152 adapter while the interface is UP, the NIC
> becomes unusable.  usb->disconnect (aka rtl8152_disconnect) deletes
> napi. Then, rtl8152_disconnect calls unregister_netdev and that invokes
> netdev->ndo_stop (aka rtl8152_close). rtl8152_close tries to
> napi_disable, but the napi is already deleted by disconnect above. So
> the first while loop in napi_disable never finishes. This results in
> complete deadlock of the network layer as there is rtnl_mutex held by
> unregister_netdev.
> 
> So avoid the call to napi_disable in rtl8152_close when the device is
> already gone.
> 
> The other calls to usb_kill_urb, cancel_delayed_work_sync,
> netif_stop_queue etc. seem to be fine. The urb and netdev are not
> destroyed yet.
> 
> Signed-off-by: Jiri Slaby <jslaby@suse.cz>

Applied, thank you.

^ permalink raw reply

* [PATCH net] net: macb: initialize bp->queues[0].bp for at91rm9200
From: Alexandre Belloni @ 2018-06-26  8:44 UTC (permalink / raw)
  To: David S. Miller
  Cc: Rafal Ozieblo, Nicolas Ferre, netdev, linux-kernel,
	Alexandre Belloni

The macb driver currently crashes on at91rm9200 with the following trace:

Unable to handle kernel NULL pointer dereference at virtual address 00000014
[...]
[<c031da44>] (macb_rx_desc) from [<c031f2bc>] (at91ether_open+0x2e8/0x3f8)
[<c031f2bc>] (at91ether_open) from [<c041e8d8>] (__dev_open+0x120/0x13c)
[<c041e8d8>] (__dev_open) from [<c041ec08>] (__dev_change_flags+0x17c/0x1a8)
[<c041ec08>] (__dev_change_flags) from [<c041ec4c>] (dev_change_flags+0x18/0x4c)
[<c041ec4c>] (dev_change_flags) from [<c07a5f4c>] (ip_auto_config+0x220/0x10b0)
[<c07a5f4c>] (ip_auto_config) from [<c000a4fc>] (do_one_initcall+0x78/0x18c)
[<c000a4fc>] (do_one_initcall) from [<c0783e50>] (kernel_init_freeable+0x184/0x1c4)
[<c0783e50>] (kernel_init_freeable) from [<c0574d70>] (kernel_init+0x8/0xe8)
[<c0574d70>] (kernel_init) from [<c00090e0>] (ret_from_fork+0x14/0x34)

Solve that by initializing bp->queues[0].bp in at91ether_init (as is done
in macb_init).

Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues")
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/net/ethernet/cadence/macb_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 3e93df5d4e3b..96cc03a6d942 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3726,6 +3726,8 @@ static int at91ether_init(struct platform_device *pdev)
 	int err;
 	u32 reg;
 
+	bp->queues[0].bp = bp;
+
 	dev->netdev_ops = &at91ether_netdev_ops;
 	dev->ethtool_ops = &macb_ethtool_ops;
 
-- 
2.18.0

^ permalink raw reply related

* Re: [PATCH 00/14] ARM: davinci: step towards removing at24_platform_data
From: Andrew Lunn @ 2018-06-26  8:38 UTC (permalink / raw)
  To: Bartosz Golaszewski
  Cc: Rob Herring, Grygorii Strashko, David Lechner, Ivan Khoronzhuk,
	Kevin Hilman, Greg Kroah-Hartman, Sekhar Nori, Russell King,
	Linux Kernel Mailing List, Bartosz Golaszewski, Lukas Wunner,
	Srinivas Kandagatla, Linux ARM, netdev, Florian Fainelli,
	linux-omap, David S . Miller, Dan Carpenter
In-Reply-To: <CAMRc=MejDn=t6EPbKOfhMfRAb_O-5hxtWRyjAWH1mb5BUySrDA@mail.gmail.com>

On Tue, Jun 26, 2018 at 09:44:19AM +0200, Bartosz Golaszewski wrote:
> 2018-06-25 20:02 GMT+02:00 Andrew Lunn <andrew@lunn.ch>:
> >> With my patch 1/14 you'll get -EPROBE_DEFER from nvmem_cell_get() if
> >> the nvmem provider is not yet registered. Will that help in your case?
> >
> > I don't think so. My driver instantiates the AT24 device. So if i get
> > -EPROBE_DEFER, i need to cleanup the probe, and return -EPROBE_DEFER
> > to the code. Which means i need to remove the AT24 device...
> >
> >        Andrew
> 
> Are you sure this is the correct approach? I understand that you want
> to use something like board files for your machine? Wouldn't it be
> better to register a platform device for at24, register a cell lookup
> and then depend on that cell from your driver?

Hi Bartosz

The machine is based around a Kontron Com Express module, with an
intel Ivy Bridge CPU. This is then placed into a custom carrier board,
which has a number of i2c devices.

I have a platform driver which matches on the DMI system ID for the
Kontron module.

The Com Express module has a PLD which implements i2c, gpio,
etc. There is an MFD for this, which instantiates the i2c-kempld i2c
bus driver.

My platform driver finds this i2c-kempld bus driver. If it does not
exist yet, it returns -EPROBE_DEFER. If it is found it instantiates an
at24 device on it. I need to look at the content of the EEPROM to
determine the hardware revision, plus do a checksum.  From that, i
need to instantiate 1 or 2 additional AT24, up to 4 GPIO expanders,
an i2c to spi converter, add some gpio-leds on the gpio expanders,
create a bit-banging MDIO bus, instantiate an Ethernet switch on the
MDIO bus, maybe add an Ethernet switch to the SPI bus, etc.

As you can see, i have a chain of events. I cannot move onto the next
part of the chain until i know the probe for the previous part has
finished. e.g. i cannot add gpio-leds until i know the gpio expander
has probed. But the gpio expander provides a call back, similar to the
at24 setup(). The MDIO bus and the SPI bus have a mechanism to register
an info structure, just like you have done for NVMEM cells. So that
works out.

The weak link in this chain is that first at24 probe, and knowing when
i can access the nvmem cells for the revision and checksum
information. -EPROBE_DEFER does not help me here. I need either some
sort of blocking wait for the cells to become available, or a callback
in a context which allows me to instantiate more devices.

I also have some steps which cannot be undone. You don't provide a
mechanism to unregister the nvmem info structure. The I2C and MDIO
equivalents also do not provide an unregister for bus info. So once i
register the first info structure, i'm past the point of no return. I
cannot return -EPROBE_DEFER because i cannot unregister the info
structures, so that i can register them again the next time the
platform driver gets probed.

	 Andrew

^ permalink raw reply

* [PATCH net-next V3 2/2] cxgb4: Support ethtool private flags
From: Ganesh Goudar @ 2018-06-26  8:25 UTC (permalink / raw)
  To: netdev, davem
  Cc: nirranjan, indranil, venkatesh, Arjun Vynipadath, Casey Leedom,
	Ganesh Goudar

From: Arjun Vynipadath <arjun@chelsio.com>

This is used to change TX workrequests, which helps in
host->vf communication.

Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 42 ++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index f7eef93..ddb8b9e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -177,6 +177,10 @@ static char loopback_stats_strings[][ETH_GSTRING_LEN] = {
 	"bg3_frames_trunc       ",
 };
 
+static const char cxgb4_priv_flags_strings[][ETH_GSTRING_LEN] = {
+	[PRIV_FLAG_PORT_TX_VM_BIT] = "port_tx_vm_wr",
+};
+
 static int get_sset_count(struct net_device *dev, int sset)
 {
 	switch (sset) {
@@ -185,6 +189,8 @@ static int get_sset_count(struct net_device *dev, int sset)
 		       ARRAY_SIZE(adapter_stats_strings) +
 		       ARRAY_SIZE(channel_stats_strings) +
 		       ARRAY_SIZE(loopback_stats_strings);
+	case ETH_SS_PRIV_FLAGS:
+		return ARRAY_SIZE(cxgb4_priv_flags_strings);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -235,6 +241,7 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 			 FW_HDR_FW_VER_MINOR_G(exprom_vers),
 			 FW_HDR_FW_VER_MICRO_G(exprom_vers),
 			 FW_HDR_FW_VER_BUILD_G(exprom_vers));
+	info->n_priv_flags = ARRAY_SIZE(cxgb4_priv_flags_strings);
 }
 
 static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -250,6 +257,9 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
 		data += sizeof(channel_stats_strings);
 		memcpy(data, loopback_stats_strings,
 		       sizeof(loopback_stats_strings));
+	} else if (stringset == ETH_SS_PRIV_FLAGS) {
+		memcpy(data, cxgb4_priv_flags_strings,
+		       sizeof(cxgb4_priv_flags_strings));
 	}
 }
 
@@ -1499,6 +1509,36 @@ static int cxgb4_get_module_eeprom(struct net_device *dev,
 			 offset, len, &data[eprom->len - len]);
 }
 
+static u32 cxgb4_get_priv_flags(struct net_device *netdev)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adapter = pi->adapter;
+
+	return (adapter->eth_flags | pi->eth_flags);
+}
+
+/**
+ *	set_flags - set/unset specified flags if passed in new_flags
+ *	@cur_flags: pointer to current flags
+ *	@new_flags: new incoming flags
+ *	@flags: set of flags to set/unset
+ */
+static inline void set_flags(u32 *cur_flags, u32 new_flags, u32 flags)
+{
+	*cur_flags = (*cur_flags & ~flags) | (new_flags & flags);
+}
+
+static int cxgb4_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct port_info *pi = netdev_priv(netdev);
+	struct adapter *adapter = pi->adapter;
+
+	set_flags(&adapter->eth_flags, flags, PRIV_FLAGS_ADAP);
+	set_flags(&pi->eth_flags, flags, PRIV_FLAGS_PORT);
+
+	return 0;
+}
+
 static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_link_ksettings = get_link_ksettings,
 	.set_link_ksettings = set_link_ksettings,
@@ -1535,6 +1575,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
 	.get_dump_data     = get_dump_data,
 	.get_module_info   = cxgb4_get_module_info,
 	.get_module_eeprom = cxgb4_get_module_eeprom,
+	.get_priv_flags    = cxgb4_get_priv_flags,
+	.set_priv_flags    = cxgb4_set_priv_flags,
 };
 
 void cxgb4_set_ethtool_ops(struct net_device *netdev)
-- 
2.1.0

^ permalink raw reply related

* [PATCH net-next V3 1/2] cxgb4: Add support for FW_ETH_TX_PKT_VM_WR
From: Ganesh Goudar @ 2018-06-26  8:24 UTC (permalink / raw)
  To: netdev, davem
  Cc: nirranjan, indranil, venkatesh, Arjun Vynipadath, Casey Leedom,
	Ganesh Goudar

From: Arjun Vynipadath <arjun@chelsio.com>

The present TX workrequest(FW_ETH_TX_PKT_WR) can't be used for
host->vf communication, since it doesn't loopback the outgoing
packets to virtual interfaces on the same port. This can be done using
FW_ETH_TX_PKT_VM_WR.
This fix depends on ethtool_flags to determine what WR to use for
TX path. Support for setting these flags by user is added in next commit.

Based on the original work by : Casey Leedom <leedom@chelsio.com>

V3
- Made eth_flags type consistent across struct adapter and
  struct port_info.
V2
- Renamed t4_eth_xmit() and t4vf_eth_xmit(), since some compilers
  were warning about conflicting definition in cxgb4vf driver

Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      |  13 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c        | 372 +++++++++++++++++++++++-
 3 files changed, 383 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 1adb968..a4ea53d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -522,6 +522,15 @@ enum {
 	MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
 };
 
+enum {
+	PRIV_FLAG_PORT_TX_VM_BIT,
+};
+
+#define PRIV_FLAG_PORT_TX_VM		BIT(PRIV_FLAG_PORT_TX_VM_BIT)
+
+#define PRIV_FLAGS_ADAP			0
+#define PRIV_FLAGS_PORT			PRIV_FLAG_PORT_TX_VM
+
 struct adapter;
 struct sge_rspq;
 
@@ -558,6 +567,7 @@ struct port_info {
 	struct hwtstamp_config tstamp_config;
 	bool ptp_enable;
 	struct sched_table *sched_tbl;
+	u32 eth_flags;
 };
 
 struct dentry;
@@ -868,6 +878,7 @@ struct adapter {
 	unsigned int flags;
 	unsigned int adap_idx;
 	enum chip_type chip;
+	u32 eth_flags;
 
 	int msg_enable;
 	__be16 vxlan_port;
@@ -1334,7 +1345,7 @@ void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat);
 void t4_free_sge_resources(struct adapter *adap);
 void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q);
 irq_handler_t t4_intr_handler(struct adapter *adap);
-netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev);
 int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 		     const struct pkt_gl *gl);
 int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index bc03c17..d3b0f9c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3217,7 +3217,7 @@ static netdev_features_t cxgb_fix_features(struct net_device *dev,
 static const struct net_device_ops cxgb4_netdev_ops = {
 	.ndo_open             = cxgb_open,
 	.ndo_stop             = cxgb_close,
-	.ndo_start_xmit       = t4_eth_xmit,
+	.ndo_start_xmit       = t4_start_xmit,
 	.ndo_select_queue     =	cxgb_select_queue,
 	.ndo_get_stats64      = cxgb_get_stats,
 	.ndo_set_rx_mode      = cxgb_set_rxmode,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 395e2a0..f1311fd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1288,13 +1288,13 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
 }
 
 /**
- *	t4_eth_xmit - add a packet to an Ethernet Tx queue
+ *	cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *	@skb: the packet
  *	@dev: the egress net device
  *
  *	Add a packet to an SGE Ethernet Tx queue.  Runs with softirqs disabled.
  */
-netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	u32 wr_mid, ctrl0, op;
 	u64 cntrl, *end, *sgl;
@@ -1547,6 +1547,374 @@ out_free:	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }
 
+/* Constants ... */
+enum {
+	/* Egress Queue sizes, producer and consumer indices are all in units
+	 * of Egress Context Units bytes.  Note that as far as the hardware is
+	 * concerned, the free list is an Egress Queue (the host produces free
+	 * buffers which the hardware consumes) and free list entries are
+	 * 64-bit PCI DMA addresses.
+	 */
+	EQ_UNIT = SGE_EQ_IDXSIZE,
+	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
+	TXD_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
+
+	T4VF_ETHTXQ_MAX_HDR = (sizeof(struct fw_eth_tx_pkt_vm_wr) +
+			       sizeof(struct cpl_tx_pkt_lso_core) +
+			       sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64),
+};
+
+/**
+ *	t4vf_is_eth_imm - can an Ethernet packet be sent as immediate data?
+ *	@skb: the packet
+ *
+ *	Returns whether an Ethernet packet is small enough to fit completely as
+ *	immediate data.
+ */
+static inline int t4vf_is_eth_imm(const struct sk_buff *skb)
+{
+	/* The VF Driver uses the FW_ETH_TX_PKT_VM_WR firmware Work Request
+	 * which does not accommodate immediate data.  We could dike out all
+	 * of the support code for immediate data but that would tie our hands
+	 * too much if we ever want to enhance the firmware.  It would also
+	 * create more differences between the PF and VF Drivers.
+	 */
+	return false;
+}
+
+/**
+ *	t4vf_calc_tx_flits - calculate the number of flits for a packet TX WR
+ *	@skb: the packet
+ *
+ *	Returns the number of flits needed for a TX Work Request for the
+ *	given Ethernet packet, including the needed WR and CPL headers.
+ */
+static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb)
+{
+	unsigned int flits;
+
+	/* If the skb is small enough, we can pump it out as a work request
+	 * with only immediate data.  In that case we just have to have the
+	 * TX Packet header plus the skb data in the Work Request.
+	 */
+	if (t4vf_is_eth_imm(skb))
+		return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt),
+				    sizeof(__be64));
+
+	/* Otherwise, we're going to have to construct a Scatter gather list
+	 * of the skb body and fragments.  We also include the flits necessary
+	 * for the TX Packet Work Request and CPL.  We always have a firmware
+	 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
+	 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
+	 * message or, if we're doing a Large Send Offload, an LSO CPL message
+	 * with an embedded TX Packet Write CPL message.
+	 */
+	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+	if (skb_shinfo(skb)->gso_size)
+		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
+			  sizeof(struct cpl_tx_pkt_lso_core) +
+			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+	else
+		flits += (sizeof(struct fw_eth_tx_pkt_vm_wr) +
+			  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+	return flits;
+}
+
+/**
+ *	cxgb4_vf_eth_xmit - add a packet to an Ethernet TX queue
+ *	@skb: the packet
+ *	@dev: the egress net device
+ *
+ *	Add a packet to an SGE Ethernet TX queue.  Runs with softirqs disabled.
+ */
+static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
+				     struct net_device *dev)
+{
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	const struct skb_shared_info *ssi;
+	struct fw_eth_tx_pkt_vm_wr *wr;
+	int qidx, credits, max_pkt_len;
+	const size_t fw_hdr_copy_len;
+	struct cpl_tx_pkt_core *cpl;
+	const struct port_info *pi;
+	unsigned int flits, ndesc;
+	struct sge_eth_txq *txq;
+	struct adapter *adapter;
+	u64 cntrl, *end;
+	u32 wr_mid;
+
+	fw_hdr_copy_len = (sizeof(wr->ethmacdst) + sizeof(wr->ethmacsrc) +
+			   sizeof(wr->ethtype) + sizeof(wr->vlantci));
+
+	/* The chip minimum packet length is 10 octets but the firmware
+	 * command that we are using requires that we copy the Ethernet header
+	 * (including the VLAN tag) into the header so we reject anything
+	 * smaller than that ...
+	 */
+	if (unlikely(skb->len < fw_hdr_copy_len))
+		goto out_free;
+
+	/* Discard the packet if the length is greater than mtu */
+	max_pkt_len = ETH_HLEN + dev->mtu;
+	if (skb_vlan_tag_present(skb))
+		max_pkt_len += VLAN_HLEN;
+	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+		goto out_free;
+
+	/* Figure out which TX Queue we're going to use. */
+	pi = netdev_priv(dev);
+	adapter = pi->adapter;
+	qidx = skb_get_queue_mapping(skb);
+	WARN_ON(qidx >= pi->nqsets);
+	txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
+
+	/* Take this opportunity to reclaim any TX Descriptors whose DMA
+	 * transfers have completed.
+	 */
+	cxgb4_reclaim_completed_tx(adapter, &txq->q, true);
+
+	/* Calculate the number of flits and TX Descriptors we're going to
+	 * need along with how many TX Descriptors will be left over after
+	 * we inject our Work Request.
+	 */
+	flits = t4vf_calc_tx_flits(skb);
+	ndesc = flits_to_desc(flits);
+	credits = txq_avail(&txq->q) - ndesc;
+
+	if (unlikely(credits < 0)) {
+		/* Not enough room for this packet's Work Request.  Stop the
+		 * TX Queue and return a "busy" condition.  The queue will get
+		 * started later on when the firmware informs us that space
+		 * has opened up.
+		 */
+		eth_txq_stop(txq);
+		dev_err(adapter->pdev_dev,
+			"%s: TX ring %u full while queue awake!\n",
+			dev->name, qidx);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (!t4vf_is_eth_imm(skb) &&
+	    unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
+		/* We need to map the skb into PCI DMA space (because it can't
+		 * be in-lined directly into the Work Request) and the mapping
+		 * operation failed.  Record the error and drop the packet.
+		 */
+		txq->mapping_err++;
+		goto out_free;
+	}
+
+	wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
+	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+		/* After we're done injecting the Work Request for this
+		 * packet, we'll be below our "stop threshold" so stop the TX
+		 * Queue now and schedule a request for an SGE Egress Queue
+		 * Update message.  The queue will get started later on when
+		 * the firmware processes this Work Request and sends us an
+		 * Egress Queue Status Update message indicating that space
+		 * has opened up.
+		 */
+		eth_txq_stop(txq);
+		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+	}
+
+	/* Start filling in our Work Request.  Note that we do _not_ handle
+	 * the WR Header wrapping around the TX Descriptor Ring.  If our
+	 * maximum header size ever exceeds one TX Descriptor, we'll need to
+	 * do something else here.
+	 */
+	WARN_ON(DIV_ROUND_UP(T4VF_ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
+	wr = (void *)&txq->q.desc[txq->q.pidx];
+	wr->equiq_to_len16 = cpu_to_be32(wr_mid);
+	wr->r3[0] = cpu_to_be32(0);
+	wr->r3[1] = cpu_to_be32(0);
+	skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
+	end = (u64 *)wr + flits;
+
+	/* If this is a Large Send Offload packet we'll put in an LSO CPL
+	 * message with an encapsulated TX Packet CPL message.  Otherwise we
+	 * just use a TX Packet CPL message.
+	 */
+	ssi = skb_shinfo(skb);
+	if (ssi->gso_size) {
+		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
+		int l3hdr_len = skb_network_header_len(skb);
+		int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
+
+		wr->op_immdlen =
+			cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
+				    FW_WR_IMMDLEN_V(sizeof(*lso) +
+						    sizeof(*cpl)));
+		 /* Fill in the LSO CPL message. */
+		lso->lso_ctrl =
+			cpu_to_be32(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
+				    LSO_FIRST_SLICE_F |
+				    LSO_LAST_SLICE_F |
+				    LSO_IPV6_V(v6) |
+				    LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
+				    LSO_IPHDR_LEN_V(l3hdr_len / 4) |
+				    LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
+		lso->ipid_ofst = cpu_to_be16(0);
+		lso->mss = cpu_to_be16(ssi->gso_size);
+		lso->seqno_offset = cpu_to_be32(0);
+		if (is_t4(adapter->params.chip))
+			lso->len = cpu_to_be32(skb->len);
+		else
+			lso->len = cpu_to_be32(LSO_T5_XFER_SIZE_V(skb->len));
+
+		/* Set up TX Packet CPL pointer, control word and perform
+		 * accounting.
+		 */
+		cpl = (void *)(lso + 1);
+
+		if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5)
+			cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
+		else
+			cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);
+
+		cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
+					   TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
+			 TXPKT_IPHDR_LEN_V(l3hdr_len);
+		txq->tso++;
+		txq->tx_cso += ssi->gso_segs;
+	} else {
+		int len;
+
+		len = (t4vf_is_eth_imm(skb)
+		       ? skb->len + sizeof(*cpl)
+		       : sizeof(*cpl));
+		wr->op_immdlen =
+			cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_PKT_VM_WR) |
+				    FW_WR_IMMDLEN_V(len));
+
+		/* Set up TX Packet CPL pointer, control word and perform
+		 * accounting.
+		 */
+		cpl = (void *)(wr + 1);
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			cntrl = hwcsum(adapter->params.chip, skb) |
+				TXPKT_IPCSUM_DIS_F;
+			txq->tx_cso++;
+		} else {
+			cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
+		}
+	}
+
+	/* If there's a VLAN tag present, add that to the list of things to
+	 * do in this Work Request.
+	 */
+	if (skb_vlan_tag_present(skb)) {
+		txq->vlan_ins++;
+		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
+	}
+
+	 /* Fill in the TX Packet CPL message header. */
+	cpl->ctrl0 = cpu_to_be32(TXPKT_OPCODE_V(CPL_TX_PKT_XT) |
+				 TXPKT_INTF_V(pi->port_id) |
+				 TXPKT_PF_V(0));
+	cpl->pack = cpu_to_be16(0);
+	cpl->len = cpu_to_be16(skb->len);
+	cpl->ctrl1 = cpu_to_be64(cntrl);
+
+	/* Fill in the body of the TX Packet CPL message with either in-lined
+	 * data or a Scatter/Gather List.
+	 */
+	if (t4vf_is_eth_imm(skb)) {
+		/* In-line the packet's data and free the skb since we don't
+		 * need it any longer.
+		 */
+		cxgb4_inline_tx_skb(skb, &txq->q, cpl + 1);
+		dev_consume_skb_any(skb);
+	} else {
+		/* Write the skb's Scatter/Gather list into the TX Packet CPL
+		 * message and retain a pointer to the skb so we can free it
+		 * later when its DMA completes.  (We store the skb pointer
+		 * in the Software Descriptor corresponding to the last TX
+		 * Descriptor used by the Work Request.)
+		 *
+		 * The retained skb will be freed when the corresponding TX
+		 * Descriptors are reclaimed after their DMAs complete.
+		 * However, this could take quite a while since, in general,
+		 * the hardware is set up to be lazy about sending DMA
+		 * completion notifications to us and we mostly perform TX
+		 * reclaims in the transmit routine.
+		 *
+		 * This is good for performance but means that we rely on new
+		 * TX packets arriving to run the destructors of completed
+		 * packets, which open up space in their sockets' send queues.
+		 * Sometimes we do not get such new packets causing TX to
+		 * stall.  A single UDP transmitter is a good example of this
+		 * situation.  We have a clean up timer that periodically
+		 * reclaims completed packets but it doesn't run often enough
+		 * (nor do we want it to) to prevent lengthy stalls.  A
+		 * solution to this problem is to run the destructor early,
+		 * after the packet is queued but before it's DMAd.  A con is
+		 * that we lie to socket memory accounting, but the amount of
+		 * extra memory is reasonable (limited by the number of TX
+		 * descriptors), the packets do actually get freed quickly by
+		 * new packets almost always, and for protocols like TCP that
+		 * wait for acks to really free up the data the extra memory
+		 * is even less.  On the positive side we run the destructors
+		 * on the sending CPU rather than on a potentially different
+		 * completing CPU, usually a good thing.
+		 *
+		 * Run the destructor before telling the DMA engine about the
+		 * packet to make sure it doesn't complete and get freed
+		 * prematurely.
+		 */
+		struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
+		struct sge_txq *tq = &txq->q;
+		int last_desc;
+
+		/* If the Work Request header was an exact multiple of our TX
+		 * Descriptor length, then it's possible that the starting SGL
+		 * pointer lines up exactly with the end of our TX Descriptor
+		 * ring.  If that's the case, wrap around to the beginning
+		 * here ...
+		 */
+		if (unlikely((void *)sgl == (void *)tq->stat)) {
+			sgl = (void *)tq->desc;
+			end = (void *)((void *)tq->desc +
+				       ((void *)end - (void *)tq->stat));
+		}
+
+		cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
+		skb_orphan(skb);
+
+		last_desc = tq->pidx + ndesc - 1;
+		if (last_desc >= tq->size)
+			last_desc -= tq->size;
+		tq->sdesc[last_desc].skb = skb;
+		tq->sdesc[last_desc].sgl = sgl;
+	}
+
+	/* Advance our internal TX Queue state, tell the hardware about
+	 * the new TX descriptors and return success.
+	 */
+	txq_advance(&txq->q, ndesc);
+
+	cxgb4_ring_tx_db(adapter, &txq->q, ndesc);
+	return NETDEV_TX_OK;
+
+out_free:
+	/* An error of some sort happened.  Free the TX skb and tell the
+	 * OS that we've "dealt" with the packet ...
+	 */
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+netdev_tx_t t4_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct port_info *pi = netdev_priv(dev);
+
+	if (unlikely(pi->eth_flags & PRIV_FLAG_PORT_TX_VM))
+		return cxgb4_vf_eth_xmit(skb, dev);
+
+	return cxgb4_eth_xmit(skb, dev);
+}
+
 /**
  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
  *	@q: the SGE control Tx queue
-- 
2.1.0

^ permalink raw reply related

* Re: [PATCH v2 net] nfp: cast sizeof() to int when comparing with error code
From: cgxu519 @ 2018-06-26  8:21 UTC (permalink / raw)
  To: Julia Lawall
  Cc: Joe Perches, jakub.kicinski, davem, LKML, cocci, oss-drivers,
	netdev, Dmitry Torokhov, linux-input, linux-s390
In-Reply-To: <alpine.DEB.2.20.1806261005190.4512@hadrien>

On 06/26/2018 04:06 PM, Julia Lawall wrote:
>
> On Mon, 25 Jun 2018, Joe Perches wrote:
>
>> On Tue, 2018-06-26 at 09:16 +0800, Chengguang Xu wrote:
>>> sizeof() will return unsigned value so in the error check
>>> negative error code will be always larger than sizeof().
>> This looks like a general class of error in the kernel
>> where a signed result that could be returning a -errno
>> is tested against < or <= sizeof()
>>
>> A couple examples:
>>
>> drivers/input/mouse/elan_i2c_smbus.c:
>>
>> 		len = i2c_smbus_read_block_data(client,
>> 						ETP_SMBUS_IAP_PASSWORD_READ,
>> 						val);
>> 		if (len < sizeof(u16)) {
>>
>> i2c_smbus_read_block_data can return a negative errno
>>
>>
>> net/smc/smc_clc.c:
>>
>> 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
>> 			     sizeof(struct smc_clc_msg_decline));
>> 	if (len < sizeof(struct smc_clc_msg_decline))
>>
>> where kernel_sendmsg can return a negative errno
>>
>> There are probably others, I didn't look hard.
>>
>> Perhaps a cocci script to find these could be generated?
> Here's another one:
>
> drivers/usb/serial/ir-usb.c
> @@ -126,13 +126,8 @@ irda_usb_find_class_desc(struct usb_seri
>   	if (!desc)
>   		return NULL;
>
> -	ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
> -			USB_REQ_CS_IRDA_GET_CLASS_DESC,
> -			USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
> -			0, ifnum, desc, sizeof(*desc), 1000);
>
>   	dev_dbg(&serial->dev->dev, "%s -  ret=%d\n", __func__, ret);
> -	if (ret < sizeof(*desc)) {
>   		dev_dbg(&serial->dev->dev,
>   			"%s - class descriptor read %s (%d)\n", __func__,
>   			(ret < 0) ? "failed" : "too short", ret);
>
> There are other results, but I haven't checked all of them.

Hi Julia,

Thanks for checking. I posted a patch yesterday to fix three places in
the usb subsystem, and the patch is in the queue now, so you can skip
these places.

The details of the patch:
---

diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 24b06c7e5e2d..7643716b5299 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -132,7 +132,7 @@ irda_usb_find_class_desc(struct usb_serial *serial, unsigned int ifnum)
  			0, ifnum, desc, sizeof(*desc), 1000);
  
  	dev_dbg(&serial->dev->dev, "%s -  ret=%d\n", __func__, ret);
-	if (ret < sizeof(*desc)) {
+	if (ret < (int)sizeof(*desc)) {
  		dev_dbg(&serial->dev->dev,
  			"%s - class descriptor read %s (%d)\n", __func__,
  			(ret < 0) ? "failed" : "too short", ret);
diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index 958e12e1e7c7..ff2322ea5e14 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -194,7 +194,7 @@ static inline int qt2_getregister(struct usb_device *dev,
  	ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
  			      QT_SET_GET_REGISTER, 0xc0, reg,
  			      uart, data, sizeof(*data), QT2_USB_TIMEOUT);
-	if (ret < sizeof(*data)) {
+	if (ret < (int)sizeof(*data)) {
  		if (ret >= 0)
  			ret = -EIO;
  	}
diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c
index 2083c267787b..0900b47b5f57 100644
--- a/drivers/usb/serial/ssu100.c
+++ b/drivers/usb/serial/ssu100.c
@@ -104,7 +104,7 @@ static inline int ssu100_getregister(struct usb_device *dev,
  	ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
  			      QT_SET_GET_REGISTER, 0xc0, reg,
  			      uart, data, sizeof(*data), 300);
-	if (ret < sizeof(*data)) {
+	if (ret < (int)sizeof(*data)) {
  		if (ret >= 0)
  			ret = -EIO;
  	}
---




Thanks,
Chengguang.

^ permalink raw reply related

* [PATCH v3] net: ethernet: stmmac: dwmac-rk: Add GMAC support for px30
From: David Wu @ 2018-06-26  8:19 UTC (permalink / raw)
  To: davem, heiko, robh+dt
  Cc: mark.rutland, huangtao, netdev, linux-kernel, linux-rockchip,
	David Wu, linux-arm-kernel

Add constants and callback functions for the dwmac on the PX30 SoC.
The base structure is the same, but registers and the bits in
them are moved slightly, and a clk_mac_speed clock is added for
selecting the MAC speed.

Signed-off-by: David Wu <david.wu@rock-chips.com>
---
Change in v3:
- Add the clock enable/disable for clk_mac_speed.

Change in v2:
- Fix some errors in commit title and message.

 .../devicetree/bindings/net/rockchip-dwmac.txt     |  1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c     | 71 ++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index 9c16ee2..3b71da7 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -4,6 +4,7 @@ The device node has following properties.
 
 Required properties:
  - compatible: should be "rockchip,<name>-gamc"
+   "rockchip,px30-gmac":   found on PX30 SoCs
    "rockchip,rk3128-gmac": found on RK312x SoCs
    "rockchip,rk3228-gmac": found on RK322x SoCs
    "rockchip,rk3288-gmac": found on RK3288 SoCs
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 13133b3..fc5fef7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -61,6 +61,7 @@ struct rk_priv_data {
 	struct clk *mac_clk_tx;
 	struct clk *clk_mac_ref;
 	struct clk *clk_mac_refout;
+	struct clk *clk_mac_speed;
 	struct clk *aclk_mac;
 	struct clk *pclk_mac;
 	struct clk *clk_phy;
@@ -83,6 +84,64 @@ struct rk_priv_data {
 	(((tx) ? soc##_GMAC_TXCLK_DLY_ENABLE : soc##_GMAC_TXCLK_DLY_DISABLE) | \
 	 ((rx) ? soc##_GMAC_RXCLK_DLY_ENABLE : soc##_GMAC_RXCLK_DLY_DISABLE))
 
+#define PX30_GRF_GMAC_CON1		0x0904
+
+/* PX30_GRF_GMAC_CON1 */
+#define PX30_GMAC_PHY_INTF_SEL_RMII	(GRF_CLR_BIT(4) | GRF_CLR_BIT(5) | \
+					 GRF_BIT(6))
+#define PX30_GMAC_SPEED_10M		GRF_CLR_BIT(2)
+#define PX30_GMAC_SPEED_100M		GRF_BIT(2)
+
+static void px30_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+		     PX30_GMAC_PHY_INTF_SEL_RMII);
+}
+
+static void px30_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+	int ret;
+
+	if (IS_ERR(bsp_priv->clk_mac_speed)) {
+		dev_err(dev, "%s: Missing clk_mac_speed clock\n", __func__);
+		return;
+	}
+
+	if (speed == 10) {
+		regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+			     PX30_GMAC_SPEED_10M);
+
+		ret = clk_set_rate(bsp_priv->clk_mac_speed, 2500000);
+		if (ret)
+			dev_err(dev, "%s: set clk_mac_speed rate 2500000 failed: %d\n",
+				__func__, ret);
+	} else if (speed == 100) {
+		regmap_write(bsp_priv->grf, PX30_GRF_GMAC_CON1,
+			     PX30_GMAC_SPEED_100M);
+
+		ret = clk_set_rate(bsp_priv->clk_mac_speed, 25000000);
+		if (ret)
+			dev_err(dev, "%s: set clk_mac_speed rate 25000000 failed: %d\n",
+				__func__, ret);
+
+	} else {
+		dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+	}
+}
+
+static const struct rk_gmac_ops px30_ops = {
+	.set_to_rmii = px30_set_to_rmii,
+	.set_rmii_speed = px30_set_rmii_speed,
+};
+
 #define RK3128_GRF_MAC_CON0	0x0168
 #define RK3128_GRF_MAC_CON1	0x016c
 
@@ -1042,6 +1101,10 @@ static int rk_gmac_clk_init(struct plat_stmmacenet_data *plat)
 		}
 	}
 
+	bsp_priv->clk_mac_speed = devm_clk_get(dev, "clk_mac_speed");
+	if (IS_ERR(bsp_priv->clk_mac_speed))
+		dev_err(dev, "cannot get clock %s\n", "clk_mac_speed");
+
 	if (bsp_priv->clock_input) {
 		dev_info(dev, "clock input from PHY\n");
 	} else {
@@ -1094,6 +1157,9 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
 			if (!IS_ERR(bsp_priv->mac_clk_tx))
 				clk_prepare_enable(bsp_priv->mac_clk_tx);
 
+			if (!IS_ERR(bsp_priv->clk_mac_speed))
+				clk_prepare_enable(bsp_priv->clk_mac_speed);
+
 			/**
 			 * if (!IS_ERR(bsp_priv->clk_mac))
 			 *	clk_prepare_enable(bsp_priv->clk_mac);
@@ -1128,6 +1194,10 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
 
 			if (!IS_ERR(bsp_priv->mac_clk_tx))
 				clk_disable_unprepare(bsp_priv->mac_clk_tx);
+
+			if (!IS_ERR(bsp_priv->clk_mac_speed))
+				clk_disable_unprepare(bsp_priv->clk_mac_speed);
+
 			/**
 			 * if (!IS_ERR(bsp_priv->clk_mac))
 			 *	clk_disable_unprepare(bsp_priv->clk_mac);
@@ -1424,6 +1494,7 @@ static int rk_gmac_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume);
 
 static const struct of_device_id rk_gmac_dwmac_match[] = {
+	{ .compatible = "rockchip,px30-gmac",	.data = &px30_ops   },
 	{ .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops },
 	{ .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
 	{ .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v2 net] nfp: cast sizeof() to int when comparing with error code
From: Julia Lawall @ 2018-06-26  8:06 UTC (permalink / raw)
  To: Joe Perches
  Cc: linux-s390, jakub.kicinski, Chengguang Xu, netdev,
	Dmitry Torokhov, LKML, davem, oss-drivers, linux-input, cocci
In-Reply-To: <ff0b2834b771a467246b9f7c1b4600c38f5bb463.camel@perches.com>



On Mon, 25 Jun 2018, Joe Perches wrote:

> On Tue, 2018-06-26 at 09:16 +0800, Chengguang Xu wrote:
> > sizeof() will return unsigned value so in the error check
> > negative error code will be always larger than sizeof().
>
> This looks like a general class of error in the kernel
> where a signed result that could be returning a -errno
> is tested against < or <= sizeof()
>
> A couple examples:
>
> drivers/input/mouse/elan_i2c_smbus.c:
>
> 		len = i2c_smbus_read_block_data(client,
> 						ETP_SMBUS_IAP_PASSWORD_READ,
> 						val);
> 		if (len < sizeof(u16)) {
>
> i2c_smbus_read_block_data can return a negative errno
>
>
> net/smc/smc_clc.c:
>
> 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
> 			     sizeof(struct smc_clc_msg_decline));
> 	if (len < sizeof(struct smc_clc_msg_decline))
>
> where kernel_sendmsg can return a negative errno
>
> There are probably others, I didn't look hard.
>
> Perhaps a cocci script to find these could be generated?

Here's another one:

drivers/usb/serial/ir-usb.c
@@ -126,13 +126,8 @@ irda_usb_find_class_desc(struct usb_seri
 	if (!desc)
 		return NULL;

-	ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0),
-			USB_REQ_CS_IRDA_GET_CLASS_DESC,
-			USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			0, ifnum, desc, sizeof(*desc), 1000);

 	dev_dbg(&serial->dev->dev, "%s -  ret=%d\n", __func__, ret);
-	if (ret < sizeof(*desc)) {
 		dev_dbg(&serial->dev->dev,
 			"%s - class descriptor read %s (%d)\n", __func__,
 			(ret < 0) ? "failed" : "too short", ret);

There are other results, but I haven't checked all of them.

julia

^ permalink raw reply

* [patch net-next v2 9/9] selftests: forwarding: add tests for TC chain templates
From: Jiri Pirko @ 2018-06-26  8:00 UTC (permalink / raw)
  To: netdev
  Cc: davem, jhs, xiyou.wangcong, jakub.kicinski, simon.horman,
	john.hurley, dsahern, mlxsw
In-Reply-To: <20180626080000.12964-1-jiri@resnulli.us>

From: Jiri Pirko <jiri@mellanox.com>

Add basic sanity tests for TC chain templates.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 tools/testing/selftests/net/forwarding/lib.sh      |   9 ++
 .../selftests/net/forwarding/tc_chaintemplates.sh  | 160 +++++++++++++++++++++
 2 files changed, 169 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_chaintemplates.sh

diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index a736d1d7ecdb..128a5b5a8ea9 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -39,6 +39,15 @@ check_tc_shblock_support()
 	fi
 }
 
+check_tc_chaintemplate_support()
+{
+	tc filter help 2>&1|grep template &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing chain template support"
+		exit 1
+	fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
 	echo "SKIP: need root privileges"
 	exit 0
diff --git a/tools/testing/selftests/net/forwarding/tc_chaintemplates.sh b/tools/testing/selftests/net/forwarding/tc_chaintemplates.sh
new file mode 100755
index 000000000000..21f2c18e973a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chaintemplates.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="template_create_destroy template_filter_fits \
+	   template_create_nonempty template_destroy_nonempty"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+template_create_destroy()
+{
+	RET=0
+
+	tc filter template add dev $h2 ingress protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF
+	check_err $? "Failed to create template for default chain"
+
+	tc filter template add dev $h2 ingress chain 1 protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF
+	check_err $? "Failed to create template for chain 1"
+
+	tc filter template del dev $h2 ingress
+	check_err $? "Failed to destroy template for default chain"
+
+	tc filter template del dev $h2 ingress chain 1
+	check_err $? "Failed to destroy template for chain 1"
+
+	log_test "template create destroy"
+}
+
+template_filter_fits()
+{
+	RET=0
+
+	tc filter template add dev $h2 ingress protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+	tc filter template add dev $h2 ingress chain 1 protocol ip \
+		flower src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower dst_mac $h2mac action drop
+	check_err $? "Failed to insert filter which fits template"
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
+		flower src_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeded to insert filter which does not fit template"
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower src_mac $h2mac action drop
+	check_err $? "Failed to insert filter which fits template"
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+		flower dst_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeded to insert filter which does not fit template"
+
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+		flower &> /dev/null
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower &> /dev/null
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1102 \
+		flower &> /dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower &> /dev/null
+
+	tc filter template del dev $h2 ingress chain 1
+	tc filter template del dev $h2 ingress
+
+	log_test "template filter fits"
+}
+
+template_create_nonempty()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower dst_mac $h2mac action drop
+	tc filter template add dev $h2 ingress protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+	check_fail $? "Incorrectly succeded to create template for non-empty chain"
+
+	tc filter template del dev $h2 ingress &> /dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 flower
+
+	log_test "template create non-empty"
+}
+
+template_destroy_nonempty()
+{
+	RET=0
+
+	tc filter template add dev $h2 ingress protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower dst_mac $h2mac action drop
+
+	tc filter template del dev $h2 ingress &> /dev/null
+	check_fail $? "Incorrectly succeded to destroy template for non-empty chain"
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 flower
+	tc filter template del dev $h2 ingress &> /dev/null
+	check_err $? "Failed to destroy template for empty chain"
+
+	log_test "template destroy non-empty"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+check_tc_chaintemplate_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
2.14.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox