Netdev List
 help / color / mirror / Atom feed
* [net-next 08/14] igc: Remove unused code
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

Remove unused igc_adv_data_desc definition from igc_base.h file.
Descriptors definition will be added per demand.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.h | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index a5d3f57274a8..76d4991d7284 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -36,28 +36,6 @@ union igc_adv_tx_desc {
 
 #define IGC_RAR_ENTRIES		16
 
-struct igc_adv_data_desc {
-	__le64 buffer_addr;    /* Address of the descriptor's data buffer */
-	union {
-		u32 data;
-		struct {
-			u32 datalen:16; /* Data buffer length */
-			u32 rsvd:4;
-			u32 dtyp:4;  /* Descriptor type */
-			u32 dcmd:8;  /* Descriptor command */
-		} config;
-	} lower;
-	union {
-		u32 data;
-		struct {
-			u32 status:4;  /* Descriptor status */
-			u32 idx:4;
-			u32 popts:6;  /* Packet Options */
-			u32 paylen:18; /* Payload length */
-		} options;
-	} upper;
-};
-
 /* Receive Descriptor - Advanced */
 union igc_adv_rx_desc {
 	struct {
-- 
2.20.1


^ permalink raw reply related

* [net-next 04/14] igc: Fix code redundancy
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

Remove redundant igc_check_for_link_base code and replace it with
an igc_check_for_copper_link method.
Fix duplication of IGC_ADVTXD_PAYLEN_SHIFT mask declaration.
Remove obsolete IGC_SCVPC register definition.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c | 20 ++------------------
 drivers/net/ethernet/intel/igc/igc_base.h |  3 ---
 drivers/net/ethernet/intel/igc/igc_regs.h |  1 -
 3 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index df40af759542..19ff987922d2 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -53,22 +53,6 @@ static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw)
 	return ret_val;
 }
 
-/**
- * igc_check_for_link_base - Check for link
- * @hw: pointer to the HW structure
- *
- * If sgmii is enabled, then use the pcs register to determine link, otherwise
- * use the generic interface for determining link.
- */
-static s32 igc_check_for_link_base(struct igc_hw *hw)
-{
-	s32 ret_val = 0;
-
-	ret_val = igc_check_for_copper_link(hw);
-
-	return ret_val;
-}
-
 /**
  * igc_reset_hw_base - Reset hardware
  * @hw: pointer to the HW structure
@@ -265,7 +249,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw)
 	if (ret_val)
 		return ret_val;
 
-	igc_check_for_link_base(hw);
+	igc_check_for_copper_link(hw);
 
 	/* Verify phy id and set remaining function pointers */
 	switch (phy->id) {
@@ -512,7 +496,7 @@ void igc_rx_fifo_flush_base(struct igc_hw *hw)
 
 static struct igc_mac_operations igc_mac_ops_base = {
 	.init_hw		= igc_init_hw_base,
-	.check_for_link		= igc_check_for_link_base,
+	.check_for_link		= igc_check_for_copper_link,
 	.rar_set		= igc_rar_set,
 	.read_mac_addr		= igc_read_mac_addr_base,
 	.get_speed_and_duplex	= igc_get_link_up_info_base,
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
index 35588fa7b8c5..a5d3f57274a8 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.h
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -90,9 +90,6 @@ union igc_adv_rx_desc {
 	} wb;  /* writeback */
 };
 
-/* Adv Transmit Descriptor Config Masks */
-#define IGC_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
-
 /* Additional Transmit Descriptor Control definitions */
 #define IGC_TXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Tx Queue */
 
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index a1bd3216c906..f8c835283377 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -188,7 +188,6 @@
 #define IGC_HGOTCL	0x04130  /* Host Good Octets Transmit Count Low */
 #define IGC_HGOTCH	0x04134  /* Host Good Octets Transmit Count High */
 #define IGC_LENERRS	0x04138  /* Length Errors Count */
-#define IGC_SCVPC	0x04228  /* SerDes/SGMII Code Violation Pkt Count */
 #define IGC_HRMPC	0x0A018  /* Header Redirection Missed Packet Count */
 
 /* Management registers */
-- 
2.20.1


^ permalink raw reply related

* [net-next 07/14] e1000e: fix a missing check for return value
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem
  Cc: Jeff Kirsher, netdev, nhorman, sassmann, Sasha Neftin, Kangjie Lu,
	Aaron Brown
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

The change is based on the issue found by Kangjie Lu <kjlu@umn.edu> where
we not checking the return value of a register read/write which could result
in a NULL pointer dereference if the read/write fails.

Since we are only trying to disable the far-end loopback, if the read
and write of register fails, we do not want to bail out of the function.
We just want to log that it failed to disable and continue on.

CC: Sasha Neftin <sasha.neftin@intel.com>
CC: Kangjie Lu <kjlu@umn.edu>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
---
 .../net/ethernet/intel/e1000e/80003es2lan.c   | 33 +++++++++++++------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
index 257bd59bc9c6..f86d55657959 100644
--- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -696,11 +696,16 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
 	ret_val =
 	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
 					    &kum_reg_data);
-	if (ret_val)
-		return ret_val;
-	kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
-	e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
-					 kum_reg_data);
+	if (!ret_val) {
+		kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+		ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+						 E1000_KMRNCTRLSTA_INBAND_PARAM,
+						 kum_reg_data);
+		if (ret_val)
+			e_dbg("Error disabling far-end loopback\n");
+	} else {
+		e_dbg("Error disabling far-end loopback\n");
+	}
 
 	ret_val = e1000e_get_auto_rd_done(hw);
 	if (ret_val)
@@ -754,11 +759,19 @@ static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw)
 		return ret_val;
 
 	/* Disable IBIST slave mode (far-end loopback) */
-	e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
-					&kum_reg_data);
-	kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
-	e1000_write_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
-					 kum_reg_data);
+	ret_val =
+	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+					    &kum_reg_data);
+	if (!ret_val) {
+		kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+		ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+						 E1000_KMRNCTRLSTA_INBAND_PARAM,
+						 kum_reg_data);
+		if (ret_val)
+			e_dbg("Error disabling far-end loopback\n");
+	} else {
+		e_dbg("Error disabling far-end loopback\n");
+	}
 
 	/* Set the transmit descriptor write-back policy */
 	reg_data = er32(TXDCTL(0));
-- 
2.20.1


^ permalink raw reply related

* [net-next 05/14] ixgbe: remove magic constant in ixgbe_reset_hw_82599()
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Jiri Kosina, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Jiri Kosina <jkosina@suse.cz>

ixgbe_reset_hw_82599() resets the value of hw->mac.num_rar_entries to
pre-defined value of 128. Let's get rid of that hardcoded literal, and use
IXGBE_82599_RAR_ENTRIES instead, the same way the normal initialization
path does.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 1e49716f52bc..109f8de5a1c2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -1048,7 +1048,7 @@ static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
 	 * clear the multicast table.  Also reset num_rar_entries to 128,
 	 * since we modify this value when programming the SAN MAC address.
 	 */
-	hw->mac.num_rar_entries = 128;
+	hw->mac.num_rar_entries = IXGBE_82599_RAR_ENTRIES;
 	hw->mac.ops.init_rx_addrs(hw);
 
 	/* Store the permanent SAN mac address */
-- 
2.20.1


^ permalink raw reply related

* [net-next 11/14] igc: Remove the 'igc_read_mac_addr_base' method
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

Remove the redundant 'igc_read_mac_addr_base' method and use
the 'igc_read_mac_addr' method directly instead.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index a31ec1682090..51a667c16dea 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -371,19 +371,6 @@ static s32 igc_init_hw_base(struct igc_hw *hw)
 	return ret_val;
 }
 
-/**
- * igc_read_mac_addr_base - Read device MAC address
- * @hw: pointer to the HW structure
- */
-static s32 igc_read_mac_addr_base(struct igc_hw *hw)
-{
-	s32 ret_val = 0;
-
-	ret_val = igc_read_mac_addr(hw);
-
-	return ret_val;
-}
-
 /**
  * igc_power_down_phy_copper_base - Remove link during PHY power down
  * @hw: pointer to the HW structure
@@ -478,7 +465,7 @@ static struct igc_mac_operations igc_mac_ops_base = {
 	.init_hw		= igc_init_hw_base,
 	.check_for_link		= igc_check_for_copper_link,
 	.rar_set		= igc_rar_set,
-	.read_mac_addr		= igc_read_mac_addr_base,
+	.read_mac_addr		= igc_read_mac_addr,
 	.get_speed_and_duplex	= igc_get_speed_and_duplex_copper,
 };
 
-- 
2.20.1


^ permalink raw reply related

* [net-next 14/14] igc: Add ethtool support
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

This patch adds basic ethtool support to the device to allow
for configuration.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/Makefile      |    3 +-
 drivers/net/ethernet/intel/igc/igc.h         |   34 +-
 drivers/net/ethernet/intel/igc/igc_base.c    |    1 +
 drivers/net/ethernet/intel/igc/igc_defines.h |    4 +
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 1032 ++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_hw.h      |    1 +
 drivers/net/ethernet/intel/igc/igc_main.c    |  109 +-
 drivers/net/ethernet/intel/igc/igc_regs.h    |    3 +
 8 files changed, 1169 insertions(+), 18 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/igc/igc_ethtool.c

diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
index 4387f6ba8e67..88c6f88baac5 100644
--- a/drivers/net/ethernet/intel/igc/Makefile
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -7,4 +7,5 @@
 
 obj-$(CONFIG_IGC) += igc.o
 
-igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o
+igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
+igc_ethtool.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index b1039dd3dd13..80faccc34cda 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -13,19 +13,43 @@
 
 #include "igc_hw.h"
 
-/* main */
+/* forward declaration */
+void igc_set_ethtool_ops(struct net_device *);
+
+struct igc_adapter;
+struct igc_ring;
+
+void igc_up(struct igc_adapter *adapter);
+void igc_down(struct igc_adapter *adapter);
+int igc_setup_tx_resources(struct igc_ring *ring);
+int igc_setup_rx_resources(struct igc_ring *ring);
+void igc_free_tx_resources(struct igc_ring *ring);
+void igc_free_rx_resources(struct igc_ring *ring);
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter);
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+			      const u32 max_rss_queues);
+int igc_reinit_queues(struct igc_adapter *adapter);
+bool igc_has_link(struct igc_adapter *adapter);
+void igc_reset(struct igc_adapter *adapter);
+int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
+
 extern char igc_driver_name[];
 extern char igc_driver_version[];
 
+#define IGC_REGS_LEN			740
+#define IGC_RETA_SIZE			128
+
 /* Interrupt defines */
 #define IGC_START_ITR			648 /* ~6000 ints/sec */
 #define IGC_FLAG_HAS_MSI		BIT(0)
-#define IGC_FLAG_QUEUE_PAIRS		BIT(4)
+#define IGC_FLAG_QUEUE_PAIRS		BIT(3)
+#define IGC_FLAG_DMAC			BIT(4)
 #define IGC_FLAG_NEED_LINK_UPDATE	BIT(9)
 #define IGC_FLAG_MEDIA_RESET		BIT(10)
 #define IGC_FLAG_MAS_ENABLE		BIT(12)
 #define IGC_FLAG_HAS_MSIX		BIT(13)
 #define IGC_FLAG_VLAN_PROMISC		BIT(15)
+#define IGC_FLAG_RX_LEGACY		BIT(16)
 
 #define IGC_START_ITR			648 /* ~6000 ints/sec */
 #define IGC_4K_ITR			980
@@ -60,6 +84,7 @@ extern char igc_driver_version[];
 #define IGC_RXBUFFER_2048		2048
 #define IGC_RXBUFFER_3072		3072
 
+#define AUTO_ALL_MODES		0
 #define IGC_RX_HDR_LEN			IGC_RXBUFFER_256
 
 /* RX and TX descriptor control thresholds.
@@ -340,6 +365,8 @@ struct igc_adapter {
 
 	struct igc_mac_addr *mac_table;
 
+	u8 rss_indir_tbl[IGC_RETA_SIZE];
+
 	unsigned long link_check_timeout;
 	struct igc_info ei;
 };
@@ -418,6 +445,9 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data)
 	return 0;
 }
 
+/* forward declaration */
+void igc_reinit_locked(struct igc_adapter *);
+
 #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
 
 #define IGC_TXD_DCMD	(IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index fe9a9666c70f..51a8b8769c67 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -131,6 +131,7 @@ static s32 igc_init_nvm_params_base(struct igc_hw *hw)
 	if (size > 15)
 		size = 15;
 
+	nvm->type = igc_nvm_eeprom_spi;
 	nvm->word_size = BIT(size);
 	nvm->opcode_bits = 8;
 	nvm->delay_usec = 1;
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 8740754ea1fd..7d1bdcd1225a 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -4,6 +4,10 @@
 #ifndef _IGC_DEFINES_H_
 #define _IGC_DEFINES_H_
 
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE  8
+#define REQ_RX_DESCRIPTOR_MULTIPLE  8
+
 #define IGC_CTRL_EXT_DRV_LOAD	0x10000000 /* Drv loaded bit for FW */
 
 /* PCI Bus Info */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
new file mode 100644
index 000000000000..eff37a6c0afa
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -0,0 +1,1032 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+/* ethtool support for igc */
+#include <linux/pm_runtime.h>
+
+#include "igc.h"
+
+static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IGC_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+};
+
+#define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings)
+
+static void igc_get_drvinfo(struct net_device *netdev,
+			    struct ethtool_drvinfo *drvinfo)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	strlcpy(drvinfo->driver,  igc_driver_name, sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, igc_driver_version, sizeof(drvinfo->version));
+
+	/* add fw_version here */
+	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN;
+}
+
+static int igc_get_regs_len(struct net_device *netdev)
+{
+	return IGC_REGS_LEN * sizeof(u32);
+}
+
+static void igc_get_regs(struct net_device *netdev,
+			 struct ethtool_regs *regs, void *p)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	memset(p, 0, IGC_REGS_LEN * sizeof(u32));
+
+	regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = rd32(IGC_CTRL);
+	regs_buff[1] = rd32(IGC_STATUS);
+	regs_buff[2] = rd32(IGC_CTRL_EXT);
+	regs_buff[3] = rd32(IGC_MDIC);
+	regs_buff[4] = rd32(IGC_CONNSW);
+
+	/* NVM Register */
+	regs_buff[5] = rd32(IGC_EECD);
+
+	/* Interrupt */
+	/* Reading EICS for EICR because they read the
+	 * same but EICS does not clear on read
+	 */
+	regs_buff[6] = rd32(IGC_EICS);
+	regs_buff[7] = rd32(IGC_EICS);
+	regs_buff[8] = rd32(IGC_EIMS);
+	regs_buff[9] = rd32(IGC_EIMC);
+	regs_buff[10] = rd32(IGC_EIAC);
+	regs_buff[11] = rd32(IGC_EIAM);
+	/* Reading ICS for ICR because they read the
+	 * same but ICS does not clear on read
+	 */
+	regs_buff[12] = rd32(IGC_ICS);
+	regs_buff[13] = rd32(IGC_ICS);
+	regs_buff[14] = rd32(IGC_IMS);
+	regs_buff[15] = rd32(IGC_IMC);
+	regs_buff[16] = rd32(IGC_IAC);
+	regs_buff[17] = rd32(IGC_IAM);
+
+	/* Flow Control */
+	regs_buff[18] = rd32(IGC_FCAL);
+	regs_buff[19] = rd32(IGC_FCAH);
+	regs_buff[20] = rd32(IGC_FCTTV);
+	regs_buff[21] = rd32(IGC_FCRTL);
+	regs_buff[22] = rd32(IGC_FCRTH);
+	regs_buff[23] = rd32(IGC_FCRTV);
+
+	/* Receive */
+	regs_buff[24] = rd32(IGC_RCTL);
+	regs_buff[25] = rd32(IGC_RXCSUM);
+	regs_buff[26] = rd32(IGC_RLPML);
+	regs_buff[27] = rd32(IGC_RFCTL);
+
+	/* Transmit */
+	regs_buff[28] = rd32(IGC_TCTL);
+	regs_buff[29] = rd32(IGC_TIPG);
+
+	/* Wake Up */
+
+	/* MAC */
+
+	/* Statistics */
+	regs_buff[30] = adapter->stats.crcerrs;
+	regs_buff[31] = adapter->stats.algnerrc;
+	regs_buff[32] = adapter->stats.symerrs;
+	regs_buff[33] = adapter->stats.rxerrc;
+	regs_buff[34] = adapter->stats.mpc;
+	regs_buff[35] = adapter->stats.scc;
+	regs_buff[36] = adapter->stats.ecol;
+	regs_buff[37] = adapter->stats.mcc;
+	regs_buff[38] = adapter->stats.latecol;
+	regs_buff[39] = adapter->stats.colc;
+	regs_buff[40] = adapter->stats.dc;
+	regs_buff[41] = adapter->stats.tncrs;
+	regs_buff[42] = adapter->stats.sec;
+	regs_buff[43] = adapter->stats.htdpmc;
+	regs_buff[44] = adapter->stats.rlec;
+	regs_buff[45] = adapter->stats.xonrxc;
+	regs_buff[46] = adapter->stats.xontxc;
+	regs_buff[47] = adapter->stats.xoffrxc;
+	regs_buff[48] = adapter->stats.xofftxc;
+	regs_buff[49] = adapter->stats.fcruc;
+	regs_buff[50] = adapter->stats.prc64;
+	regs_buff[51] = adapter->stats.prc127;
+	regs_buff[52] = adapter->stats.prc255;
+	regs_buff[53] = adapter->stats.prc511;
+	regs_buff[54] = adapter->stats.prc1023;
+	regs_buff[55] = adapter->stats.prc1522;
+	regs_buff[56] = adapter->stats.gprc;
+	regs_buff[57] = adapter->stats.bprc;
+	regs_buff[58] = adapter->stats.mprc;
+	regs_buff[59] = adapter->stats.gptc;
+	regs_buff[60] = adapter->stats.gorc;
+	regs_buff[61] = adapter->stats.gotc;
+	regs_buff[62] = adapter->stats.rnbc;
+	regs_buff[63] = adapter->stats.ruc;
+	regs_buff[64] = adapter->stats.rfc;
+	regs_buff[65] = adapter->stats.roc;
+	regs_buff[66] = adapter->stats.rjc;
+	regs_buff[67] = adapter->stats.mgprc;
+	regs_buff[68] = adapter->stats.mgpdc;
+	regs_buff[69] = adapter->stats.mgptc;
+	regs_buff[70] = adapter->stats.tor;
+	regs_buff[71] = adapter->stats.tot;
+	regs_buff[72] = adapter->stats.tpr;
+	regs_buff[73] = adapter->stats.tpt;
+	regs_buff[74] = adapter->stats.ptc64;
+	regs_buff[75] = adapter->stats.ptc127;
+	regs_buff[76] = adapter->stats.ptc255;
+	regs_buff[77] = adapter->stats.ptc511;
+	regs_buff[78] = adapter->stats.ptc1023;
+	regs_buff[79] = adapter->stats.ptc1522;
+	regs_buff[80] = adapter->stats.mptc;
+	regs_buff[81] = adapter->stats.bptc;
+	regs_buff[82] = adapter->stats.tsctc;
+	regs_buff[83] = adapter->stats.iac;
+	regs_buff[84] = adapter->stats.rpthc;
+	regs_buff[85] = adapter->stats.hgptc;
+	regs_buff[86] = adapter->stats.hgorc;
+	regs_buff[87] = adapter->stats.hgotc;
+	regs_buff[88] = adapter->stats.lenerrs;
+	regs_buff[89] = adapter->stats.scvpc;
+	regs_buff[90] = adapter->stats.hrmpc;
+
+	for (i = 0; i < 4; i++)
+		regs_buff[91 + i] = rd32(IGC_SRRCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[95 + i] = rd32(IGC_PSRTYPE(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[99 + i] = rd32(IGC_RDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[103 + i] = rd32(IGC_RDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[107 + i] = rd32(IGC_RDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[111 + i] = rd32(IGC_RDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[115 + i] = rd32(IGC_RDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[119 + i] = rd32(IGC_RXDCTL(i));
+
+	for (i = 0; i < 10; i++)
+		regs_buff[123 + i] = rd32(IGC_EITR(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[139 + i] = rd32(IGC_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[145 + i] = rd32(IGC_RAH(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[149 + i] = rd32(IGC_TDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[152 + i] = rd32(IGC_TDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[156 + i] = rd32(IGC_TDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[160 + i] = rd32(IGC_TDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[164 + i] = rd32(IGC_TDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[168 + i] = rd32(IGC_TXDCTL(i));
+}
+
+static u32 igc_get_msglevel(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void igc_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = data;
+}
+
+static int igc_nway_reset(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		igc_reinit_locked(adapter);
+	return 0;
+}
+
+static u32 igc_get_link(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_mac_info *mac = &adapter->hw.mac;
+
+	/* If the link is not reported up to netdev, interrupts are disabled,
+	 * and so the physical link state may have changed since we last
+	 * looked. Set get_link_status to make sure that the true link
+	 * state is interrogated, rather than pulling a cached and possibly
+	 * stale link state from the driver.
+	 */
+	if (!netif_carrier_ok(netdev))
+		mac->get_link_status = 1;
+
+	return igc_has_link(adapter);
+}
+
+static int igc_get_eeprom_len(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->hw.nvm.word_size * 2;
+}
+
+static int igc_get_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int first_word, last_word;
+	u16 *eeprom_buff;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	if (hw->nvm.type == igc_nvm_eeprom_spi) {
+		ret_val = hw->nvm.ops.read(hw, first_word,
+					   last_word - first_word + 1,
+					   eeprom_buff);
+	} else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = hw->nvm.ops.read(hw, first_word + i, 1,
+						   &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+	       eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int igc_set_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 *eeprom_buff;
+	void *ptr;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if (hw->mac.type >= igc_i225 &&
+	    !igc_get_flash_presence_i225(hw)) {
+		return -EOPNOTSUPP;
+	}
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = hw->nvm.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, first_word, 1,
+					    &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && ret_val == 0) {
+		/* need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, last_word, 1,
+				   &eeprom_buff[last_word - first_word]);
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		eeprom_buff[i] = cpu_to_le16(eeprom_buff[i]);
+
+	ret_val = hw->nvm.ops.write(hw, first_word,
+				    last_word - first_word + 1, eeprom_buff);
+
+	/* Update the checksum if nvm write succeeded */
+	if (ret_val == 0)
+		hw->nvm.ops.update(hw);
+
+	/* check if need: igc_set_fw_version(adapter); */
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void igc_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IGC_MAX_RXD;
+	ring->tx_max_pending = IGC_MAX_TXD;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+}
+
+static int igc_set_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_ring *temp_ring;
+	u16 new_rx_count, new_tx_count;
+	int i, err = 0;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+		return -EINVAL;
+
+	new_rx_count = min_t(u32, ring->rx_pending, IGC_MAX_RXD);
+	new_rx_count = max_t(u16, new_rx_count, IGC_MIN_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = min_t(u32, ring->tx_pending, IGC_MAX_TXD);
+	new_tx_count = max_t(u16, new_tx_count, IGC_MIN_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if (new_tx_count == adapter->tx_ring_count &&
+	    new_rx_count == adapter->rx_ring_count) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	if (adapter->num_tx_queues > adapter->num_rx_queues)
+		temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
+					       adapter->num_tx_queues));
+	else
+		temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
+					       adapter->num_rx_queues));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	igc_down(adapter);
+
+	/* We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the Tx and Rx ring structs.
+	 */
+	if (new_tx_count != adapter->tx_ring_count) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->tx_ring[i],
+			       sizeof(struct igc_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = igc_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igc_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			igc_free_tx_resources(adapter->tx_ring[i]);
+
+			memcpy(adapter->tx_ring[i], &temp_ring[i],
+			       sizeof(struct igc_ring));
+		}
+
+		adapter->tx_ring_count = new_tx_count;
+	}
+
+	if (new_rx_count != adapter->rx_ring_count) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->rx_ring[i],
+			       sizeof(struct igc_ring));
+
+			temp_ring[i].count = new_rx_count;
+			err = igc_setup_rx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igc_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			igc_free_rx_resources(adapter->rx_ring[i]);
+
+			memcpy(adapter->rx_ring[i], &temp_ring[i],
+			       sizeof(struct igc_ring));
+		}
+
+		adapter->rx_ring_count = new_rx_count;
+	}
+err_setup:
+	igc_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IGC_RESETTING, &adapter->state);
+	return err;
+}
+
+static void igc_get_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+		(adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc.current_mode == igc_fc_rx_pause) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == igc_fc_tx_pause) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == igc_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int igc_set_pauseparam(struct net_device *netdev,
+			      struct ethtool_pauseparam *pause)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		hw->fc.requested_mode = igc_fc_default;
+		if (netif_running(adapter->netdev)) {
+			igc_down(adapter);
+			igc_up(adapter);
+		} else {
+			igc_reset(adapter);
+		}
+	} else {
+		if (pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_full;
+		else if (pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_rx_pause;
+		else if (!pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_tx_pause;
+		else if (!pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_none;
+
+		hw->fc.current_mode = hw->fc.requested_mode;
+
+		retval = ((hw->phy.media_type == igc_media_type_copper) ?
+			  igc_force_mac_fc(hw) : igc_setup_link(hw));
+	}
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+	return retval;
+}
+
+static int igc_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->rx_itr_setting <= 3)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) {
+		if (adapter->tx_itr_setting <= 3)
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+		else
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+	}
+
+	return 0;
+}
+
+static int igc_set_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (ec->rx_max_coalesced_frames ||
+	    ec->rx_coalesce_usecs_irq ||
+	    ec->rx_max_coalesced_frames_irq ||
+	    ec->tx_max_coalesced_frames ||
+	    ec->tx_coalesce_usecs_irq ||
+	    ec->stats_block_coalesce_usecs ||
+	    ec->use_adaptive_rx_coalesce ||
+	    ec->use_adaptive_tx_coalesce ||
+	    ec->pkt_rate_low ||
+	    ec->rx_coalesce_usecs_low ||
+	    ec->rx_max_coalesced_frames_low ||
+	    ec->tx_coalesce_usecs_low ||
+	    ec->tx_max_coalesced_frames_low ||
+	    ec->pkt_rate_high ||
+	    ec->rx_coalesce_usecs_high ||
+	    ec->rx_max_coalesced_frames_high ||
+	    ec->tx_coalesce_usecs_high ||
+	    ec->tx_max_coalesced_frames_high ||
+	    ec->rate_sample_interval)
+		return -ENOTSUPP;
+
+	if (ec->rx_coalesce_usecs > IGC_MAX_ITR_USECS ||
+	    (ec->rx_coalesce_usecs > 3 &&
+	     ec->rx_coalesce_usecs < IGC_MIN_ITR_USECS) ||
+	    ec->rx_coalesce_usecs == 2)
+		return -EINVAL;
+
+	if (ec->tx_coalesce_usecs > IGC_MAX_ITR_USECS ||
+	    (ec->tx_coalesce_usecs > 3 &&
+	     ec->tx_coalesce_usecs < IGC_MIN_ITR_USECS) ||
+	    ec->tx_coalesce_usecs == 2)
+		return -EINVAL;
+
+	if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
+		return -EINVAL;
+
+	/* If ITR is disabled, disable DMAC */
+	if (ec->rx_coalesce_usecs == 0) {
+		if (adapter->flags & IGC_FLAG_DMAC)
+			adapter->flags &= ~IGC_FLAG_DMAC;
+	}
+
+	/* convert to rate of irq's per second */
+	if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+
+	/* convert to rate of irq's per second */
+	if (adapter->flags & IGC_FLAG_QUEUE_PAIRS)
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+	else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igc_q_vector *q_vector = adapter->q_vector[i];
+
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		if (q_vector->rx.ring)
+			q_vector->itr_val = adapter->rx_itr_setting;
+		else
+			q_vector->itr_val = adapter->tx_itr_setting;
+		if (q_vector->itr_val && q_vector->itr_val <= 3)
+			q_vector->itr_val = IGC_START_ITR;
+		q_vector->set_itr = 1;
+	}
+
+	return 0;
+}
+
+void igc_write_rss_indir_tbl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 reg = IGC_RETA(0);
+	u32 shift = 0;
+	int i = 0;
+
+	while (i < IGC_RETA_SIZE) {
+		u32 val = 0;
+		int j;
+
+		for (j = 3; j >= 0; j--) {
+			val <<= 8;
+			val |= adapter->rss_indir_tbl[i + j];
+		}
+
+		wr32(reg, val << shift);
+		reg += 4;
+		i += 4;
+	}
+}
+
+static u32 igc_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return IGC_RETA_SIZE;
+}
+
+static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (!indir)
+		return 0;
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		indir[i] = adapter->rss_indir_tbl[i];
+
+	return 0;
+}
+
+static int igc_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 num_queues;
+	int i;
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	num_queues = adapter->rss_queues;
+
+	/* Verify user input. */
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		if (indir[i] >= num_queues)
+			return -EINVAL;
+
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		adapter->rss_indir_tbl[i] = indir[i];
+
+	igc_write_rss_indir_tbl(adapter);
+
+	return 0;
+}
+
+static unsigned int igc_max_channels(struct igc_adapter *adapter)
+{
+	return igc_get_max_rss_queues(adapter);
+}
+
+static void igc_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = igc_max_channels(adapter);
+
+	/* Report info for other vector */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		ch->max_other = NON_Q_VECTORS;
+		ch->other_count = NON_Q_VECTORS;
+	}
+
+	ch->combined_count = adapter->rss_queues;
+}
+
+static int igc_set_channels(struct net_device *netdev,
+			    struct ethtool_channels *ch)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	unsigned int count = ch->combined_count;
+	unsigned int max_combined = 0;
+
+	/* Verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* Verify other_count is valid and has not been changed */
+	if (ch->other_count != NON_Q_VECTORS)
+		return -EINVAL;
+
+	/* Verify the number of channels doesn't exceed hw limits */
+	max_combined = igc_max_channels(adapter);
+	if (count > max_combined)
+		return -EINVAL;
+
+	if (count != adapter->rss_queues) {
+		adapter->rss_queues = count;
+		igc_set_flag_queue_pairs(adapter, max_combined);
+
+		/* Hardware has to reinitialize queues and interrupts to
+		 * match the new configuration.
+		 */
+		return igc_reinit_queues(adapter);
+	}
+
+	return 0;
+}
+
+static u32 igc_get_priv_flags(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags & IGC_FLAG_RX_LEGACY)
+		priv_flags |= IGC_PRIV_FLAGS_LEGACY_RX;
+
+	return priv_flags;
+}
+
+static int igc_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags = adapter->flags;
+
+	flags &= ~IGC_FLAG_RX_LEGACY;
+	if (priv_flags & IGC_PRIV_FLAGS_LEGACY_RX)
+		flags |= IGC_FLAG_RX_LEGACY;
+
+	if (flags != adapter->flags) {
+		adapter->flags = flags;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			igc_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_begin(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_get_sync(&adapter->pdev->dev);
+	return 0;
+}
+
+static void igc_ethtool_complete(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_put(&adapter->pdev->dev);
+}
+
+static int igc_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 status;
+	u32 speed;
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+
+	/* supported link modes */
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 2500baseT_Full);
+
+	/* twisted pair */
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = hw->phy.addr;
+
+	/* advertising link modes */
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full);
+
+	/* set autoneg settings */
+	if (hw->mac.autoneg == 1) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Autoneg);
+	}
+
+	switch (hw->fc.requested_mode) {
+	case igc_fc_full:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		break;
+	case igc_fc_rx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	case igc_fc_tx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	default:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+	}
+
+	status = rd32(IGC_STATUS);
+
+	if (status & IGC_STATUS_LU) {
+		if (status & IGC_STATUS_SPEED_1000) {
+			/* For I225, STATUS will indicate 1G speed in both
+			 * 1 Gbps and 2.5 Gbps link modes.
+			 * An additional bit is used
+			 * to differentiate between 1 Gbps and 2.5 Gbps.
+			 */
+			if (hw->mac.type == igc_i225 &&
+			    (status & IGC_STATUS_SPEED_2500)) {
+				speed = SPEED_2500;
+				hw_dbg("2500 Mbs, ");
+			} else {
+				speed = SPEED_1000;
+				hw_dbg("1000 Mbs, ");
+			}
+		} else if (status & IGC_STATUS_SPEED_100) {
+			speed = SPEED_100;
+			hw_dbg("100 Mbs, ");
+		} else {
+			speed = SPEED_10;
+			hw_dbg("10 Mbs, ");
+		}
+		if ((status & IGC_STATUS_FD) ||
+		    hw->phy.media_type != igc_media_type_copper)
+			cmd->base.duplex = DUPLEX_FULL;
+		else
+			cmd->base.duplex = DUPLEX_HALF;
+	} else {
+		speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+	cmd->base.speed = speed;
+	if (hw->mac.autoneg)
+		cmd->base.autoneg = AUTONEG_ENABLE;
+	else
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI =>1; Invalid =>0 */
+	if (hw->phy.media_type == igc_media_type_copper)
+		cmd->base.eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+						      ETH_TP_MDI;
+	else
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
+
+	return 0;
+}
+
+static int igc_set_link_ksettings(struct net_device *netdev,
+				  const struct ethtool_link_ksettings *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 advertising;
+
+	/* When adapter in resetting mode, autoneg/speed/duplex
+	 * cannot be changed
+	 */
+	if (igc_check_reset_block(hw)) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot change link characteristics when reset is active.\n");
+		return -EINVAL;
+	}
+
+	/* MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO &&
+		    cmd->base.autoneg != AUTONEG_ENABLE) {
+			dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		hw->mac.autoneg = 1;
+		hw->phy.autoneg_advertised = advertising;
+		if (adapter->fc_autoneg)
+			hw->fc.requested_mode = igc_fc_default;
+	} else {
+		/* calling this overrides forced MDI setting */
+		dev_info(&adapter->pdev->dev,
+			 "Force mode currently not supported\n");
+	}
+
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		/* fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
+	}
+
+	/* reset the link */
+	if (netif_running(adapter->netdev)) {
+		igc_down(adapter);
+		igc_up(adapter);
+	} else {
+		igc_reset(adapter);
+	}
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+static const struct ethtool_ops igc_ethtool_ops = {
+	.get_drvinfo		= igc_get_drvinfo,
+	.get_regs_len		= igc_get_regs_len,
+	.get_regs		= igc_get_regs,
+	.get_msglevel		= igc_get_msglevel,
+	.set_msglevel		= igc_set_msglevel,
+	.nway_reset		= igc_nway_reset,
+	.get_link		= igc_get_link,
+	.get_eeprom_len		= igc_get_eeprom_len,
+	.get_eeprom		= igc_get_eeprom,
+	.set_eeprom		= igc_set_eeprom,
+	.get_ringparam		= igc_get_ringparam,
+	.set_ringparam		= igc_set_ringparam,
+	.get_pauseparam		= igc_get_pauseparam,
+	.set_pauseparam		= igc_set_pauseparam,
+	.get_coalesce		= igc_get_coalesce,
+	.set_coalesce		= igc_set_coalesce,
+	.get_rxfh_indir_size	= igc_get_rxfh_indir_size,
+	.get_rxfh		= igc_get_rxfh,
+	.set_rxfh		= igc_set_rxfh,
+	.get_channels		= igc_get_channels,
+	.set_channels		= igc_set_channels,
+	.get_priv_flags		= igc_get_priv_flags,
+	.set_priv_flags		= igc_set_priv_flags,
+	.begin			= igc_ethtool_begin,
+	.complete		= igc_ethtool_complete,
+	.get_link_ksettings	= igc_get_link_ksettings,
+	.set_link_ksettings	= igc_set_link_ksettings,
+};
+
+void igc_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &igc_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index c50414f48f0d..7c88b7bd4799 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -55,6 +55,7 @@ enum igc_media_type {
 
 enum igc_nvm_type {
 	igc_nvm_unknown = 0,
+	igc_nvm_eeprom_spi,
 	igc_nvm_flash_hw,
 	igc_nvm_invm,
 };
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index f20183037fb2..11c75433fe22 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -12,6 +12,8 @@
 #define DRV_VERSION	"0.0.1-k"
 #define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
 
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
 static int debug = -1;
 
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -66,7 +68,7 @@ enum latency_range {
 	latency_invalid = 255
 };
 
-static void igc_reset(struct igc_adapter *adapter)
+void igc_reset(struct igc_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
 	struct igc_hw *hw = &adapter->hw;
@@ -150,7 +152,7 @@ static void igc_get_hw_control(struct igc_adapter *adapter)
  *
  * Free all transmit software resources
  */
-static void igc_free_tx_resources(struct igc_ring *tx_ring)
+void igc_free_tx_resources(struct igc_ring *tx_ring)
 {
 	igc_clean_tx_ring(tx_ring);
 
@@ -261,7 +263,7 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
  *
  * Return 0 on success, negative on failure
  */
-static int igc_setup_tx_resources(struct igc_ring *tx_ring)
+int igc_setup_tx_resources(struct igc_ring *tx_ring)
 {
 	struct device *dev = tx_ring->dev;
 	int size = 0;
@@ -381,7 +383,7 @@ static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
  *
  * Free all receive software resources
  */
-static void igc_free_rx_resources(struct igc_ring *rx_ring)
+void igc_free_rx_resources(struct igc_ring *rx_ring)
 {
 	igc_clean_rx_ring(rx_ring);
 
@@ -418,7 +420,7 @@ static void igc_free_all_rx_resources(struct igc_adapter *adapter)
  *
  * Returns 0 on success, negative on failure
  */
-static int igc_setup_rx_resources(struct igc_ring *rx_ring)
+int igc_setup_rx_resources(struct igc_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
 	int size, desc_len;
@@ -1703,7 +1705,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
  * igc_up - Open the interface and prepare it to handle traffic
  * @adapter: board private structure
  */
-static void igc_up(struct igc_adapter *adapter)
+void igc_up(struct igc_adapter *adapter)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int i = 0;
@@ -1748,7 +1750,7 @@ static void igc_nfc_filter_exit(struct igc_adapter *adapter)
  * igc_down - Close the interface
  * @adapter: board private structure
  */
-static void igc_down(struct igc_adapter *adapter)
+void igc_down(struct igc_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
@@ -1810,7 +1812,7 @@ static void igc_down(struct igc_adapter *adapter)
 	igc_clean_all_rx_rings(adapter);
 }
 
-static void igc_reinit_locked(struct igc_adapter *adapter)
+void igc_reinit_locked(struct igc_adapter *adapter)
 {
 	WARN_ON(in_interrupt());
 	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
@@ -1922,7 +1924,7 @@ static void igc_configure(struct igc_adapter *adapter)
 
 /**
  * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
- * @adapter: Pointer to adapter structure
+ * @adapter: address of board private structure
  * @index: Index of the RAR entry which need to be synced with MAC table
  */
 static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
@@ -2298,7 +2300,7 @@ static void igc_update_phy_info(struct timer_list *t)
  * igc_has_link - check shared code for link and determine up/down
  * @adapter: pointer to driver private info
  */
-static bool igc_has_link(struct igc_adapter *adapter)
+bool igc_has_link(struct igc_adapter *adapter)
 {
 	struct igc_hw *hw = &adapter->hw;
 	bool link_active = false;
@@ -3501,6 +3503,57 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
 	return value;
 }
 
+int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_mac_info *mac = &adapter->hw.mac;
+
+	mac->autoneg = 0;
+
+	/* Make sure dplx is at most 1 bit and lsb of speed is not set
+	 * for the switch() below to work
+	 */
+	if ((spd & 1) || (dplx & ~1))
+		goto err_inval;
+
+	switch (spd + dplx) {
+	case SPEED_10 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_10_HALF;
+		break;
+	case SPEED_10 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_10_FULL;
+		break;
+	case SPEED_100 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_100_HALF;
+		break;
+	case SPEED_100 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_100_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_FULL:
+		mac->autoneg = 1;
+		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_HALF: /* not supported */
+		goto err_inval;
+	case SPEED_2500 + DUPLEX_FULL:
+		mac->autoneg = 1;
+		adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
+		break;
+	case SPEED_2500 + DUPLEX_HALF: /* not supported */
+	default:
+		goto err_inval;
+	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
+	return 0;
+
+err_inval:
+	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
+	return -EINVAL;
+}
+
 /**
  * igc_probe - Device Initialization Routine
  * @pdev: PCI device information struct
@@ -3568,7 +3621,7 @@ static int igc_probe(struct pci_dev *pdev,
 	hw = &adapter->hw;
 	hw->back = adapter;
 	adapter->port_num = hw->bus.func;
-	adapter->msg_enable = GENMASK(debug - 1, 0);
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
 	err = pci_save_state(pdev);
 	if (err)
@@ -3584,7 +3637,7 @@ static int igc_probe(struct pci_dev *pdev,
 	hw->hw_addr = adapter->io_addr;
 
 	netdev->netdev_ops = &igc_netdev_ops;
-
+	igc_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
 
 	netdev->mem_start = pci_resource_start(pdev, 0);
@@ -3744,8 +3797,8 @@ static struct pci_driver igc_driver = {
 	.remove   = igc_remove,
 };
 
-static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
-				     const u32 max_rss_queues)
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+			      const u32 max_rss_queues)
 {
 	/* Determine if we need to pair queues. */
 	/* If rss_queues > half of max_rss_queues, pair the queues in
@@ -3757,7 +3810,7 @@ static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
 		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
 }
 
-static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
 {
 	unsigned int max_rss_queues;
 
@@ -3836,6 +3889,32 @@ static int igc_sw_init(struct igc_adapter *adapter)
 	return 0;
 }
 
+/**
+ * igc_reinit_queues - return error
+ * @adapter: pointer to adapter structure
+ */
+int igc_reinit_queues(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (netif_running(netdev))
+		igc_close(netdev);
+
+	igc_reset_interrupt_capability(adapter);
+
+	if (igc_init_interrupt_scheme(adapter, true)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	if (netif_running(netdev))
+		err = igc_open(netdev);
+
+	return err;
+}
+
 /**
  * igc_get_hw_dev - return device
  * @hw: pointer to hardware structure
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index f8c835283377..5afe7a8d3faf 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -80,6 +80,9 @@
 /* MSI-X Table Register Descriptions */
 #define IGC_PBACL		0x05B68  /* MSIx PBA Clear - R/W 1 to clear */
 
+/* Redirection Table - RW Array */
+#define IGC_RETA(_i)		(0x05C00 + ((_i) * 4))
+
 /* Receive Register Descriptions */
 #define IGC_RCTL		0x00100  /* Rx Control - RW */
 #define IGC_SRRCTL(_n)		(0x0C00C + ((_n) * 0x40))
-- 
2.20.1


^ permalink raw reply related

* [net-next 12/14] igc: Remove the 'igc_get_phy_id_base' method
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

Remove the redundant 'igc_get_phy_id_base' method and use
the 'igc_get_phy_id' method directly instead.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 51a667c16dea..fe9a9666c70f 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -107,22 +107,6 @@ static s32 igc_reset_hw_base(struct igc_hw *hw)
 	return ret_val;
 }
 
-/**
- * igc_get_phy_id_base - Retrieve PHY addr and id
- * @hw: pointer to the HW structure
- *
- * Retrieves the PHY address and ID for both PHY's which do and do not use
- * sgmi interface.
- */
-static s32 igc_get_phy_id_base(struct igc_hw *hw)
-{
-	s32  ret_val = 0;
-
-	ret_val = igc_get_phy_id(hw);
-
-	return ret_val;
-}
-
 /**
  * igc_init_nvm_params_base - Init NVM func ptrs.
  * @hw: pointer to the HW structure
@@ -245,7 +229,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw)
 		goto out;
 	}
 
-	ret_val = igc_get_phy_id_base(hw);
+	ret_val = igc_get_phy_id(hw);
 	if (ret_val)
 		return ret_val;
 
-- 
2.20.1


^ permalink raw reply related

* [net-next 03/14] docs/networking: fix formatting of Intel drivers documentation
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Mike Rapoport, netdev, nhorman, sassmann, Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Mike Rapoport <rppt@linux.ibm.com>

The documentation of Intel drivers is missing the heading adornment for
document titles.

This causes the generated html to have TOC entries from these documents to
appear as top level TOC entries:

* Linux* Base Driver for Intel(R) Ethernet Network Connection
* Contents
* Identifying Your Adapter
* Command Line Parameters
  * AutoNeg
  * Duplex
  ...

Add overline heading adornment to document titles.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 Documentation/networking/device_drivers/intel/e100.rst    | 1 +
 Documentation/networking/device_drivers/intel/e1000.rst   | 1 +
 Documentation/networking/device_drivers/intel/e1000e.rst  | 1 +
 Documentation/networking/device_drivers/intel/fm10k.rst   | 1 +
 Documentation/networking/device_drivers/intel/i40e.rst    | 1 +
 Documentation/networking/device_drivers/intel/iavf.rst    | 1 +
 Documentation/networking/device_drivers/intel/ice.rst     | 1 +
 Documentation/networking/device_drivers/intel/igb.rst     | 1 +
 Documentation/networking/device_drivers/intel/igbvf.rst   | 1 +
 Documentation/networking/device_drivers/intel/ixgb.rst    | 1 +
 Documentation/networking/device_drivers/intel/ixgbe.rst   | 1 +
 Documentation/networking/device_drivers/intel/ixgbevf.rst | 1 +
 12 files changed, 12 insertions(+)

diff --git a/Documentation/networking/device_drivers/intel/e100.rst b/Documentation/networking/device_drivers/intel/e100.rst
index 5e2839b4ec92..2b9f4887beda 100644
--- a/Documentation/networking/device_drivers/intel/e100.rst
+++ b/Documentation/networking/device_drivers/intel/e100.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+==============================================================
 Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
 ==============================================================
 
diff --git a/Documentation/networking/device_drivers/intel/e1000.rst b/Documentation/networking/device_drivers/intel/e1000.rst
index 6379d4d20771..956560b6e745 100644
--- a/Documentation/networking/device_drivers/intel/e1000.rst
+++ b/Documentation/networking/device_drivers/intel/e1000.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+===========================================================
 Linux* Base Driver for Intel(R) Ethernet Network Connection
 ===========================================================
 
diff --git a/Documentation/networking/device_drivers/intel/e1000e.rst b/Documentation/networking/device_drivers/intel/e1000e.rst
index 33554e5416c5..01999f05509c 100644
--- a/Documentation/networking/device_drivers/intel/e1000e.rst
+++ b/Documentation/networking/device_drivers/intel/e1000e.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+======================================================
 Linux* Driver for Intel(R) Ethernet Network Connection
 ======================================================
 
diff --git a/Documentation/networking/device_drivers/intel/fm10k.rst b/Documentation/networking/device_drivers/intel/fm10k.rst
index bf5e5942f28d..ac3269e34f55 100644
--- a/Documentation/networking/device_drivers/intel/fm10k.rst
+++ b/Documentation/networking/device_drivers/intel/fm10k.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+==============================================================
 Linux* Base Driver for Intel(R) Ethernet Multi-host Controller
 ==============================================================
 
diff --git a/Documentation/networking/device_drivers/intel/i40e.rst b/Documentation/networking/device_drivers/intel/i40e.rst
index 0cc16c525d10..848fd388fa6e 100644
--- a/Documentation/networking/device_drivers/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/intel/i40e.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+==================================================================
 Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series
 ==================================================================
 
diff --git a/Documentation/networking/device_drivers/intel/iavf.rst b/Documentation/networking/device_drivers/intel/iavf.rst
index f8b42b64eb28..2d0c3baa1752 100644
--- a/Documentation/networking/device_drivers/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/intel/iavf.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+==================================================================
 Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function
 ==================================================================
 
diff --git a/Documentation/networking/device_drivers/intel/ice.rst b/Documentation/networking/device_drivers/intel/ice.rst
index 4d118b827bbb..c220aa2711c6 100644
--- a/Documentation/networking/device_drivers/intel/ice.rst
+++ b/Documentation/networking/device_drivers/intel/ice.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+===================================================================
 Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series
 ===================================================================
 
diff --git a/Documentation/networking/device_drivers/intel/igb.rst b/Documentation/networking/device_drivers/intel/igb.rst
index e87a4a72ea2d..fc8cfaa5dcfa 100644
--- a/Documentation/networking/device_drivers/intel/igb.rst
+++ b/Documentation/networking/device_drivers/intel/igb.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+===========================================================
 Linux* Base Driver for Intel(R) Ethernet Network Connection
 ===========================================================
 
diff --git a/Documentation/networking/device_drivers/intel/igbvf.rst b/Documentation/networking/device_drivers/intel/igbvf.rst
index a8a9ffa4f8d3..9cddabe8108e 100644
--- a/Documentation/networking/device_drivers/intel/igbvf.rst
+++ b/Documentation/networking/device_drivers/intel/igbvf.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+============================================================
 Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet
 ============================================================
 
diff --git a/Documentation/networking/device_drivers/intel/ixgb.rst b/Documentation/networking/device_drivers/intel/ixgb.rst
index 8bd80e27843d..945018207a92 100644
--- a/Documentation/networking/device_drivers/intel/ixgb.rst
+++ b/Documentation/networking/device_drivers/intel/ixgb.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+=====================================================================
 Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
 =====================================================================
 
diff --git a/Documentation/networking/device_drivers/intel/ixgbe.rst b/Documentation/networking/device_drivers/intel/ixgbe.rst
index 86d887a63606..c7d25483fedb 100644
--- a/Documentation/networking/device_drivers/intel/ixgbe.rst
+++ b/Documentation/networking/device_drivers/intel/ixgbe.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+=============================================================================
 Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters
 =============================================================================
 
diff --git a/Documentation/networking/device_drivers/intel/ixgbevf.rst b/Documentation/networking/device_drivers/intel/ixgbevf.rst
index 56cde6366c2f..5d4977360157 100644
--- a/Documentation/networking/device_drivers/intel/ixgbevf.rst
+++ b/Documentation/networking/device_drivers/intel/ixgbevf.rst
@@ -1,5 +1,6 @@
 .. SPDX-License-Identifier: GPL-2.0+
 
+=============================================================
 Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet
 =============================================================
 
-- 
2.20.1


^ permalink raw reply related

* [net-next 02/14] igc: Remove unreachable code from igc_phy.c file
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

Address community comment.
Remove the unreachable code leads to the static checker warning.
PHY functionality will be added later per demand.
Reported by Dan Carpenter.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_phy.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
index 38e43e6fc1c7..4c8f96a9a148 100644
--- a/drivers/net/ethernet/intel/igc/igc_phy.c
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -152,7 +152,6 @@ void igc_power_down_phy_copper(struct igc_hw *hw)
 s32 igc_check_downshift(struct igc_hw *hw)
 {
 	struct igc_phy_info *phy = &hw->phy;
-	u16 phy_data, offset, mask;
 	s32 ret_val;
 
 	switch (phy->type) {
@@ -161,15 +160,8 @@ s32 igc_check_downshift(struct igc_hw *hw)
 		/* speed downshift not supported */
 		phy->speed_downgraded = false;
 		ret_val = 0;
-		goto out;
 	}
 
-	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
-
-	if (!ret_val)
-		phy->speed_downgraded = (phy_data & mask) ? true : false;
-
-out:
 	return ret_val;
 }
 
-- 
2.20.1


^ permalink raw reply related

* [net-next 09/14] igc: Remove unneeded code
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Sasha Neftin, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Sasha Neftin <sasha.neftin@intel.com>

Remove the 'igc_get_link_up_info_base method' from igc_base.c file.
Use the 'igc_get_speed_and_duplex_copper' method directly and reduce
the code redundancy.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 19ff987922d2..a31ec1682090 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -333,26 +333,6 @@ static void igc_release_phy_base(struct igc_hw *hw)
 	hw->mac.ops.release_swfw_sync(hw, mask);
 }
 
-/**
- * igc_get_link_up_info_base - Get link speed/duplex info
- * @hw: pointer to the HW structure
- * @speed: stores the current speed
- * @duplex: stores the current duplex
- *
- * This is a wrapper function, if using the serial gigabit media independent
- * interface, use PCS to retrieve the link speed and duplex information.
- * Otherwise, use the generic function to get the link speed and duplex info.
- */
-static s32 igc_get_link_up_info_base(struct igc_hw *hw, u16 *speed,
-				     u16 *duplex)
-{
-	s32 ret_val;
-
-	ret_val = igc_get_speed_and_duplex_copper(hw, speed, duplex);
-
-	return ret_val;
-}
-
 /**
  * igc_init_hw_base - Initialize hardware
  * @hw: pointer to the HW structure
@@ -499,7 +479,7 @@ static struct igc_mac_operations igc_mac_ops_base = {
 	.check_for_link		= igc_check_for_copper_link,
 	.rar_set		= igc_rar_set,
 	.read_mac_addr		= igc_read_mac_addr_base,
-	.get_speed_and_duplex	= igc_get_link_up_info_base,
+	.get_speed_and_duplex	= igc_get_speed_and_duplex_copper,
 };
 
 static const struct igc_phy_operations igc_phy_ops_base = {
-- 
2.20.1


^ permalink raw reply related

* [net-next 13/14] igb: Bump version number
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Todd Fujinaka, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Todd Fujinaka <todd.fujinaka@intel.com>

With recent changes, need to bump the driver version to reflect the
changes.

Signed-off-by: Todd Fujinaka <todd.fujinaka@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index dfa357b1a9d6..d35cc9697e2d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -39,7 +39,7 @@
 #include "igb.h"
 
 #define MAJ 5
-#define MIN 4
+#define MIN 6
 #define BUILD 0
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
-- 
2.20.1


^ permalink raw reply related

* [net-next 10/14] e1000e: fix cyclic resets at link up with active tx
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem
  Cc: Konstantin Khlebnikov, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>

I'm seeing series of e1000e resets (sometimes endless) at system boot
if something generates tx traffic at this time. In my case this is
netconsole who sends message "e1000e 0000:02:00.0: Some CPU C-states
have been disabled in order to enable jumbo frames" from e1000e itself.
As result e1000_watchdog_task sees used tx buffer while carrier is off
and start this reset cycle again.

[   17.794359] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None
[   17.794714] IPv6: ADDRCONF(NETDEV_CHANGE): eth1: link becomes ready
[   22.936455] e1000e 0000:02:00.0 eth1: changing MTU from 1500 to 9000
[   23.033336] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   26.102364] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None
[   27.174495] 8021q: 802.1Q VLAN Support v1.8
[   27.174513] 8021q: adding VLAN 0 to HW filter on device eth1
[   30.671724] cgroup: cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation
[   30.898564] netpoll: netconsole: local port 6666
[   30.898566] netpoll: netconsole: local IPv6 address 2a02:6b8:0:80b:beae:c5ff:fe28:23f8
[   30.898567] netpoll: netconsole: interface 'eth1'
[   30.898568] netpoll: netconsole: remote port 6666
[   30.898568] netpoll: netconsole: remote IPv6 address 2a02:6b8:b000:605c:e61d:2dff:fe03:3790
[   30.898569] netpoll: netconsole: remote ethernet address b0:a8:6e:f4:ff:c0
[   30.917747] console [netcon0] enabled
[   30.917749] netconsole: network logging started
[   31.453353] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   34.185730] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   34.321840] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   34.465822] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   34.597423] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   34.745417] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   34.877356] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   35.005441] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   35.157376] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   35.289362] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   35.417441] e1000e 0000:02:00.0: Some CPU C-states have been disabled in order to enable jumbo frames
[   37.790342] e1000e: eth1 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None

This patch flushes tx buffers only once when carrier is off
rather than at each watchdog iteration.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e19e53d97ac5..736fa51878f8 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5309,8 +5309,13 @@ static void e1000_watchdog_task(struct work_struct *work)
 			/* 8000ES2LAN requires a Rx packet buffer work-around
 			 * on link down event; reset the controller to flush
 			 * the Rx packet buffer.
+			 *
+			 * If the link is lost the controller stops DMA, but
+			 * if there is queued Tx work it cannot be done.  So
+			 * reset the controller to flush the Tx packet buffers.
 			 */
-			if (adapter->flags & FLAG_RX_NEEDS_RESTART)
+			if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
+			    e1000_desc_unused(tx_ring) + 1 < tx_ring->count)
 				adapter->flags |= FLAG_RESTART_NOW;
 			else
 				pm_schedule_suspend(netdev->dev.parent,
@@ -5333,14 +5338,6 @@ static void e1000_watchdog_task(struct work_struct *work)
 	adapter->gotc_old = adapter->stats.gotc;
 	spin_unlock(&adapter->stats64_lock);
 
-	/* If the link is lost the controller stops DMA, but
-	 * if there is queued Tx work it cannot be done.  So
-	 * reset the controller to flush the Tx packet buffers.
-	 */
-	if (!netif_carrier_ok(netdev) &&
-	    (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
-		adapter->flags |= FLAG_RESTART_NOW;
-
 	/* If reset is necessary, do it outside of interrupt context. */
 	if (adapter->flags & FLAG_RESTART_NOW) {
 		schedule_work(&adapter->reset_task);
-- 
2.20.1


^ permalink raw reply related

* [net-next 06/14] fm10k: TRIVIAL cleanup of extra spacing in function comment
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Jacob Keller, netdev, nhorman, sassmann, Andrew Bowers,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Jacob Keller <jacob.e.keller@intel.com>

The function comment for fm10k_iov_msg_msix_pf has an extra space in
a sentence, which is unnecessary.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 8f0a99b6a537..cb4d02629b86 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1148,7 +1148,7 @@ static void fm10k_iov_update_stats_pf(struct fm10k_hw *hw,
  *  @results: Pointer array to message, results[0] is pointer to message
  *  @mbx: Pointer to mailbox information structure
  *
- *  This function is a default handler for MSI-X requests from the VF.  The
+ *  This function is a default handler for MSI-X requests from the VF. The
  *  assumption is that in this case it is acceptable to just directly
  *  hand off the message from the VF to the underlying shared code.
  **/
-- 
2.20.1


^ permalink raw reply related

* [net-next 00/14][pull request] Intel Wired LAN Driver Updates 2019-02-05
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, nhorman, sassmann

This series contains updates to igc, e1000e, ixgbe, fm10k and driver
documentation.

Kai-Heng Feng fixes an e1000e issue where the Wake-On-LAN settings where
being set incorrectly during a system suspend.

Sasha addresses community feedback on the igc driver and provides a
number of code cleanups to remove either unreachable or unused code.  In
addition, added basic ethtool support for the igc driver.

Mike Rapoport fixes the formatting of the kernel driver documentation so
that the title is properly formatted and does not get lumped with the
document sections in the HTML kernel documents generated.

Jiri Kosina updates a hard coded RAR entries value with the existing
define IXGBE_82599_RAR_ENTRIES.

Jake fixes up whitespace in the fm10k driver.

Konstantin Khlebnikov fixes an issue where in some cases, the e1000e
driver will continually reset during a system boot because the watchdog
task sees items in the transmit buffer but the carrier is off (trying to
establish link) causing the device reset to flush the buffer.  To
resolve, just move this check/flush into the watchdog section for when
the carrier is off.

Todd bumps the igb driver version to reflect the recent driver changes.

The following are changes since commit bf2fa12593c2bb77c7ab84254ac69af429d6719b:
  net: marvell: mvpp2: fix lack of link interrupts
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue 1GbE

Jacob Keller (1):
  fm10k: TRIVIAL cleanup of extra spacing in function comment

Jeff Kirsher (1):
  e1000e: fix a missing check for return value

Jiri Kosina (1):
  ixgbe: remove magic constant in ixgbe_reset_hw_82599()

Kai-Heng Feng (1):
  e1000e: Exclude device from suspend direct complete optimization

Konstantin Khlebnikov (1):
  e1000e: fix cyclic resets at link up with active tx

Mike Rapoport (1):
  docs/networking: fix formatting of Intel drivers documentation

Sasha Neftin (7):
  igc: Remove unreachable code from igc_phy.c file
  igc: Fix code redundancy
  igc: Remove unused code
  igc: Remove unneeded code
  igc: Remove the 'igc_read_mac_addr_base' method
  igc: Remove the 'igc_get_phy_id_base' method
  igc: Add ethtool support

Todd Fujinaka (1):
  igb: Bump version number

 .../networking/device_drivers/intel/e100.rst  |    1 +
 .../networking/device_drivers/intel/e1000.rst |    1 +
 .../device_drivers/intel/e1000e.rst           |    1 +
 .../networking/device_drivers/intel/fm10k.rst |    1 +
 .../networking/device_drivers/intel/i40e.rst  |    1 +
 .../networking/device_drivers/intel/iavf.rst  |    1 +
 .../networking/device_drivers/intel/ice.rst   |    1 +
 .../networking/device_drivers/intel/igb.rst   |    1 +
 .../networking/device_drivers/intel/igbvf.rst |    1 +
 .../networking/device_drivers/intel/ixgb.rst  |    1 +
 .../networking/device_drivers/intel/ixgbe.rst |    1 +
 .../device_drivers/intel/ixgbevf.rst          |    1 +
 .../net/ethernet/intel/e1000e/80003es2lan.c   |   33 +-
 drivers/net/ethernet/intel/e1000e/netdev.c    |   17 +-
 drivers/net/ethernet/intel/fm10k/fm10k_pf.c   |    2 +-
 drivers/net/ethernet/intel/igb/igb_main.c     |    2 +-
 drivers/net/ethernet/intel/igc/Makefile       |    3 +-
 drivers/net/ethernet/intel/igc/igc.h          |   34 +-
 drivers/net/ethernet/intel/igc/igc_base.c     |   76 +-
 drivers/net/ethernet/intel/igc/igc_base.h     |   25 -
 drivers/net/ethernet/intel/igc/igc_defines.h  |    4 +
 drivers/net/ethernet/intel/igc/igc_ethtool.c  | 1032 +++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_hw.h       |    1 +
 drivers/net/ethernet/intel/igc/igc_main.c     |  109 +-
 drivers/net/ethernet/intel/igc/igc_phy.c      |    8 -
 drivers/net/ethernet/intel/igc/igc_regs.h     |    4 +-
 .../net/ethernet/intel/ixgbe/ixgbe_82599.c    |    2 +-
 27 files changed, 1220 insertions(+), 144 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/igc/igc_ethtool.c

-- 
2.20.1


^ permalink raw reply

* [net-next 01/14] e1000e: Exclude device from suspend direct complete optimization
From: Jeff Kirsher @ 2019-02-06  2:04 UTC (permalink / raw)
  To: davem; +Cc: Kai-Heng Feng, netdev, nhorman, sassmann, Aaron Brown,
	Jeff Kirsher
In-Reply-To: <20190206020424.12225-1-jeffrey.t.kirsher@intel.com>

From: Kai-Heng Feng <kai.heng.feng@canonical.com>

e1000e sets different WoL settings in system suspend callback and
runtime suspend callback.

The suspend direct complete optimization leaves e1000e in runtime
suspended state with wrong WoL setting during system suspend.

To fix this, we need to disable suspend direct complete optimization to
let e1000e always use suspend callback to set correct WoL during system
suspend.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 189f231075c2..e19e53d97ac5 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7351,6 +7351,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	e1000_print_device_info(adapter);
 
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NEVER_SKIP);
+
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_put_noidle(&pdev->dev);
 
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH 2/2 net-next] net: phy: make use of new MMD accessors
From: Andrew Lunn @ 2019-02-06  2:10 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: Florian Fainelli, David Miller, Russell King,
	netdev@vger.kernel.org, Nikita Yushchenko
In-Reply-To: <d26db84c-b7e1-0e3e-7461-db7b0a548500@gmail.com>

On Tue, Feb 05, 2019 at 10:13:07PM +0100, Heiner Kallweit wrote:
> Make use of the new MMD accessors.
> 
> Andrew Lunn <andrew@lunn.ch>

This is missing the Signed-off-by: prefix.

> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: TC stats / hw offload question
From: Jamal Hadi Salim @ 2019-02-06  2:20 UTC (permalink / raw)
  To: Edward Cree, netdev; +Cc: Jiri Pirko, Cong Wang
In-Reply-To: <26f0cfc9-3bef-8579-72cc-aa6c5ccecd43@solarflare.com>

On 2019-02-05 2:41 p.m., Edward Cree wrote:
> Regarding TC_CLSFLOWER_STATS, when a filter rule modifies the length of
>   the packet, e.g. by adding a VLAN or encap header, should the bytes
>   counter count the length of the packet _before_ edits (i.e. as seen by
>   the match), or the length after edits?  

Classifier stats - when they exist - should be on
"classified" packets. i.e before edits.
Edits are typically done by actions. And even then we dont
have counters which indicate the post edit resizing.

> If the latter, what is the
>   correct behaviour when (say) a packet is mirrored as-is but also
>   redirected with encapsulation?

It is that ambiguity that make it hard to maintain a single
counter for post-edit size. packet and byte counts are on
"entry".

> The fact that the stats live in the struct tc_action suggests a per-
>   action connection that would imply post-edit length,

The classifiers dont mod the packets. The actions do. And they
maintain stats on the size on "entry" i.e pre-edit.

> but then in
>   tcf_exts_dump_stats() we only look at the first action, which seems to
>   imply we really want the pre-edit length.
> I can't find any kind of doc or spec defining what behaviour is required.
> 

Each action keeps its own counters. If you did something like:

tc match using flower blah \
action vlan push tag ... \
action redirect to egress of eth0

And you submited a packet of size x bytes,
then the "match" would record x bytes.
the "vlan action" would record x bytes.
the "redirect" would record size x+vlaninfo bytes
the egress of eth0 would  recorr x+vlaninfo bytes

cheers,
jamal

> -Ed
> 


^ permalink raw reply

* Re: [PATCH bpf-next 2/2] tools/bpf: add log_level to bpf_load_program_attr
From: Alexei Starovoitov @ 2019-02-06  2:26 UTC (permalink / raw)
  To: Yonghong Song; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20190205194823.1805324-1-yhs@fb.com>

On Tue, Feb 05, 2019 at 11:48:23AM -0800, Yonghong Song wrote:
> The kernel verifier has three levels of logs:
>     0: no logs
>     1: logs mostly useful
>   > 1: verbose
> 
> Current libbpf API functions bpf_load_program_xattr() and
> bpf_load_program() cannot specify log_level.
> The bcc, however, provides an interface for user to
> specify log_level 2 for verbose output.
> 
> This patch added log_level into structure
> bpf_load_program_attr, so users, including bcc, can use
> bpf_load_program_xattr() to change log_level.
> 
> The bpf selftest test_sock.c is modified to enable log_level = 2.
> If the "verbose" in test_sock.c is changed to true,
> the test will output logs like below:
>   $ ./test_sock
>   func#0 @0
>   0: R1=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
>   0: (bf) r6 = r1
>   1: R1=ctx(id=0,off=0,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0,call_-1
>   1: (61) r7 = *(u32 *)(r6 +28)
>   invalid bpf_context access off=28 size=4
> 
>   Test case: bind4 load with invalid access: src_ip6 .. [PASS]
>   ...
>   Test case: bind6 allow all .. [PASS]
>   Summary: 16 PASSED, 0 FAILED
> 
> Some test_sock tests are negative tests and verbose verifier
> log will be printed out as shown in the above.
> 
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  tools/lib/bpf/bpf.c                     | 4 +++-
>  tools/lib/bpf/bpf.h                     | 1 +
>  tools/testing/selftests/bpf/test_sock.c | 9 ++++++++-
>  3 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index 3defad77dc7a..78aa8c2b1a5c 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -214,6 +214,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
>  {
>  	void *finfo = NULL, *linfo = NULL;
>  	union bpf_attr attr;
> +	__u32 log_level;
>  	__u32 name_len;
>  	int fd;
>  
> @@ -292,7 +293,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
>  	/* Try again with log */
>  	attr.log_buf = ptr_to_u64(log_buf);
>  	attr.log_size = log_buf_sz;
> -	attr.log_level = 1;
> +	log_level = load_attr->log_level;
> +	attr.log_level = (log_level >= 2) ? log_level : 1;
>  	log_buf[0] = 0;
>  	fd = sys_bpf_prog_load(&attr, sizeof(attr));
>  done:
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index ed09eed2dc3b..15a8e22e8eae 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -76,6 +76,7 @@ struct bpf_load_program_attr {
>  	const struct bpf_insn *insns;
>  	size_t insns_cnt;
>  	const char *license;
> +	__u32 log_level;
>  	__u32 kern_version;
>  	__u32 prog_ifindex;
>  	__u32 prog_btf_fd;

this will break binary compatibility in libbpf api.
Please add new fields always to the end of *_attr structs.

Also why not to silence bcc instead?
Let it treat log_level > 1 as log_level=1
I don't think anyone but the most extreme verifier hacker used level=2.
Personally I don't remember when was the last time I used it.
It seem like a niche feature that we can safely remove in bcc.


^ permalink raw reply

* Re: [PATCH bpf-next 1/2] tools/bpf: add const qualifier to btf__get_map_kv_tids() map_name parameter
From: Alexei Starovoitov @ 2019-02-06  2:27 UTC (permalink / raw)
  To: Yonghong Song; +Cc: netdev, Alexei Starovoitov, Daniel Borkmann, kernel-team
In-Reply-To: <20190205194822.1804874-1-yhs@fb.com>

On Tue, Feb 05, 2019 at 11:48:22AM -0800, Yonghong Song wrote:
> Commit 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function")
> added the API function btf__get_map_kv_tids():
>   btf__get_map_kv_tids(const struct btf *btf, char *map_name, ...)
> 
> The parameter map_name has type "char *". This is okay inside libbpf library since
> the map_name is from bpf_map->name which also has type "char *".
> 
> This will be problematic if the caller for map_name already has attribute "const",
> e.g., from C++ string.c_str(). It will result in either a warning or an error.
> 
>   /home/yhs/work/bcc/src/cc/btf.cc:166:51:
>     error: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive]
>       return btf__get_map_kv_tids(btf_, map_name.c_str()
> 
> This patch added "const" attributes to map_name parameter.
> 
> Fixes: 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function")
> Signed-off-by: Yonghong Song <yhs@fb.com>

Acked-by: Alexei Starovoitov <ast@kernel.org>


^ permalink raw reply

* Re: [PATCH bpf-next] tools/bpf: fix a selftest test_btf failure
From: Alexei Starovoitov @ 2019-02-06  2:36 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Andrii Nakryiko, netdev, Alexei Starovoitov, Daniel Borkmann,
	kernel-team
In-Reply-To: <20190205222844.2425372-1-yhs@fb.com>

On Tue, Feb 05, 2019 at 02:28:44PM -0800, Yonghong Song wrote:
> Commit 9c651127445c ("selftests/btf: add initial BTF dedup tests")
> added dedup tests in test_btf.c.
> It broke the raw test:
>  BTF raw test[71] (func proto (Bad arg name_off)):
>     btf_raw_create:2905:FAIL Error getting string #65535, strs_cnt:1
> 
> The test itself encodes invalid func_proto parameter name
> offset 0xffffFFFF as a negative test for the kernel.
> The above commit changed the meaning of that offset and
> resulted in a user space error.
>   #define NAME_NTH(N) (0xffff0000 | N)
>   #define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000)
>   #define GET_NAME_NTH_IDX(X) (X & 0x0000ffff)
> 
> Currently, the kernel permits maximum name offset 0xffff.
> Set the test name off as 0x0fffFFFF to trigger the kernel
> verification failure.
> 
> Cc: Andrii Nakryiko <andriin@fb.com>
> Fixes: 9c651127445c ("selftests/btf: add initial BTF dedup tests")
> Signed-off-by: Yonghong Song <yhs@fb.com>

Applied, Thanks

Also I see the following new error in test_btf:
BTF libbpf test[2] (test_btf_nokv.o): libbpf: map:btf_map container_name:____btf_map_btf_map cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?
OK

A bunch of similar errors are in test_progs as well.
I suspect it's related to the last few btf changes.
Andrii, Yonghong, Please investigate.


^ permalink raw reply

* Re: [PATCH bpf-next 1/2] tools/bpf: add const qualifier to btf__get_map_kv_tids() map_name parameter
From: Alexei Starovoitov @ 2019-02-06  2:41 UTC (permalink / raw)
  To: Yonghong Song
  Cc: Network Development, Alexei Starovoitov, Daniel Borkmann,
	Kernel Team
In-Reply-To: <20190206022704.eu5dh6a5fxzzxy3i@ast-mbp>

On Tue, Feb 5, 2019 at 6:27 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Feb 05, 2019 at 11:48:22AM -0800, Yonghong Song wrote:
> > Commit 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function")
> > added the API function btf__get_map_kv_tids():
> >   btf__get_map_kv_tids(const struct btf *btf, char *map_name, ...)
> >
> > The parameter map_name has type "char *". This is okay inside libbpf library since
> > the map_name is from bpf_map->name which also has type "char *".
> >
> > This will be problematic if the caller for map_name already has attribute "const",
> > e.g., from C++ string.c_str(). It will result in either a warning or an error.
> >
> >   /home/yhs/work/bcc/src/cc/btf.cc:166:51:
> >     error: invalid conversion from ‘const char*’ to ‘char*’ [-fpermissive]
> >       return btf__get_map_kv_tids(btf_, map_name.c_str()
> >
> > This patch added "const" attributes to map_name parameter.
> >
> > Fixes: 96408c43447a ("tools/bpf: implement libbpf btf__get_map_kv_tids() API function")
> > Signed-off-by: Yonghong Song <yhs@fb.com>
>
> Acked-by: Alexei Starovoitov <ast@kernel.org>

Actually just applied this patch alone,
since it's independent from 2nd.

^ permalink raw reply

* Re: [PATCH v2 bpf-next 1/2] btf: separate btf creation and loading
From: Alexei Starovoitov @ 2019-02-06  3:03 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: songliubraving, yhs, ast, kafai, netdev, daniel, andrii.nakryiko
In-Reply-To: <20190206002949.1915237-2-andriin@fb.com>

On Tue, Feb 05, 2019 at 04:29:48PM -0800, Andrii Nakryiko wrote:
> This change splits out previous btf__new functionality of constructing
> struct btf and loading it into kernel into two:
> - btf__new() just creates and initializes struct btf
> - btf__load() attempts to load existing struct btf into kernel
> 
> btf__free will still close BTF fd, if it was ever loaded successfully
> into kernel.
> 
> This change allows users of libbpf to manipulate BTF using its API,
> without the need to unnecessarily load it into kernel.
> 
> One of the intended use cases is pahole using libbpf to do DWARF to BTF
> conversion and deduplication using libbpf, while handling ELF sections
> overwrites and other concerns on its own.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> Acked-by: Song Liu <songliubraving@fb.com>

should it be 
Fixes: 2d3feca8c44f ("bpf: btf: print map dump and lookup with btf info")
?
Even back then btf__new() was doing the loading.
So that btf support in bpftool was silently double loading btf.


^ permalink raw reply

* Re: [PATCH v2 bpf-next 2/2] btf: expose API to work with raw btf data
From: Alexei Starovoitov @ 2019-02-06  3:07 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: songliubraving, yhs, ast, kafai, netdev, andrii.nakryiko, daniel
In-Reply-To: <20190206002949.1915237-3-andriin@fb.com>

On Tue, Feb 05, 2019 at 04:29:49PM -0800, Andrii Nakryiko wrote:
> This patch exposes two new APIs btf__get_raw_data_size() and
> btf__get_raw_data() that allows to get a copy of raw BTF data out of
> struct btf. This is useful for external programs that need to manipulate
> raw data, e.g., pahole using btf__dedup() to deduplicate BTF type info
> and then writing it back to file.
> 
> Signed-off-by: Andrii Nakryiko <andriin@fb.com>
> Acked-by: Song Liu <songliubraving@fb.com>
> ---
>  tools/lib/bpf/btf.c      | 10 ++++++++++
>  tools/lib/bpf/btf.h      |  2 ++
>  tools/lib/bpf/libbpf.map |  2 ++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index 1c2ba7182400..34bfb3641aac 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -437,6 +437,16 @@ int btf__fd(const struct btf *btf)
>  	return btf->fd;
>  }
>  
> +__u32 btf__get_raw_data_size(const struct btf *btf)
> +{
> +	return btf->data_size;
> +}
> +
> +void btf__get_raw_data(const struct btf *btf, char *data)
> +{
> +	memcpy(data, btf->data, btf->data_size);
> +}

I cannot think of any other way to use this api,
but to call btf__get_raw_data_size() first,
then malloc that much memory and then call btf__get_raw_data()
to store btf into it.

If so, may be api should be single call that allocates, copies,
and returns pointer to new mem and its size?
Probably less error prone?


^ permalink raw reply

* Re: [RFC bpf-next 0/7] net: flow_dissector: trigger BPF hook when called from eth_get_headlen
From: Alexei Starovoitov @ 2019-02-06  3:12 UTC (permalink / raw)
  To: Stanislav Fomichev
  Cc: Willem de Bruijn, Stanislav Fomichev, Network Development,
	David Miller, Alexei Starovoitov, Daniel Borkmann, simon.horman,
	Willem de Bruijn
In-Reply-To: <20190206005931.GF10769@mini-arch>

On Tue, Feb 05, 2019 at 04:59:31PM -0800, Stanislav Fomichev wrote:
> On 02/05, Alexei Starovoitov wrote:
> > On Tue, Feb 05, 2019 at 12:40:03PM -0800, Stanislav Fomichev wrote:
> > > On 02/05, Willem de Bruijn wrote:
> > > > On Tue, Feb 5, 2019 at 12:57 PM Stanislav Fomichev <sdf@google.com> wrote:
> > > > >
> > > > > Currently, when eth_get_headlen calls flow dissector, it doesn't pass any
> > > > > skb. Because we use passed skb to lookup associated networking namespace
> > > > > to find whether we have a BPF program attached or not, we always use
> > > > > C-based flow dissector in this case.
> > > > >
> > > > > The goal of this patch series is to add new networking namespace argument
> > > > > to the eth_get_headlen and make BPF flow dissector programs be able to
> > > > > work in the skb-less case.
> > > > >
> > > > > The series goes like this:
> > > > > 1. introduce __init_skb and __init_skb_shinfo; those will be used to
> > > > >    initialize temporary skb
> > > > > 2. introduce skb_net which can be used to get networking namespace
> > > > >    associated with an skb
> > > > > 3. add new optional network namespace argument to __skb_flow_dissect and
> > > > >    plumb through the callers
> > > > > 4. add new __flow_bpf_dissect which constructs temporary on-stack skb
> > > > >    (using __init_skb) and calls BPF flow dissector program
> > > > 
> > > > The main concern I see with this series is this cost of skb zeroing
> > > > for every packet in the device driver receive routine, *independent*
> > > > from the real skb allocation and zeroing which will likely happen
> > > > later.
> > > Yes, plus ~200 bytes on the stack for the callers.
> > > 
> > > Not sure how visible this zeroing though, I can probably try to get some
> > > numbers from BPF_PROG_TEST_RUN (running current version vs running with
> > > on-stack skb).
> > 
> > imo extra 256 byte memset for every packet is non starter.
> We can put pre-allocated/initialized skbs without data into percpu or even
> use pcpu_freelist_pop/pcpu_freelist_push to make sure we don't have to think
> about having multiple percpu for irq/softirq/process contexts.
> Any concerns with that approach?
> Any other possible concerns with the overall series?

I'm missing why the whole thing is needed.
You're saying:
" make BPF flow dissector programs be able to work in the skb-less case".
What does it mean specifically?
The only non-skb case is XDP.
Are you saying you want flow_dissector prog to be run in XDP?


^ permalink raw reply

* Re: [PATCH bpf-next 2/4] bpf: fix lockdep false positive in stackmap
From: Eric Dumazet @ 2019-02-06  3:21 UTC (permalink / raw)
  To: Waiman Long, Alexei Starovoitov
  Cc: Peter Zijlstra, Alexei Starovoitov, davem, daniel, edumazet,
	jannh, netdev, kernel-team
In-Reply-To: <e7748be1-8b0b-6613-159f-1c45f932c617@redhat.com>



On 01/30/2019 06:48 PM, Waiman Long wrote:
> On 01/30/2019 09:01 PM, Alexei Starovoitov wrote:
>> On Wed, Jan 30, 2019 at 04:32:12PM -0500, Waiman Long wrote:
>>> On 01/30/2019 04:11 PM, Waiman Long wrote:
>>>> On 01/30/2019 03:10 PM, Alexei Starovoitov wrote:
>>>>> On Wed, Jan 30, 2019 at 02:42:23PM -0500, Waiman Long wrote:
>>>>>> On 01/30/2019 02:30 PM, Alexei Starovoitov wrote:
>>>>>>> On Wed, Jan 30, 2019 at 11:15:30AM +0100, Peter Zijlstra wrote:
>>>>>>>> On Tue, Jan 29, 2019 at 08:04:56PM -0800, Alexei Starovoitov wrote:
>>>>>>>>> Lockdep warns about false positive:
>>>>>>>> This is not a false positive, and you probably also need to use
>>>>>>>> down_read_non_owner() to match this up_read_non_owner().
>>>>>>>>
>>>>>>>> {up,down}_read() and {up,down}_read_non_owner() are not only different
>>>>>>>> in the lockdep annotation; there is also optimistic spin stuff that
>>>>>>>> relies on 'owner' tracking.
>>>>>>> Can you point out in the code the spin bit?
>>>>>>> As far as I can see sem->owner is debug only feature.
>>>>>>> All owner checks are done under CONFIG_DEBUG_RWSEMS.
>>>>>> No, sem->owner is mainly for performing optimistic spinning which is a
>>>>>> performance feature to make rwsem writer-lock performs similar to mutex.
>>>>>> The debugging part is just an add-on. It is not the reason for the
>>>>>> presence of sem->owner.
>>>>> I see. Got it.
>>>>>
>>>>>>> Also there is no down_read_trylock_non_owner() at the moment.
>>>>>>> We can argue about it for -next, but I'd rather silence lockdep
>>>>>>> with this patch today.
>>>>>>>
>>>>>> We can add down_read_trylock_non_owner() if there is a need for it. It
>>>>>> should be easy to do.
>>>>> Yes, but looking through the code it's not clear to me that it's safe
>>>>> to mix non_owner() versions with regular.
>>>>> bpf/stackmap.c does down_read_trylock + up_read.
>>>>> If we add new down_read_trylock_non_owner that set the owner to
>>>>> NULL | RWSEM_* bits is this safe with conccurent read/write
>>>>> that do regular versions?
>>>>> rwsem_can_spin_on_owner() does:
>>>>>         if (owner) {
>>>>>                 ret = is_rwsem_owner_spinnable(owner) &&
>>>>>                       owner_on_cpu(owner);
>>>>> that looks correct.
>>>>> For a second I thought there could be fault here due to non_owner.
>>>>> But there could be other places where it's assumed that owner
>>>>> is never null?
>>>> The content of owner is not the cause of the lockdep warning. The
>>>> lockdep code assumes that the task that acquires the lock will release
>>>> it some time later. That is not the case when you need to acquire the
>>>> lock by one task and released by another. In this case, you have to use
>>>> the non_owner version of down/up_read which disable the lockdep
>>>> acquire/release tracking. That will be the only difference between the
>>>> two set of APIs.
>>>>
>>>> Cheers,
>>>> Longman
>>> BTW, you may want to do something like that to make sure that the lock
>>> ownership is probably tracked.
>>>
>>> -Longman
>>>
>>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>>> index d43b145..79eef9d 100644
>>> --- a/kernel/bpf/stackmap.c
>>> +++ b/kernel/bpf/stackmap.c
>>> @@ -338,6 +338,13 @@ static void stack_map_get_build_id_offset(struct
>>> bpf_stack_
>>>         } else {
>>>                 work->sem = &current->mm->mmap_sem;
>>>                 irq_work_queue(&work->irq_work);
>>> +
>>> +               /*
>>> +                * The irq_work will release the mmap_sem with
>>> +                * up_read_non_owner(). The rwsem_release() is called
>>> +                * here to release the lock from lockdep's perspective.
>>> +                */
>>> +               rwsem_release(&current->mm->mmap_sem.dep_map, 1, _RET_IP_);
>> are you saying the above is enough coupled with up_read_non_owner?
>> Looking at how amdgpu is using this api... I think they just use non_owner
>> version when doing things from different task.
>> So I don't think pairing non_owner with non_owner is strictly necessary.
>> It seems fine to use down_read_trylock() with above rwsem_release() hack
>> plus up_read_non_owner().
>> Am I missing something?
>>
> The statement above is to clear the state for the lockdep so that it
> knows that the task no longer owns the lock. Without doing that, there
> is a possibility of some other kind of incorrect lockdep warning may be
> produced because the code will still think the task hold a read-lock on
> the mmap_sem. It is also possible no warning will be reported.
> 
> The bpf code is kind of special that it acquires the mmap_sem. Later on,
> it either releases it itself (non-NMI) or request irq_work to release it
> (NMI). So either, you use the _non_owner versions for both acquire and
> release or fake the release like the code segment above.
> 
> Peter, please chime in if you have other suggestion.
> 

Hi guys

What are the plans to address the issue then ?

Latest net tree exhibits this trace :

Thanks

[  546.116982] =====================================
[  546.121688] WARNING: bad unlock balance detected!
[  546.126393] 5.0.0-dbg-DEV #559 Not tainted
[  546.130491] -------------------------------------
[  546.135196] taskset/15409 is trying to release lock (&mm->mmap_sem) at:
[  546.141844] [<ffffffffb0233246>] do_up_read+0x16/0x30
[  546.146919] but there are no more locks to release!
[  546.151790] 
               other info that might help us debug this:
[  546.158325] 1 lock held by taskset/15409:
[  546.162325]  #0: 00000000683db857 (&sig->cred_guard_mutex){+.+.}, at: __do_execve_file.isra.38+0x13e/0xbc0
[  546.171978] 
               stack backtrace:
[  546.176327] CPU: 0 PID: 15409 Comm: taskset Not tainted 5.0.0-dbg-DEV #559
[  546.183214] Hardware name: Intel RML,PCH/Iota_QC_19, BIOS 2.54.0 06/07/2018
[  546.190182] Call Trace:
[  546.192633]  <IRQ>
[  546.194672]  dump_stack+0x67/0x95
[  546.198006]  ? do_up_read+0x16/0x30
[  546.201491]  print_unlock_imbalance_bug.part.33+0xd0/0xd7
[  546.206914]  ? do_up_read+0x16/0x30
[  546.210406]  lock_release+0x213/0x2d0
[  546.214088]  up_read+0x20/0xa0
[  546.217138]  do_up_read+0x16/0x30
[  546.220457]  irq_work_run_list+0x4a/0x70
[  546.224408]  irq_work_run+0x18/0x40
[  546.227911]  smp_irq_work_interrupt+0x54/0x1d0
[  546.232362]  irq_work_interrupt+0xf/0x20
[  546.236280]  </IRQ>
[  546.238396] RIP: 0010:release_pages+0x69/0x650
[  546.242839] Code: 01 4c 8b 2f 4c 8d 67 08 48 8d 44 f7 08 45 31 f6 48 89 04 24 eb 04 49 83 c4 08 48 8b 05 20 fe 31 01 49 39 c5 74 26 4d 8b 7d 08 <4d> 8d 47 ff 41 83 e7 01 4d 0f 44 c5 41 8b 40 34 4d 89 c7 85 c0 0f
[  546.261615] RSP: 0018:ffffac604732bb00 EFLAGS: 00000287 ORIG_RAX: ffffffffffffff09
[  546.269190] RAX: ffffe4c9c0ca8000 RBX: 0000000000000020 RCX: 0000000000000006
[  546.276352] RDX: 0000000000000006 RSI: ffff8cf7facb6aa8 RDI: ffff8cf7facb6240
[  546.283482] RBP: ffffac604732bb70 R08: ffffe4c9c0ba3d80 R09: 0000000000000000
[  546.290613] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8cf7db3292d8
[  546.297752] R13: ffffe4c9c0ba3dc0 R14: 0000000000000000 R15: ffffe4c9c0ba3d88
[  546.304907]  free_pages_and_swap_cache+0xe6/0x100
[  546.309618]  tlb_flush_mmu_free+0x36/0x60
[  546.313625]  arch_tlb_finish_mmu+0x28/0x1e0
[  546.317801]  tlb_finish_mmu+0x23/0x30
[  546.321459]  exit_mmap+0xc9/0x1f0
[  546.324787]  ? __mutex_unlock_slowpath+0x11/0x2e0
[  546.329502]  mmput+0x62/0x130
[  546.332481]  flush_old_exec+0x60e/0x810
[  546.336314]  load_elf_binary+0x830/0x18c0
[  546.340323]  ? search_binary_handler+0x8a/0x200
[  546.344848]  search_binary_handler+0x99/0x200
[  546.349221]  __do_execve_file.isra.38+0x76d/0xbc0
[  546.353918]  __x64_sys_execve+0x39/0x50
[  546.357757]  do_syscall_64+0x5a/0x460
[  546.361440]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  546.366489] RIP: 0033:0x7fd599e3c067
[  546.370069] Code: Bad RIP value.
[  546.373306] RSP: 002b:00007ffcf7092e58 EFLAGS: 00000202 ORIG_RAX: 000000000000003b
[  546.380880] RAX: ffffffffffffffda RBX: 00007ffcf7093058 RCX: 00007fd599e3c067
[  546.388010] RDX: 00007ffcf7093070 RSI: 00007ffcf7093058 RDI: 00007ffcf70937c0
[  546.395132] RBP: 00007ffcf7092ec0 R08: 00000000ffffffff R09: 0000000001b1ae40
[  546.402265] R10: 00007ffcf7092c80 R11: 0000000000000202 R12: 00007ffcf70937c0
[  546.409385] R13: 00007ffcf7093070 R14: 0000000000000000 R15: 00007ffcf7093048


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox